script + cpu optimisations

2026-02-22 12:21:39 +00:00 · 2023-10-01 19:16:13 +01:00
parent 6045cbbc62
commit 9a33bf2210
6 changed files with 12 additions and 38 deletions
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -118,6 +118,8 @@ def complete_stream(question, docsearch, chat_history, api_key, conversation_id)
    

    docs = docsearch.search(question, k=2)
+    if settings.LLM_NAME == "llama.cpp":
+        docs = [docs[0]]
    # join all page_content together with a newline
    docs_together = "\n".join([doc.page_content for doc in docs])
    p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -2,7 +2,7 @@ from pathlib import Path
 import os

 from pydantic import BaseSettings
-current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


 class Settings(BaseSettings):
--- a/application/llm/llama_cpp.py
+++ b/application/llm/llama_cpp.py
@@ -1,15 +1,16 @@
 from application.llm.base import BaseLLM
+from application.core.settings import settings

 class LlamaCpp(BaseLLM):

-    def __init__(self, api_key, llm_name='/Users/pavel/Desktop/docsgpt/application/models/orca-test.bin'):
+    def __init__(self, api_key, llm_name=settings.MODEL_PATH, **kwargs):
        global llama
        try:
            from llama_cpp import Llama
        except ImportError:
            raise ImportError("Please install llama_cpp using pip install llama-cpp-python")

-        llama = Llama(model_path=llm_name)
+        llama = Llama(model_path=llm_name, n_ctx=2048)

    def gen(self, model, engine, messages, stream=False, **kwargs):
        context = messages[0]['content']