script + cpu optimisations

This commit is contained in:
Alex
2023-10-01 19:16:13 +01:00
parent 6045cbbc62
commit 9a33bf2210
6 changed files with 12 additions and 38 deletions

View File

@@ -118,6 +118,8 @@ def complete_stream(question, docsearch, chat_history, api_key, conversation_id)
docs = docsearch.search(question, k=2)
if settings.LLM_NAME == "llama.cpp":
docs = [docs[0]]
# join all page_content together with a newline
docs_together = "\n".join([doc.page_content for doc in docs])
p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)

View File

@@ -2,7 +2,7 @@ from pathlib import Path
import os
from pydantic import BaseSettings
current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
class Settings(BaseSettings):

View File

@@ -1,15 +1,16 @@
from application.llm.base import BaseLLM
from application.core.settings import settings
class LlamaCpp(BaseLLM):
def __init__(self, api_key, llm_name='/Users/pavel/Desktop/docsgpt/application/models/orca-test.bin'):
def __init__(self, api_key, llm_name=settings.MODEL_PATH, **kwargs):
global llama
try:
from llama_cpp import Llama
except ImportError:
raise ImportError("Please install llama_cpp using pip install llama-cpp-python")
llama = Llama(model_path=llm_name)
llama = Llama(model_path=llm_name, n_ctx=2048)
def gen(self, model, engine, messages, stream=False, **kwargs):
context = messages[0]['content']