Merge pull request #355 from arc53/feature/cpu-llm

llama-cpp local
Authored by Alex on 2023-10-01 17:55:26 +01:00; committed by GitHub.
8 changed files with 174 additions and 56 deletions

View File

@@ -32,20 +32,6 @@ if settings.LLM_NAME == "gpt4":
 else:
     gpt_model = 'gpt-3.5-turbo'
-if settings.SELF_HOSTED_MODEL:
-    from langchain.llms import HuggingFacePipeline
-    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-    model_id = settings.LLM_NAME  # hf model id (Arc53/docsgpt-7b-falcon, Arc53/docsgpt-14b)
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(model_id)
-    pipe = pipeline(
-        "text-generation", model=model,
-        tokenizer=tokenizer, max_new_tokens=2000,
-        device_map="auto", eos_token_id=tokenizer.eos_token_id
-    )
-    hf = HuggingFacePipeline(pipeline=pipe)
 # load the prompts
 current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 with open(os.path.join(current_dir, "prompts", "combine_prompt.txt"), "r") as f:
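For reference, the block deleted above served self-hosted models through a full-precision transformers pipeline wrapped as a LangChain LLM. A condensed, self-contained sketch of that old path (the model id is illustrative), which the llama.cpp backend added later in this PR replaces:

from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# The removed path: load a full-precision Hugging Face model and wrap
# it as a LangChain LLM. Memory-heavy compared to a quantized GGUF file.
model_id = "Arc53/docsgpt-7b-falcon"  # illustrative model id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer,
                max_new_tokens=2000, device_map="auto")
hf = HuggingFacePipeline(pipeline=pipe)
print(hf("What is DocsGPT?"))  # callable like any LangChain LLM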

View File

@@ -1,6 +1,8 @@
 from pathlib import Path
+import os
 from pydantic import BaseSettings
+
+current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

 class Settings(BaseSettings):
@@ -9,9 +11,8 @@ class Settings(BaseSettings):
     CELERY_BROKER_URL: str = "redis://localhost:6379/0"
     CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
     MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
-    MODEL_PATH: str = "./models/gpt4all-model.bin"
+    MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
     TOKENS_MAX_HISTORY: int = 150
-    SELF_HOSTED_MODEL: bool = False

     UPLOAD_FOLDER: str = "inputs"
     VECTOR_STORE: str = "faiss"  # "faiss" or "elasticsearch"
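Because Settings subclasses pydantic's BaseSettings, the new GGUF default can be overridden per deployment through environment variables instead of code edits. A minimal sketch, assuming the class lives at application.core.settings (the override path is illustrative):

import os

# Any Settings field can be overridden via an environment variable;
# set it before the settings module is imported.
os.environ["MODEL_PATH"] = "/opt/models/docsgpt-7b-f16.gguf"

from application.core.settings import Settings  # assumed module path

settings = Settings()
print(settings.MODEL_PATH)  # -> /opt/models/docsgpt-7b-f16.gguf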

View File

@@ -0,0 +1,38 @@
+from application.llm.base import BaseLLM
+
+
+class LlamaCpp(BaseLLM):
+    def __init__(self, api_key, llm_name='/Users/pavel/Desktop/docsgpt/application/models/orca-test.bin'):
+        global llama
+        try:
+            from llama_cpp import Llama
+        except ImportError:
+            raise ImportError("Please install llama_cpp using pip install llama-cpp-python")
+
+        llama = Llama(model_path=llm_name)
+
+    def gen(self, model, engine, messages, stream=False, **kwargs):
+        context = messages[0]['content']
+        user_question = messages[-1]['content']
+        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+        result = llama(prompt, max_tokens=150, echo=False)
+
+        # import sys
+        # print(result['choices'][0]['text'].split('### Answer \n')[-1], file=sys.stderr)
+
+        return result['choices'][0]['text'].split('### Answer \n')[-1]
+
+    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+        context = messages[0]['content']
+        user_question = messages[-1]['content']
+        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+        result = llama(prompt, max_tokens=150, echo=False, stream=stream)
+
+        # import sys
+        # print(list(result), file=sys.stderr)
+
+        for item in result:
+            for choice in item['choices']:
+                yield choice['text']
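A minimal sketch of exercising the new class directly, assuming llama-cpp-python is installed and a local model file exists at the (illustrative) path below. The first message carries retrieved context and the last carries the user question, mirroring how gen() and gen_stream() unpack messages:

from application.llm.llama_cpp import LlamaCpp

# api_key is unused by the local backend; the model path is illustrative.
llm = LlamaCpp(api_key=None, llm_name="models/docsgpt-7b-f16.gguf")

messages = [
    {"role": "system", "content": "Context: llama.cpp runs quantized models on CPU."},
    {"role": "user", "content": "How does DocsGPT answer questions offline?"},
]

# Blocking call: returns the text after the "### Answer" marker.
print(llm.gen(model=None, engine=None, messages=messages))

# Streaming call: yields text chunks as llama.cpp produces them.
for token in llm.gen_stream(model=None, engine=None, messages=messages):
    print(token, end="", flush=True)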

View File

@@ -1,6 +1,7 @@
 from application.llm.openai import OpenAILLM, AzureOpenAILLM
 from application.llm.sagemaker import SagemakerAPILLM
 from application.llm.huggingface import HuggingFaceLLM
+from application.llm.llama_cpp import LlamaCpp
@@ -9,7 +10,8 @@ class LLMCreator:
         'openai': OpenAILLM,
         'azure_openai': AzureOpenAILLM,
         'sagemaker': SagemakerAPILLM,
-        'huggingface': HuggingFaceLLM
+        'huggingface': HuggingFaceLLM,
+        'llama.cpp': LlamaCpp
     }

     @classmethod

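With the new registry entry, selecting the local backend is a one-line change at the call site. A sketch, assuming the classmethod that follows the dict is the usual create_llm factory that forwards *args/**kwargs to the registered class (its body is outside this hunk):

from application.llm.llm_creator import LLMCreator

# 'llama.cpp' is the registry key added in this PR; api_key is ignored
# by the local backend and the model path is illustrative.
llm = LLMCreator.create_llm('llama.cpp', api_key=None,
                            llm_name="models/docsgpt-7b-f16.gguf")
answer = llm.gen(model=None, engine=None, messages=[
    {"role": "system", "content": "Context from the vector store."},
    {"role": "user", "content": "Summarize the docs."},
])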
View File

@@ -2,7 +2,7 @@ from abc import ABC, abstractmethod
 import os
 from langchain.embeddings import (
     OpenAIEmbeddings,
-    HuggingFaceHubEmbeddings,
+    HuggingFaceEmbeddings,
     CohereEmbeddings,
     HuggingFaceInstructEmbeddings,
 )
@@ -22,7 +22,7 @@ class BaseVectorStore(ABC):
     def _get_embeddings(self, embeddings_name, embeddings_key=None):
         embeddings_factory = {
             "openai_text-embedding-ada-002": OpenAIEmbeddings,
-            "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceHubEmbeddings,
+            "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings,
             "huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings,
             "cohere_medium": CohereEmbeddings
         }
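This swap replaces HuggingFaceHubEmbeddings, which calls the hosted Hugging Face Inference API, with HuggingFaceEmbeddings, which runs the sentence-transformers model on the local machine, consistent with the PR's CPU-only goal. A sketch of what the factory entry now constructs (requires the sentence-transformers package):

from langchain.embeddings import HuggingFaceEmbeddings

# Downloads the model once, then embeds entirely locally; no Hugging
# Face API key is needed.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)
vector = embeddings.embed_query("How do I run DocsGPT on CPU?")
print(len(vector))  # 768-dimensional embedding for all-mpnet-base-v2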