Merge branch 'main' into feature/streaming

2025-11-29 08:33:20 +00:00 · 2023-05-31 22:15:53 +01:00
parent fae3f55010 0b94f1717f
commit 3b8039a580
6 changed files with 53 additions and 16 deletions
--- a/application/app.py
+++ b/application/app.py
@@ -24,9 +24,11 @@ from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
+    AIMessagePromptTemplate,
 )
 from pymongo import MongoClient
 from werkzeug.utils import secure_filename
+from langchain.llms import GPT4All

 from core.settings import settings
 from error import bad_request
@@ -108,6 +110,7 @@ def run_async_chain(chain, question, chat_history):
    result["answer"] = answer
    return result

+
 def get_vectorstore(data):
    if "active_docs" in data:
        if data["active_docs"].split("/")[0] == "local":
@@ -134,6 +137,7 @@ def get_docsearch(vectorstore, embeddings_key):
        docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
    return docsearch

+
@celery.task(bind=True)
 def ingest(self, directory, formats, name_job, filename, user):
    resp = ingest_worker(self, directory, formats, name_job, filename, user)
@@ -216,17 +220,26 @@ def api_answer():
        # Note if you have used other embeddings than OpenAI, you need to change the embeddings
        docsearch = get_docsearch(vectorstore, embeddings_key)

-        c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template,
-                                  template_format="jinja2")
-
        q_prompt = PromptTemplate(input_variables=["context", "question"], template=template_quest,
                                  template_format="jinja2")
        if settings.LLM_NAME == "openai_chat":
            llm = ChatOpenAI(openai_api_key=api_key)  # optional parameter: model_name="gpt-4"
-            messages_combine = [
-                SystemMessagePromptTemplate.from_template(chat_combine_template),
-                HumanMessagePromptTemplate.from_template("{question}")
-            ]
+            messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
+            if history:
+                tokens_current_history = 0
+                tokens_max_history = 1000
+                # Walk history in reverse, keeping each prompt/response pair that fits the token budget.
+                history.reverse()
+                for i in history:
+                    if "prompt" in i and "response" in i:
+                        tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
+                        if tokens_current_history + tokens_batch < tokens_max_history:
+                            tokens_current_history += tokens_batch
+                            messages_combine.append(HumanMessagePromptTemplate.from_template(i["prompt"]))
+                            messages_combine.append(AIMessagePromptTemplate.from_template(i["response"]))
+            messages_combine.append(HumanMessagePromptTemplate.from_template("{question}"))
+            import sys
+            print(messages_combine, file=sys.stderr)
            p_chat_combine = ChatPromptTemplate.from_messages(messages_combine)
        elif settings.LLM_NAME == "openai":
            llm = OpenAI(openai_api_key=api_key, temperature=0)
@@ -236,6 +249,8 @@ def api_answer():
            llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
        elif settings.LLM_NAME == "cohere":
            llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
+        elif settings.LLM_NAME == "gpt4all":
+            llm = GPT4All(model=settings.MODEL_PATH)
        else:
            raise ValueError("unknown LLM model")

@@ -251,9 +266,22 @@ def api_answer():
            # result = chain({"question": question, "chat_history": chat_history})
            # generate async with async generate method
            result = run_async_chain(chain, question, chat_history)
+        elif settings.LLM_NAME == "gpt4all":
+            question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
+            doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
+            chain = ConversationalRetrievalChain(
+                retriever=docsearch.as_retriever(k=2),
+                question_generator=question_generator,
+                combine_docs_chain=doc_chain,
+            )
+            chat_history = []
+            # result = chain({"question": question, "chat_history": chat_history})
+            # generate async with async generate method
+            result = run_async_chain(chain, question, chat_history)
+
        else:
            qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
-                                     combine_prompt=c_prompt, question_prompt=q_prompt)
+                                     combine_prompt=chat_combine_template, question_prompt=q_prompt)
            chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=3)
            result = chain({"query": question})

--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -9,6 +9,7 @@ class Settings(BaseSettings):
    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
    MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
+    MODEL_PATH: str = "./models/gpt4all-model.bin"

    API_URL: str = "http://localhost:5001"  # backend url for celery worker

--- a/application/prompts/chat_combine_prompt.txt
+++ b/application/prompts/chat_combine_prompt.txt
@@ -1,5 +1,6 @@
 You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible.
 Use the following pieces of context to help answer the users question. If its not relevant to the question, provide friendly responses.
+You have access to chat history, and can use it to help answer the question.
 When using code examples, use the following format:
 ```(language)
 (code)
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -26,10 +26,12 @@ ecdsa==0.18.0
 entrypoints==0.4
 faiss-cpu==1.7.3
 filelock==3.9.0
-Flask==2.3.2
+Flask==2.2.3
+Flask-Cors==3.0.10
 frozenlist==1.3.3
 geojson==2.5.0
 greenlet==2.0.2
+gpt4all==0.1.7
 hub==3.0.1
 huggingface-hub==0.12.1
 humbug==0.2.8
@@ -39,7 +41,8 @@ Jinja2==3.1.2
 jmespath==1.0.1
 joblib==1.2.0
 kombu==5.2.4
-langchain==0.0.126
+langchain==0.0.179
+loguru==0.6.0
 lxml==4.9.2
 MarkupSafe==2.1.2
 marshmallow==3.19.0
--- a/scripts/parser/file/bulk.py
+++ b/scripts/parser/file/bulk.py
@@ -1,8 +1,5 @@
 """Simple reader that reads files of different formats from a directory."""
 import logging
-from pathlib import Path
-from typing import Callable, Dict, List, Optional, Union
-
 from parser.file.base import BaseReader
 from parser.file.base_parser import BaseParser
 from parser.file.docs_parser import DocxParser, PDFParser
@@ -12,6 +9,8 @@ from parser.file.markdown_parser import MarkdownParser
 from parser.file.rst_parser import RstParser
 from parser.file.tabular_parser import PandasCSVParser
 from parser.schema.base import Document
+from pathlib import Path
+from typing import Callable, Dict, List, Optional, Union

 DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
    ".pdf": PDFParser(),
@@ -151,11 +150,16 @@ class SimpleDirectoryReader(BaseReader):
                    data = f.read()
            if isinstance(data, List):
                data_list.extend(data)
+                if self.file_metadata is not None:
+                    for _ in range(len(data)):
+                        metadata_list.append(self.file_metadata(str(input_file)))
            else:
                data_list.append(str(data))
                if self.file_metadata is not None:
                    metadata_list.append(self.file_metadata(str(input_file)))

+            
+
        if concatenate:
            return [Document("\n".join(data_list))]
        elif self.file_metadata is not None: