diff --git a/application/app.py b/application/app.py
index 0b0bb143..fa92cb85 100644
--- a/application/app.py
+++ b/application/app.py
@@ -24,9 +24,11 @@ from langchain.prompts.chat import (
     ChatPromptTemplate,
     SystemMessagePromptTemplate,
     HumanMessagePromptTemplate,
+    AIMessagePromptTemplate,
 )
 from pymongo import MongoClient
 from werkzeug.utils import secure_filename
+from langchain.llms import GPT4All
 
 from core.settings import settings
 from error import bad_request
@@ -108,6 +110,7 @@ def run_async_chain(chain, question, chat_history):
     result["answer"] = answer
     return result
 
+
 def get_vectorstore(data):
     if "active_docs" in data:
         if data["active_docs"].split("/")[0] == "local":
@@ -134,6 +137,7 @@ def get_docsearch(vectorstore, embeddings_key):
         docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
     return docsearch
 
+
 @celery.task(bind=True)
 def ingest(self, directory, formats, name_job, filename, user):
     resp = ingest_worker(self, directory, formats, name_job, filename, user)
@@ -216,17 +220,26 @@ def api_answer():
         # Note if you have used other embeddings than OpenAI, you need to change the embeddings
         docsearch = get_docsearch(vectorstore, embeddings_key)
 
         c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template,
                                   template_format="jinja2")
         q_prompt = PromptTemplate(input_variables=["context", "question"], template=template_quest, template_format="jinja2")
         if settings.LLM_NAME == "openai_chat":
             llm = ChatOpenAI(openai_api_key=api_key)  # optional parameter: model_name="gpt-4"
-            messages_combine = [
-                SystemMessagePromptTemplate.from_template(chat_combine_template),
-                HumanMessagePromptTemplate.from_template("{question}")
-            ]
+            messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
+            if history:
+                # walk the history from the most recent turn backwards, folding
+                # turns into the prompt until the token budget is exhausted
+                tokens_current_history = 0
+                tokens_max_history = 1000
+                history.reverse()
+                for i in history:
+                    if "prompt" in i and "response" in i:
+                        tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
+                        if tokens_current_history + tokens_batch < tokens_max_history:
+                            tokens_current_history += tokens_batch
+                            messages_combine.append(HumanMessagePromptTemplate.from_template(i["prompt"]))
+                            messages_combine.append(AIMessagePromptTemplate.from_template(i["response"]))
+            messages_combine.append(HumanMessagePromptTemplate.from_template("{question}"))
             p_chat_combine = ChatPromptTemplate.from_messages(messages_combine)
         elif settings.LLM_NAME == "openai":
             llm = OpenAI(openai_api_key=api_key, temperature=0)
@@ -236,6 +249,8 @@ def api_answer():
             llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
         elif settings.LLM_NAME == "cohere":
             llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
+        elif settings.LLM_NAME == "gpt4all":
+            llm = GPT4All(model=settings.MODEL_PATH)
         else:
             raise ValueError("unknown LLM model")
 
@@ -251,9 +266,22 @@ def api_answer():
             # result = chain({"question": question, "chat_history": chat_history})
             # generate async with async generate method
             result = run_async_chain(chain, question, chat_history)
+        elif settings.LLM_NAME == "gpt4all":
+            question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
+            # p_chat_combine is only built on the openai_chat path above, so
+            # construct the combine prompt here the same way before wiring up
+            # the doc chain
+            p_chat_combine = ChatPromptTemplate.from_messages([
+                SystemMessagePromptTemplate.from_template(chat_combine_template),
+                HumanMessagePromptTemplate.from_template("{question}"),
+            ])
+            doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
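+            # condense any follow-up question first, then answer it over the retrieved docs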
+            chain = ConversationalRetrievalChain(
+                retriever=docsearch.as_retriever(k=2),
+                question_generator=question_generator,
+                combine_docs_chain=doc_chain,
+            )
+            chat_history = []
+            result = run_async_chain(chain, question, chat_history)
+
         else:
             qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
                                      combine_prompt=c_prompt, question_prompt=q_prompt)
             chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=3)
             result = chain({"query": question})
diff --git a/application/core/settings.py b/application/core/settings.py
index fa654ed5..3c0672da 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -9,6 +9,7 @@ class Settings(BaseSettings):
     CELERY_BROKER_URL: str = "redis://localhost:6379/0"
     CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
     MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
+    MODEL_PATH: str = "./models/gpt4all-model.bin"
 
     API_URL: str = "http://localhost:5001"  # backend url for celery worker
diff --git a/application/prompts/chat_combine_prompt.txt b/application/prompts/chat_combine_prompt.txt
index 295a0b72..2f9a61c9 100644
--- a/application/prompts/chat_combine_prompt.txt
+++ b/application/prompts/chat_combine_prompt.txt
@@ -1,5 +1,6 @@
 You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible.
 Use the following pieces of context to help answer the users question. If its not relevant to the question, provide friendly responses.
+You have access to chat history, and can use it to help answer the question.
 When using code examples, use the following format:
 ```(language)
 (code)
diff --git a/application/requirements.txt b/application/requirements.txt
index 7f737e32..4d9c1b0d 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -26,10 +26,12 @@ ecdsa==0.18.0
 entrypoints==0.4
 faiss-cpu==1.7.3
 filelock==3.9.0
-Flask==2.3.2
+Flask==2.2.3
+Flask-Cors==3.0.10
 frozenlist==1.3.3
 geojson==2.5.0
 greenlet==2.0.2
+gpt4all==0.1.7
 hub==3.0.1
 huggingface-hub==0.12.1
 humbug==0.2.8
@@ -39,7 +41,8 @@ Jinja2==3.1.2
 jmespath==1.0.1
 joblib==1.2.0
 kombu==5.2.4
-langchain==0.0.126
+langchain==0.0.179
+loguru==0.6.0
 lxml==4.9.2
 MarkupSafe==2.1.2
 marshmallow==3.19.0
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 584dea0d..c06b61bf 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -9,7 +9,7 @@ services:
     ports:
       - "5173:5173"
     depends_on:
-      - backend
+      - backend
 
   backend:
     build: ./application
diff --git a/scripts/parser/file/bulk.py b/scripts/parser/file/bulk.py
index 8a963104..8b5bd406 100644
--- a/scripts/parser/file/bulk.py
+++ b/scripts/parser/file/bulk.py
@@ -1,8 +1,5 @@
 """Simple reader that reads files of different formats from a directory."""
 import logging
-from pathlib import Path
-from typing import Callable, Dict, List, Optional, Union
-
 from parser.file.base import BaseReader
 from parser.file.base_parser import BaseParser
 from parser.file.docs_parser import DocxParser, PDFParser
@@ -12,6 +9,8 @@ from parser.file.markdown_parser import MarkdownParser
 from parser.file.rst_parser import RstParser
 from parser.file.tabular_parser import PandasCSVParser
 from parser.schema.base import Document
+from pathlib import Path
+from typing import Callable, Dict, List, Optional, Union
 
 DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
     ".pdf": PDFParser(),
@@ -151,10 +150,15 @@ class SimpleDirectoryReader(BaseReader):
                     data = f.read()
                 if isinstance(data, List):
                     data_list.extend(data)
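+                    # keep metadata_list aligned with data_list: one entry per parsed document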
+                    if self.file_metadata is not None:
+                        for _ in range(len(data)):
+                            metadata_list.append(self.file_metadata(str(input_file)))
                 else:
                     data_list.append(str(data))
-            if self.file_metadata is not None:
-                metadata_list.append(self.file_metadata(str(input_file)))
+                    if self.file_metadata is not None:
+                        metadata_list.append(self.file_metadata(str(input_file)))
+
+
         if concatenate:
             return [Document("\n".join(data_list))]
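For reviewers: a minimal sketch (not part of the diff) for smoke-testing the new gpt4all branch outside Flask. It assumes `gpt4all==0.1.7` and `langchain==0.0.179` from requirements.txt are installed and that a model binary exists at the default `MODEL_PATH` (`./models/gpt4all-model.bin`); the prompt text is illustrative only.

```python
# Minimal sketch: drive GPT4All through LangChain the same way app.py does,
# but with a one-off LLMChain instead of the full retrieval chains.
from langchain.llms import GPT4All
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

llm = GPT4All(model="./models/gpt4all-model.bin")  # default settings.MODEL_PATH
prompt = PromptTemplate.from_template("Answer in one sentence: {question}")
chain = LLMChain(llm=llm, prompt=prompt)
print(chain.run(question="What does DocsGPT do?"))
```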