less token less issues

Update README.md
Merge pull request #250 from tardigrde/main
2025-11-29 16:43:16 +00:00 · 2023-06-03 16:31:10 +01:00 · 2023-06-03 16:09:10 +01:00 · 2023-06-01 14:56:48 +01:00 · 2023-05-31 23:49:41 +01:00 · 2023-05-31 23:47:16 +01:00
73 changed files with 7737 additions and 1952 deletions
--- a/.env-template
+++ b/.env-template
@@ -0,0 +1,2 @@
+OPENAI_API_KEY=<LLM api key (for example, open ai key)>
+EMBEDDINGS_KEY=<LLM embeddings api key (for example, open ai key)>
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -9,6 +9,10 @@ on:
 jobs:
  deploy:
    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
    steps:
      - uses: actions/checkout@v3

@@ -23,17 +27,17 @@ jobs:
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
-          
+
      - name: Login to ghcr.io
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
-          password: ${{ secrets.GHCR_TOKEN }}
+          password: ${{ secrets.GITHUB_TOKEN }}

      # Runs a single command using the runners shell
      - name: Build and push Docker images to docker.io and ghcr.io
-        uses: docker/build-push-action@v2
+        uses: docker/build-push-action@v4
        with:
          file: './application/Dockerfile'
          platforms: linux/amd64
--- a/.github/workflows/cife.yml
+++ b/.github/workflows/cife.yml
@@ -9,6 +9,10 @@ on:
 jobs:
  deploy:
    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
    steps:
      - uses: actions/checkout@v3

@@ -23,17 +27,17 @@ jobs:
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
-          
+
      - name: Login to ghcr.io
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
-          password: ${{ secrets.GHCR_TOKEN }}
+          password: ${{ secrets.GITHUB_TOKEN }}

      # Runs a single command using the runners shell
      - name: Build and push Docker images to docker.io and ghcr.io
-        uses: docker/build-push-action@v2
+        uses: docker/build-push-action@v4
        with:
          file: './frontend/Dockerfile'
          platforms: linux/amd64
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,17 @@
+# Lints the repository with Ruff on pushes and on pull-request updates.
+name: Python linting
+
+on:
+  push:
+    branches:
+      # NOTE(review): in GitHub filter patterns '*' does not match '/', so
+      # branch names such as feature/x are not covered — confirm intended.
+      - '*'
+  pull_request:
+    # opened: PR created; synchronize: new commits pushed to the PR branch.
+    types: [ opened, synchronize ]
+
+jobs:
+  ruff:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Lint with Ruff
+        uses: chartboost/ruff-action@v1
--- a/.github/workflows/sync_fork.yaml
+++ b/.github/workflows/sync_fork.yaml
@@ -0,0 +1,41 @@
+# Pulls the latest commits from arc53/DocsGPT main into this repository's main
+# branch, hourly or on manual dispatch. The job only runs when the repository
+# is a fork.
+name: Upstream Sync
+
+# presumably write access is needed so the token passed as target_repo_token
+# below can push the synced commits — confirm against the action's docs
+permissions:
+  contents: write
+
+on:
+  schedule:
+    - cron: "0 * * * *" # every hour
+  workflow_dispatch:
+
+jobs:
+  sync_latest_from_upstream:
+    name: Sync latest commits from upstream repo
+    runs-on: ubuntu-latest
+    # Skip the job unless the repository it runs in is a fork.
+    if: ${{ github.event.repository.fork }}
+
+    steps:
+      # Step 1: run a standard checkout action
+      - name: Checkout target repo
+        uses: actions/checkout@v3
+
+      # Step 2: run the sync action
+      - name: Sync upstream changes
+        id: sync
+        uses: aormsby/Fork-Sync-With-Upstream-action@v3.4
+        with:
+          # set your upstream repo and branch
+          upstream_sync_repo: arc53/DocsGPT
+          upstream_sync_branch: main
+          target_sync_branch: main
+          target_repo_token: ${{ secrets.GITHUB_TOKEN }} # automatically generated, no need to set
+
+          # Set test_mode true to run tests instead of the true action!!
+          test_mode: false
+
+      # Runs only when an earlier step failed: prints the same explanation in
+      # Chinese and in English as workflow errors, then fails the job.
+      - name: Sync check
+        if: failure()
+        run: |
+          echo "::error::由于权限不足，导致同步失败（这是预期的行为），请前往仓库首页手动执行[Sync fork]。"
+          echo "::error::Due to insufficient permissions, synchronization failed (as expected). Please go to the repository homepage and manually perform [Sync fork]."
+          exit 1
--- a/.ruff.toml
+++ b/.ruff.toml
@@ -0,0 +1,2 @@
+# Allow lines to be as long as 120 characters.
+line-length = 120
--- a/README.md
+++ b/README.md
@@ -55,8 +55,9 @@ You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here, pleas
 Note: Make sure you have docker installed

 1. Download this repository with `git clone https://github.com/arc53/DocsGPT.git`
-2. Open docker-compose.yaml and replace <your_api_key> with your OpenAI's key (there are 4 places)
+2. Create a .env file in your root directory and set OPENAI_API_KEY to your OpenAI API key; set VITE_API_STREAMING to true, or to false if you don't want streaming answers
 3. Run `docker-compose build && docker-compose up`
+4. Navigate to http://localhost:5173/

 To stop just run Ctrl + C

@@ -67,19 +68,23 @@ Spin up only 2 containers from docker-compose.yaml (by deleting all services exc
 Make sure you have python 3.10 or 3.11 installed

 1. Navigate to `/application` folder
-2. Install dependencies
+2. Run `docker-compose -f docker-compose-dev.yaml build && docker-compose -f docker-compose-dev.yaml up -d`
+3. Export required variables              
+`export CELERY_BROKER_URL=redis://localhost:6379/0`   
+`export CELERY_RESULT_BACKEND=redis://localhost:6379/1`
+`export MONGO_URI=mongodb://localhost:27017/docsgpt`
+4. Install dependencies
 `pip install -r requirements.txt`
-3. Prepare .env file
+5. Prepare .env file
 Copy .env_sample and create .env with your openai api token
-4. Run the app
-`python app.py`
-5. Start worker with `celery -A app.celery worker -l INFO`
+6. Run the app
+`python wsgi.py`
+7. Start worker with `celery -A app.celery worker -l INFO`

 To start frontend
 1. Navigate to `/frontend` folder
 2. Install dependencies
 `npm install`
-3. In the file  `.env.development` instead of `VITE_API_HOST = https://docsapi.arc53.com` use `VITE_API_HOST=http://localhost:5001`
 3. Run the app
 4. `npm run dev`

--- a/application/Dockerfile
+++ b/application/Dockerfile
@@ -4,7 +4,7 @@ FROM python:3.10-slim-bullseye as builder
 RUN apt-get update && apt-get install -y gcc curl
 RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
 ENV PATH="/root/.cargo/bin:${PATH}"
-RUN pip install --upgrade pip && pip install tiktoken==0.1.2
+RUN pip install --upgrade pip && pip install tiktoken==0.3.3
 COPY requirements.txt .
 RUN pip install -r requirements.txt

--- a/application/app.py
+++ b/application/app.py
@@ -1,16 +1,20 @@
+import asyncio
 import datetime
+import http.client
 import json
 import os
 import traceback

+import openai
 import dotenv
 import requests
 from celery import Celery
 from celery.result import AsyncResult
-from flask import Flask, request, render_template, send_from_directory, jsonify
+from flask import Flask, request, render_template, send_from_directory, jsonify, Response
 from langchain import FAISS
 from langchain import VectorDBQA, HuggingFaceHub, Cohere, OpenAI
-from langchain.chains import ChatVectorDBChain
+from langchain.chains import LLMChain, ConversationalRetrievalChain
+from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
 from langchain.chains.question_answering import load_qa_chain
 from langchain.chat_models import ChatOpenAI
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, \
@@ -20,26 +24,19 @@ from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
+    AIMessagePromptTemplate,
 )
 from pymongo import MongoClient
 from werkzeug.utils import secure_filename
+from langchain.llms import GPT4All

+from core.settings import settings
 from error import bad_request
 from worker import ingest_worker

 # os.environ["LANGCHAIN_HANDLER"] = "langchain"

-if os.getenv("LLM_NAME") is not None:
-    llm_choice = os.getenv("LLM_NAME")
-else:
-    llm_choice = "openai_chat"
-
-if os.getenv("EMBEDDINGS_NAME") is not None:
-    embeddings_choice = os.getenv("EMBEDDINGS_NAME")
-else:
-    embeddings_choice = "openai_text-embedding-ada-002"
-
-if llm_choice == "manifest":
+if settings.LLM_NAME == "manifest":
    from manifest import Manifest
    from langchain.llms.manifest import ManifestWrapper

@@ -76,27 +73,71 @@ with open("prompts/chat_combine_prompt.txt", "r") as f:
 with open("prompts/chat_reduce_prompt.txt", "r") as f:
    chat_reduce_template = f.read()

-if os.getenv("API_KEY") is not None:
+if settings.API_KEY is not None:
    api_key_set = True
 else:
    api_key_set = False
-if os.getenv("EMBEDDINGS_KEY") is not None:
+if settings.EMBEDDINGS_KEY is not None:
    embeddings_key_set = True
 else:
    embeddings_key_set = False

 app = Flask(__name__)
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER = "inputs"
-app.config['CELERY_BROKER_URL'] = os.getenv("CELERY_BROKER_URL")
-app.config['CELERY_RESULT_BACKEND'] = os.getenv("CELERY_RESULT_BACKEND")
-app.config['MONGO_URI'] = os.getenv("MONGO_URI")
-celery = Celery(app.name, broker=app.config['CELERY_BROKER_URL'], backend=app.config['CELERY_RESULT_BACKEND'])
-celery.conf.update(app.config)
+app.config['CELERY_BROKER_URL'] = settings.CELERY_BROKER_URL
+app.config['CELERY_RESULT_BACKEND'] = settings.CELERY_RESULT_BACKEND
+app.config['MONGO_URI'] = settings.MONGO_URI
+celery = Celery()
+celery.config_from_object('celeryconfig')
 mongo = MongoClient(app.config['MONGO_URI'])
 db = mongo["docsgpt"]
 vectors_collection = db["vectors"]


+async def async_generate(chain, question, chat_history):
+    """Await the chain's asynchronous run for one question and return its output.
+
+    The chain is invoked through ``chain.arun`` with a single mapping that
+    carries the ``question`` and ``chat_history`` keys.
+    """
+    chain_inputs = {"question": question, "chat_history": chat_history}
+    return await chain.arun(chain_inputs)
+
+
+def run_async_chain(chain, question, chat_history):
+    """Run the chain's async generation to completion from synchronous code.
+
+    A fresh event loop is created, installed as the current loop, used to drive
+    ``async_generate`` and then closed, even when generation raises.
+
+    Returns:
+        dict: ``{"answer": <chain output>}``.
+    """
+    event_loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(event_loop)
+    try:
+        pending = async_generate(chain, question, chat_history)
+        answer = event_loop.run_until_complete(pending)
+    finally:
+        event_loop.close()
+    return {"answer": answer}
+
+
+def get_vectorstore(data):
+    """Map the request's ``active_docs`` value to a vector store directory.
+
+    Returns:
+        str: "" when ``active_docs`` is absent, equals "default", or names the
+        local default index ("local/default..."); "indexes/<active_docs>" for
+        any other "local" entry; "vectors/<active_docs>" for everything else.
+    """
+    if "active_docs" not in data:
+        return ""
+    active_docs = data["active_docs"]
+    if active_docs == "default":
+        return ""
+    parts = active_docs.split("/")
+    if parts[0] != "local":
+        return "vectors/" + active_docs
+    # A bare "local" has no second segment; slicing avoids the IndexError that
+    # indexing parts[1] raises for that input.
+    if parts[1:2] == ["default"]:
+        return ""
+    return "indexes/" + active_docs
+
+def get_docsearch(vectorstore, embeddings_key):
+    """Load the FAISS index at ``vectorstore`` with the configured embeddings.
+
+    The embeddings class is selected by ``settings.EMBEDDINGS_NAME``;
+    ``embeddings_key`` is passed to the OpenAI and Cohere embeddings only.
+
+    Raises:
+        ValueError: if ``settings.EMBEDDINGS_NAME`` is not one of the supported
+            names (this used to surface as an UnboundLocalError on return).
+    """
+    if settings.EMBEDDINGS_NAME == "openai_text-embedding-ada-002":
+        docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
+    elif settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
+        docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
+    elif settings.EMBEDDINGS_NAME == "huggingface_hkunlp/instructor-large":
+        docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
+    elif settings.EMBEDDINGS_NAME == "cohere_medium":
+        docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
+    else:
+        raise ValueError("unknown embeddings model")
+    return docsearch
+
+
@celery.task(bind=True)
 def ingest(self, directory, formats, name_job, filename, user):
    resp = ingest_worker(self, directory, formats, name_job, filename, user)
@@ -105,8 +146,68 @@ def ingest(self, directory, formats, name_job, filename, user):

@app.route("/")
 def home():
-    return render_template("index.html", api_key_set=api_key_set, llm_choice=llm_choice,
-                           embeddings_choice=embeddings_choice)
+    return render_template("index.html", api_key_set=api_key_set, llm_choice=settings.LLM_NAME,
+                           embeddings_choice=settings.EMBEDDINGS_NAME)
+
+def complete_stream(question, docsearch, chat_history, api_key):
+    """Answer ``question`` with gpt-3.5-turbo and yield the reply as server-sent events.
+
+    The two most similar documents from ``docsearch`` are substituted into the
+    chat-combine prompt, which becomes the system message. Earlier exchanges in
+    ``chat_history`` (dicts carrying "prompt" and "response") are included while
+    they fit within ``settings.TOKENS_MAX_HISTORY`` tokens; the newest exchanges
+    are considered first and the kept ones are sent in chronological order.
+
+    Yields:
+        str: "data: <json>" frames, each followed by a blank line. One
+        ``{"answer": <text>}`` frame per content delta, then ``{"type": "end"}``.
+    """
+    openai.api_key = api_key
+    llm = ChatOpenAI(openai_api_key=api_key)
+    docs = docsearch.similarity_search(question, k=2)
+    # join all page_content together with a newline
+    docs_together = "\n".join([doc.page_content for doc in docs])
+    p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)
+    messages_combine = [{"role": "system", "content": p_chat_combine}]
+    if len(chat_history) > 1:
+        tokens_current_history = 0
+        kept_exchanges = []
+        # Walk newest-first so recent exchanges claim the token budget, without
+        # reversing the caller's list in place.
+        for i in reversed(chat_history):
+            if "prompt" in i and "response" in i:
+                tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
+                if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
+                    tokens_current_history += tokens_batch
+                    kept_exchanges.append(i)
+        # Replay the kept exchanges oldest-first; earlier model replies belong
+        # to the "assistant" role, not "system".
+        for i in reversed(kept_exchanges):
+            messages_combine.append({"role": "user", "content": i["prompt"]})
+            messages_combine.append({"role": "assistant", "content": i["response"]})
+    messages_combine.append({"role": "user", "content": question})
+    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo",
+                                              messages=messages_combine, stream=True, max_tokens=500, temperature=0)
+
+    for line in completion:
+        if 'content' in line['choices'][0]['delta']:
+            # check if the delta contains content
+            data = json.dumps({"answer": str(line['choices'][0]['delta']['content'])})
+            yield f"data: {data}\n\n"
+    # send data.type = "end" to indicate that the stream has ended as json
+    data = json.dumps({"type": "end"})
+    yield f"data: {data}\n\n"
+@app.route("/stream", methods=['POST', 'GET'])
+def stream():
+    """Stream an answer to the ``question`` query parameter as server-sent events.
+
+    Reads ``question``, ``history`` (a JSON string) and optionally
+    ``active_docs``, ``api_key`` and ``embeddings_key`` from the URL query
+    string. Request-supplied keys are used only when the matching key is not
+    configured in settings.
+
+    Returns:
+        A ``text/event-stream`` response produced by ``complete_stream``, or a
+        400 error response when ``history`` is not valid JSON.
+    """
+    # get parameter from url question
+    question = request.args.get('question')
+    # history arrives as a JSON string; a missing or empty parameter means no
+    # history (json.loads(None) would raise TypeError).
+    raw_history = request.args.get('history')
+    try:
+        history = json.loads(raw_history) if raw_history else []
+    except ValueError:
+        return bad_request(400, "history must be valid JSON")
+
+    # NOTE(review): keys sent in the query string can end up in access logs —
+    # consider accepting them in a header or request body instead.
+    if not api_key_set:
+        api_key = request.args.get("api_key")
+    else:
+        api_key = settings.API_KEY
+    if not embeddings_key_set:
+        embeddings_key = request.args.get("embeddings_key")
+    else:
+        embeddings_key = settings.EMBEDDINGS_KEY
+    # check if active_docs is set
+    if "active_docs" in request.args:
+        vectorstore = get_vectorstore({"active_docs": request.args.get("active_docs")})
+    else:
+        vectorstore = ""
+    docsearch = get_docsearch(vectorstore, embeddings_key)
+
+    return Response(complete_stream(question, docsearch,
+                                    chat_history=history, api_key=api_key), mimetype='text/event-stream')


@app.route("/api/answer", methods=["POST"])
@@ -118,85 +219,82 @@ def api_answer():
    if not api_key_set:
        api_key = data["api_key"]
    else:
-        api_key = os.getenv("API_KEY")
+        api_key = settings.API_KEY
    if not embeddings_key_set:
        embeddings_key = data["embeddings_key"]
    else:
-        embeddings_key = os.getenv("EMBEDDINGS_KEY")
+        embeddings_key = settings.EMBEDDINGS_KEY

    # use try and except  to check for exception
    try:
        # check if the vectorstore is set
-        if "active_docs" in data:
-            if data["active_docs"].split("/")[0] == "local":
-                vectorstore = "indexes/" + data["active_docs"]
-            else:
-                vectorstore = "vectors/" + data["active_docs"]
-            if data['active_docs'] == "default":
-                vectorstore = ""
-        else:
-            vectorstore = ""
-        print(vectorstore)
-        # vectorstore = "outputs/inputs/"
+        vectorstore = get_vectorstore(data)
        # loading the index and the store and the prompt template
        # Note if you have used other embeddings than OpenAI, you need to change the embeddings
-        if embeddings_choice == "openai_text-embedding-ada-002":
-            docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
-        elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
-            docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
-        elif embeddings_choice == "huggingface_hkunlp/instructor-large":
-            docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
-        elif embeddings_choice == "cohere_medium":
-            docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
-
-        # create a prompt template
-        if history:
-            history = json.loads(history)
-            template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}",
-                                                                                           history[1])
-            c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp,
-                                      template_format="jinja2")
-        else:
-            c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template,
-                                      template_format="jinja2")
+        docsearch = get_docsearch(vectorstore, embeddings_key)

        q_prompt = PromptTemplate(input_variables=["context", "question"], template=template_quest,
                                  template_format="jinja2")
-        if llm_choice == "openai_chat":
-            # llm = ChatOpenAI(openai_api_key=api_key, model_name="gpt-4")
-            llm = ChatOpenAI(openai_api_key=api_key)
-            messages_combine = [
-                SystemMessagePromptTemplate.from_template(chat_combine_template),
-                HumanMessagePromptTemplate.from_template("{question}")
-            ]
+        if settings.LLM_NAME == "openai_chat":
+            llm = ChatOpenAI(openai_api_key=api_key)  # optional parameter: model_name="gpt-4"
+            messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
+            if history:
+                tokens_current_history = 0
+                #count tokens in history
+                history.reverse()
+                for i in history:
+                    if "prompt" in i and "response" in i:
+                        tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
+                        if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
+                            tokens_current_history += tokens_batch
+                            messages_combine.append(HumanMessagePromptTemplate.from_template(i["prompt"]))
+                            messages_combine.append(AIMessagePromptTemplate.from_template(i["response"]))
+            messages_combine.append(HumanMessagePromptTemplate.from_template("{question}"))
+            import sys
+            print(messages_combine, file=sys.stderr)
            p_chat_combine = ChatPromptTemplate.from_messages(messages_combine)
-            messages_reduce = [
-                SystemMessagePromptTemplate.from_template(chat_reduce_template),
-                HumanMessagePromptTemplate.from_template("{question}")
-            ]
-            p_chat_reduce = ChatPromptTemplate.from_messages(messages_reduce)
-        elif llm_choice == "openai":
+        elif settings.LLM_NAME == "openai":
            llm = OpenAI(openai_api_key=api_key, temperature=0)
-        elif llm_choice == "manifest":
+        elif settings.LLM_NAME == "manifest":
            llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
-        elif llm_choice == "huggingface":
+        elif settings.LLM_NAME == "huggingface":
            llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
-        elif llm_choice == "cohere":
+        elif settings.LLM_NAME == "cohere":
            llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
+        elif settings.LLM_NAME == "gpt4all":
+            llm = GPT4All(model=settings.MODEL_PATH)
+        else:
+            raise ValueError("unknown LLM model")
+
+        if settings.LLM_NAME == "openai_chat":
+            question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
+            doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
+            chain = ConversationalRetrievalChain(
+                retriever=docsearch.as_retriever(k=2),
+                question_generator=question_generator,
+                combine_docs_chain=doc_chain,
+            )
+            chat_history = []
+            # result = chain({"question": question, "chat_history": chat_history})
+            # generate async with async generate method
+            result = run_async_chain(chain, question, chat_history)
+        elif settings.LLM_NAME == "gpt4all":
+            question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
+            doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
+            chain = ConversationalRetrievalChain(
+                retriever=docsearch.as_retriever(k=2),
+                question_generator=question_generator,
+                combine_docs_chain=doc_chain,
+            )
+            chat_history = []
+            # result = chain({"question": question, "chat_history": chat_history})
+            # generate async with async generate method
+            result = run_async_chain(chain, question, chat_history)

-        if llm_choice == "openai_chat":
-            chain = ChatVectorDBChain.from_llm(
-                llm=llm,
-                vectorstore=docsearch,
-                prompt=p_chat_combine,
-                qa_prompt=p_chat_reduce,
-                top_k_docs_for_context=3,
-                return_source_documents=False)
-            result = chain({"question": question, "chat_history": []})
        else:
            qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
-                                     combine_prompt=c_prompt, question_prompt=q_prompt)
-            chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=4)
+                                     combine_prompt=chat_combine_template, question_prompt=q_prompt)
+            chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=3)
            result = chain({"query": question})

        print(result)
@@ -207,7 +305,7 @@ def api_answer():
        result['answer'] = result['answer'].replace("\\n", "\n")
        try:
            result['answer'] = result['answer'].split("SOURCES:")[0]
-        except:
+        except Exception:
            pass

        # mock result
@@ -276,7 +374,7 @@ def api_feedback():
            "feedback": feedback
        })
    )
-    return {"status": 'ok'}
+    return {"status": http.client.responses.get(response.status_code, 'ok')}


@app.route('/api/combine', methods=['GET'])
@@ -285,7 +383,17 @@ def combined_json():
    """Provide json file with combined available indexes."""
    # get json from https://d3dg1063dc54p9.cloudfront.net/combined.json

-    data = []
+    data = [{
+        "name": 'default',
+        "language": 'default',
+        "version": '',
+        "description": 'default',
+        "fullName": 'default',
+        "date": 'default',
+        "docLink": 'default',
+        "model": settings.EMBEDDINGS_NAME,
+        "location": "local"
+    }]
    # structure: name, language, version, description, fullName, date, docLink
    # append data from vectors_collection
    for index in vectors_collection.find({'user': user}):
@@ -297,7 +405,7 @@ def combined_json():
            "fullName": index['name'],
            "date": index['date'],
            "docLink": index['location'],
-            "model": embeddings_choice,
+            "model": settings.EMBEDDINGS_NAME,
            "location": "local"
        })

@@ -335,7 +443,7 @@ def upload_file():
            os.makedirs(save_dir)

        file.save(os.path.join(save_dir, filename))
-        task = ingest.delay('temp', [".rst", ".md", ".pdf"], job_name, filename, user)
+        task = ingest.delay('temp', [".rst", ".md", ".pdf", ".txt"], job_name, filename, user)
        # task id
        task_id = task.id
        return {"status": 'ok', "task_id": task_id}
@@ -388,7 +496,7 @@ def upload_index_files():
        "language": job_name,
        "location": save_dir,
        "date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
-        "model": embeddings_choice,
+        "model": settings.EMBEDDINGS_NAME,
        "type": "local"
    })
    return {"status": 'ok'}
--- a/application/celeryconfig.py
+++ b/application/celeryconfig.py
@@ -0,0 +1,8 @@
+import os
+
+# Celery settings module, loaded by name via celery.config_from_object('celeryconfig').
+# Connection URLs come from the environment. The fallbacks mirror the defaults
+# declared in core/settings.py, so an unset variable no longer yields None.
+broker_url = os.getenv("CELERY_BROKER_URL", "redis://localhost:6379/0")
+result_backend = os.getenv("CELERY_RESULT_BACKEND", "redis://localhost:6379/1")
+
+# Task payloads and results are serialized as JSON, and only JSON is accepted.
+task_serializer = 'json'
+result_serializer = 'json'
+accept_content = ['json']
--- a/application/core/init.py
+++ b/application/core/init.py
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -0,0 +1,22 @@
+from pathlib import Path
+
+from pydantic import BaseSettings
+
+
+class Settings(BaseSettings):
+    """Application configuration declared as a pydantic ``BaseSettings`` model.
+
+    Each field carries a default; the service URLs default to localhost.
+    """
+    LLM_NAME: str = "openai_chat"
+    EMBEDDINGS_NAME: str = "openai_text-embedding-ada-002"
+    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
+    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
+    MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
+    MODEL_PATH: str = "./models/gpt4all-model.bin"
+    TOKENS_MAX_HISTORY: int = 150
+
+    API_URL: str = "http://localhost:5001"  # backend url for celery worker
+
+    # NOTE(review): both keys are annotated str but default to None;
+    # Optional[str] would state that explicitly.
+    API_KEY: str = None  # LLM api key
+    EMBEDDINGS_KEY: str = None  # api key for embeddings (if using openai, just copy API_KEY)
+
+
+# Two levels up from this file (application/core/settings.py) is the
+# application directory; settings are built with that directory's .env file.
+path = Path(__file__).parent.parent.absolute()
+settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
--- a/application/error.py
+++ b/application/error.py
@@ -1,13 +1,15 @@
 from flask import jsonify
 from werkzeug.http import HTTP_STATUS_CODES

-def response_error(code_status,message=None):
-    payload = {'error':HTTP_STATUS_CODES.get(code_status,"something went wrong")}
+
+def response_error(code_status, message=None):
+    payload = {'error': HTTP_STATUS_CODES.get(code_status, "something went wrong")}
    if message:
        payload['message'] = message
    response = jsonify(payload)
    response.status_code = code_status
    return response

-def bad_request(status_code=400,message=''):
-    return response_error(code_status=status_code,message=message)
+
+def bad_request(status_code=400, message=''):
+    return response_error(code_status=status_code, message=message)
--- a/application/parser/file/base.py
+++ b/application/parser/file/base.py
@@ -3,7 +3,6 @@ from abc import abstractmethod
 from typing import Any, List

 from langchain.docstore.document import Document as LCDocument
-
 from parser.schema.base import Document


--- a/application/parser/file/bulk.py
+++ b/application/parser/file/bulk.py
@@ -52,17 +52,17 @@ class SimpleDirectoryReader(BaseReader):
    """

    def __init__(
-        self,
-        input_dir: Optional[str] = None,
-        input_files: Optional[List] = None,
-        exclude_hidden: bool = True,
-        errors: str = "ignore",
-        recursive: bool = True,
-        required_exts: Optional[List[str]] = None,
-        file_extractor: Optional[Dict[str, BaseParser]] = None,
-        num_files_limit: Optional[int] = None,
-        file_metadata: Optional[Callable[[str], Dict]] = None,
-        chunk_size_max: int = 2048,
+            self,
+            input_dir: Optional[str] = None,
+            input_files: Optional[List] = None,
+            exclude_hidden: bool = True,
+            errors: str = "ignore",
+            recursive: bool = True,
+            required_exts: Optional[List[str]] = None,
+            file_extractor: Optional[Dict[str, BaseParser]] = None,
+            num_files_limit: Optional[int] = None,
+            file_metadata: Optional[Callable[[str], Dict]] = None,
+            chunk_size_max: int = 2048,
    ) -> None:
        """Initialize with parameters."""
        super().__init__()
@@ -102,8 +102,8 @@ class SimpleDirectoryReader(BaseReader):
            elif self.exclude_hidden and input_file.name.startswith("."):
                continue
            elif (
-                self.required_exts is not None
-                and input_file.suffix not in self.required_exts
+                    self.required_exts is not None
+                    and input_file.suffix not in self.required_exts
            ):
                continue
            else:
@@ -114,7 +114,7 @@ class SimpleDirectoryReader(BaseReader):
            new_input_files.extend(sub_input_files)

        if self.num_files_limit is not None and self.num_files_limit > 0:
-            new_input_files = new_input_files[0 : self.num_files_limit]
+            new_input_files = new_input_files[0: self.num_files_limit]

        # print total number of files added
        logging.debug(
--- a/application/parser/file/html_parser.py
+++ b/application/parser/file/html_parser.py
@@ -9,6 +9,7 @@ from typing import Dict, Union

 from parser.file.base_parser import BaseParser

+
 class HTMLParser(BaseParser):
    """HTML parser."""

@@ -23,38 +24,37 @@ class HTMLParser(BaseParser):
            Union[str, List[str]]: a string or a List of strings.
        """
        try:
-            import unstructured
+            from unstructured.partition.html import partition_html
+            from unstructured.staging.base import convert_to_isd
+            from unstructured.cleaners.core import clean
        except ImportError:
            raise ValueError("unstructured package is required to parse HTML files.")
-        from unstructured.partition.html import partition_html
-        from unstructured.staging.base import convert_to_isd
-        from unstructured.cleaners.core import clean

        # Using the unstructured library to convert the html to isd format
        # isd sample : isd = [
-                            #   {"text": "My Title", "type": "Title"},
-                            #   {"text": "My Narrative", "type": "NarrativeText"}
-                            # ]
+        #   {"text": "My Title", "type": "Title"},
+        #   {"text": "My Narrative", "type": "NarrativeText"}
+        # ]
        with open(file, "r", encoding="utf-8") as fp:
            elements = partition_html(file=fp)
-            isd = convert_to_isd(elements)  
+            isd = convert_to_isd(elements)

-        # Removing non ascii charactwers from isd_el['text']
+            # Removing non ascii charactwers from isd_el['text']
        for isd_el in isd:
            isd_el['text'] = isd_el['text'].encode("ascii", "ignore").decode()

        # Removing all the \n characters from isd_el['text'] using regex and replace with single space
        # Removing all the extra spaces  from isd_el['text'] using regex and replace with single space
        for isd_el in isd:
-            isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE|re.DOTALL)
-            isd_el['text'] = re.sub(r"\s{2,}"," ", isd_el['text'], flags=re.MULTILINE|re.DOTALL)
+            isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
+            isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)

        # more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
        for isd_el in isd:
-            clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True )
+            clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)

        # Creating a list of all the indexes of isd_el['type'] = 'Title'
-        title_indexes = [i for i,isd_el in enumerate(isd) if isd_el['type'] == 'Title']
+        title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']

        # Creating 'Chunks' - List of lists of strings 
        # each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title'
@@ -64,19 +64,20 @@ class HTMLParser(BaseParser):
        Chunks = [[]]
        final_chunks = list(list())

-        for i,isd_el in enumerate(isd):
+        for i, isd_el in enumerate(isd):
            if i in title_indexes:
                Chunks.append([])
            Chunks[-1].append(isd_el['text'])

-        # Removing all the chunks with sum of lenth of all the strings in the chunk < 25 #TODO: This value can be an user defined variable
+        # Removing all the chunks with sum of length of all the strings in the chunk < 25
+        # TODO: This value can be a user-defined variable
        for chunk in Chunks:
            # sum of lenth of all the strings in the chunk
            sum = 0
            sum += len(str(chunk))
            if sum < 25:
                Chunks.remove(chunk)
-            else :         
+            else:
                # appending all the approved chunks to final_chunks as a single string       
                final_chunks.append(" ".join([str(item) for item in chunk]))
        return final_chunks
--- a/application/parser/file/markdown_parser.py
+++ b/application/parser/file/markdown_parser.py
@@ -7,8 +7,8 @@ import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union, cast

-from parser.file.base_parser import BaseParser
 import tiktoken
+from parser.file.base_parser import BaseParser


 class MarkdownParser(BaseParser):
@@ -20,13 +20,13 @@ class MarkdownParser(BaseParser):
    """

    def __init__(
-        self,
-        *args: Any,
-        remove_hyperlinks: bool = True,
-        remove_images: bool = True,
-        max_tokens: int = 2048,
-        # remove_tables: bool = True,
-        **kwargs: Any,
+            self,
+            *args: Any,
+            remove_hyperlinks: bool = True,
+            remove_images: bool = True,
+            max_tokens: int = 2048,
+            # remove_tables: bool = True,
+            **kwargs: Any,
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
@@ -35,8 +35,8 @@ class MarkdownParser(BaseParser):
        self._max_tokens = max_tokens
        # self._remove_tables = remove_tables

-
-    def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str], current_text: str):
+    def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str],
+                          current_text: str):
        """Append to tups chunk."""
        num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(current_text))
        if num_tokens > self._max_tokens:
@@ -46,6 +46,7 @@ class MarkdownParser(BaseParser):
        else:
            tups.append((current_header, current_text))
        return tups
+
    def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
        """Convert a markdown file to a dictionary.

@@ -115,7 +116,7 @@ class MarkdownParser(BaseParser):
        return {}

    def parse_tups(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
    ) -> List[Tuple[Optional[str], str]]:
        """Parse file into tuples."""
        with open(filepath, "r") as f:
@@ -130,7 +131,7 @@ class MarkdownParser(BaseParser):
        return markdown_tups

    def parse_file(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
    ) -> Union[str, List[str]]:
        """Parse file into string."""
        tups = self.parse_tups(filepath, errors=errors)
--- a/application/parser/file/rst_parser.py
+++ b/application/parser/file/rst_parser.py
@@ -5,10 +5,10 @@ Contains parser for md files.
 """
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, cast
+from typing import Any, Dict, List, Optional, Tuple, Union

 from parser.file.base_parser import BaseParser
-import tiktoken
+

 class RstParser(BaseParser):
    """reStructuredText parser.
@@ -19,17 +19,17 @@ class RstParser(BaseParser):
    """

    def __init__(
-        self,
-        *args: Any,
-        remove_hyperlinks: bool = True,
-        remove_images: bool = True,
-        remove_table_excess: bool = True,
-        remove_interpreters: bool = True,
-        remove_directives: bool = True,
-        remove_whitespaces_excess: bool = True,
-        #Be carefull with remove_characters_excess, might cause data loss
-        remove_characters_excess: bool = True,
-        **kwargs: Any,
+            self,
+            *args: Any,
+            remove_hyperlinks: bool = True,
+            remove_images: bool = True,
+            remove_table_excess: bool = True,
+            remove_interpreters: bool = True,
+            remove_directives: bool = True,
+            remove_whitespaces_excess: bool = True,
+            # Be careful with remove_characters_excess; it might cause data loss
+            remove_characters_excess: bool = True,
+            **kwargs: Any,
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
@@ -41,7 +41,6 @@ class RstParser(BaseParser):
        self._remove_whitespaces_excess = remove_whitespaces_excess
        self._remove_characters_excess = remove_characters_excess

-
    def rst_to_tups(self, rst_text: str) -> List[Tuple[Optional[str], str]]:
        """Convert a reStructuredText file to a dictionary.

@@ -56,7 +55,8 @@ class RstParser(BaseParser):

        for i, line in enumerate(lines):
            header_match = re.match(r"^[^\S\n]*[-=]+[^\S\n]*$", line)
-            if header_match and i > 0 and (len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
+            if header_match and i > 0 and (
+                    len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
                if current_header is not None:
                    if current_text == "" or None:
                        continue
@@ -72,7 +72,7 @@ class RstParser(BaseParser):

        rst_tups.append((current_header, current_text))

-        #TODO: Format for rst
+        # TODO: Format for rst
        #
        # if current_header is not None:
        #     # pass linting, assert keys are defined
@@ -136,7 +136,7 @@ class RstParser(BaseParser):
        return {}

    def parse_tups(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
    ) -> List[Tuple[Optional[str], str]]:
        """Parse file into tuples."""
        with open(filepath, "r") as f:
@@ -159,7 +159,7 @@ class RstParser(BaseParser):
        return rst_tups

    def parse_file(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
    ) -> Union[str, List[str]]:
        """Parse file into string."""
        tups = self.parse_tups(filepath, errors=errors)
--- a/application/parser/file/tabular_parser.py
+++ b/application/parser/file/tabular_parser.py
@@ -77,13 +77,13 @@ class PandasCSVParser(BaseParser):
    """

    def __init__(
-        self,
-        *args: Any,
-        concat_rows: bool = True,
-        col_joiner: str = ", ",
-        row_joiner: str = "\n",
-        pandas_config: dict = {},
-        **kwargs: Any
+            self,
+            *args: Any,
+            concat_rows: bool = True,
+            col_joiner: str = ", ",
+            row_joiner: str = "\n",
+            pandas_config: dict = {},
+            **kwargs: Any
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
--- a/application/parser/java2doc.py
+++ b/application/parser/java2doc.py
@@ -1,6 +1,8 @@
 import os
+
 import javalang

+
 def find_files(directory):
    files_list = []
    for root, dirs, files in os.walk(directory):
@@ -9,6 +11,7 @@ def find_files(directory):
                files_list.append(os.path.join(root, file))
    return files_list

+
 def extract_functions(file_path):
    with open(file_path, "r") as file:
        java_code = file.read()
@@ -28,6 +31,7 @@ def extract_functions(file_path):
            methods[method_name] = method_source_code
    return methods

+
 def extract_classes(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
@@ -47,6 +51,7 @@ def extract_classes(file_path):
            classes[class_name] = class_string
    return classes

+
 def extract_functions_and_classes(directory):
    files = find_files(directory)
    functions_dict = {}
@@ -58,4 +63,4 @@ def extract_functions_and_classes(directory):
        classes = extract_classes(file)
        if classes:
            classes_dict[file] = classes
-    return functions_dict, classes_dict
+    return functions_dict, classes_dict
--- a/application/parser/js2doc.py
+++ b/application/parser/js2doc.py
@@ -1,6 +1,7 @@
 import os
-import esprima
+
 import escodegen
+import esprima


 def find_files(directory):
@@ -11,6 +12,7 @@ def find_files(directory):
                files_list.append(os.path.join(root, file))
    return files_list

+
 def extract_functions(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
@@ -26,7 +28,6 @@ def extract_functions(file_path):
                        func_name = declaration.id.name if declaration.id else '<anonymous>'
                        functions[func_name] = escodegen.generate(declaration.init)
            elif node.type == 'ClassDeclaration':
-                class_name = node.id.name
                for subnode in node.body.body:
                    if subnode.type == 'MethodDefinition':
                        func_name = subnode.key.name
@@ -38,6 +39,7 @@ def extract_functions(file_path):
                                functions[func_name] = escodegen.generate(declaration.init)
        return functions

+
 def extract_classes(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
@@ -53,6 +55,7 @@ def extract_classes(file_path):
                classes[class_name] = ", ".join(function_names)
    return classes

+
 def extract_functions_and_classes(directory):
    files = find_files(directory)
    functions_dict = {}
--- a/application/parser/open_ai_func.py
+++ b/application/parser/open_ai_func.py
@@ -1,32 +1,32 @@
 import os
-import faiss
-import pickle
+
 import tiktoken
-from langchain.vectorstores import FAISS
 from langchain.embeddings import OpenAIEmbeddings
-
-#from langchain.embeddings import HuggingFaceEmbeddings
-#from langchain.embeddings import HuggingFaceInstructEmbeddings
-#from langchain.embeddings import CohereEmbeddings
-
+from langchain.vectorstores import FAISS
 from retry import retry


+# from langchain.embeddings import HuggingFaceEmbeddings
+# from langchain.embeddings import HuggingFaceInstructEmbeddings
+# from langchain.embeddings import CohereEmbeddings
+

 def num_tokens_from_string(string: str, encoding_name: str) -> int:
-# Function to convert string to tokens and estimate user cost.
+    # Function to convert string to tokens and estimate user cost.
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
-    total_price = ((num_tokens/1000) * 0.0004)
+    total_price = ((num_tokens / 1000) * 0.0004)
    return num_tokens, total_price

+
@retry(tries=10, delay=60)
 def store_add_texts_with_retry(store, i):
    store.add_texts([i.page_content], metadatas=[i.metadata])
-    #store_pine.add_texts([i.page_content], metadatas=[i.metadata])
+    # store_pine.add_texts([i.page_content], metadatas=[i.metadata])
+

 def call_openai_api(docs, folder_name, task_status):
-# Function to create a vector store from the documents and save it to disk.
+    # Function to create a vector store from the documents and save it to disk.

    # create output folder if it doesn't exist
    if not os.path.exists(f"{folder_name}"):
@@ -44,7 +44,8 @@ def call_openai_api(docs, folder_name, task_status):
    # hf = HuggingFaceEmbeddings(model_name=model_name)
    # store = FAISS.from_documents(docs_test, hf)
    s1 = len(docs)
-    for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format='{l_bar}{bar}| Time Left: {remaining}'):
+    for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs),
+                  bar_format='{l_bar}{bar}| Time Left: {remaining}'):
        try:
            task_status.update_state(state='PROGRESS', meta={'current': int((c1 / s1) * 100)})
            store_add_texts_with_retry(store, i)
@@ -58,20 +59,20 @@ def call_openai_api(docs, folder_name, task_status):
        c1 += 1
    store.save_local(f"{folder_name}")

+
 def get_user_permission(docs, folder_name):
-# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
+    # Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
    # Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
-    #docs_content = (" ".join(docs))
+    # docs_content = (" ".join(docs))
    docs_content = ""
    for doc in docs:
        docs_content += doc.page_content

-
    tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
    # Here we print the number of tokens and the approx user cost with some visually appealing formatting.
    print(f"Number of Tokens = {format(tokens, ',d')}")
    print(f"Approx Cost = ${format(total_price, ',.2f')}")
-    #Here we check for user permission before calling the API.
+    # Here we check for user permission before calling the API.
    user_input = input("Price Okay? (Y/N) \n").lower()
    if user_input == "y":
        call_openai_api(docs, folder_name)
--- a/application/parser/py2doc.py
+++ b/application/parser/py2doc.py
@@ -1,10 +1,12 @@
-import os
 import ast
-import tiktoken
+import os
 from pathlib import Path
+
+import tiktoken
 from langchain.llms import OpenAI
 from langchain.prompts import PromptTemplate

+
 def find_files(directory):
    files_list = []
    for root, dirs, files in os.walk(directory):
@@ -13,6 +15,7 @@ def find_files(directory):
                files_list.append(os.path.join(root, file))
    return files_list

+
 def extract_functions(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
@@ -25,6 +28,7 @@ def extract_functions(file_path):
                functions[func_name] = func_def
    return functions

+
 def extract_classes(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
@@ -40,6 +44,7 @@ def extract_classes(file_path):
                classes[class_name] = ", ".join(function_names)
    return classes

+
 def extract_functions_and_classes(directory):
    files = find_files(directory)
    functions_dict = {}
@@ -53,11 +58,12 @@ def extract_functions_and_classes(directory):
            classes_dict[file] = classes
    return functions_dict, classes_dict

+
 def parse_functions(functions_dict, formats, dir):
    c1 = len(functions_dict)
    for i, (source, functions) in enumerate(functions_dict.items(), start=1):
        print(f"Processing file {i}/{c1}")
-        source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
+        source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
        subfolders = "/".join(source_w.split("/")[:-1])
        Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
        for j, (name, function) in enumerate(functions.items(), start=1):
@@ -70,18 +76,19 @@ def parse_functions(functions_dict, formats, dir):
            response = llm(prompt.format(code=function))
            mode = "a" if Path(f"outputs/{source_w}").exists() else "w"
            with open(f"outputs/{source_w}", mode) as f:
-                f.write(f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
+                f.write(
+                    f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")


 def parse_classes(classes_dict, formats, dir):
    c1 = len(classes_dict)
    for i, (source, classes) in enumerate(classes_dict.items()):
-        print(f"Processing file {i+1}/{c1}")
-        source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
+        print(f"Processing file {i + 1}/{c1}")
+        source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
        subfolders = "/".join(source_w.split("/")[:-1])
        Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
        for name, function_names in classes.items():
-            print(f"Processing Class {i+1}/{c1}")
+            print(f"Processing Class {i + 1}/{c1}")
            prompt = PromptTemplate(
                input_variables=["class_name", "functions_names"],
                template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ",
@@ -92,6 +99,7 @@ def parse_classes(classes_dict, formats, dir):
            with open(f"outputs/{source_w}", "a" if Path(f"outputs/{source_w}").exists() else "w") as f:
                f.write(f"\n\n# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}")

+
 def transform_to_docs(functions_dict, classes_dict, formats, dir):
    docs_content = ''.join([str(key) + str(value) for key, value in functions_dict.items()])
    docs_content += ''.join([str(key) + str(value) for key, value in classes_dict.items()])
@@ -110,4 +118,4 @@ def transform_to_docs(functions_dict, classes_dict, formats, dir):
        parse_classes(classes_dict, formats, dir)
        print("All done!")
    else:
-        print("The API was not called. No money was spent.")
+        print("The API was not called. No money was spent.")
--- a/application/parser/schema/base.py
+++ b/application/parser/schema/base.py
@@ -2,7 +2,6 @@
 from dataclasses import dataclass

 from langchain.docstore.document import Document as LCDocument
-
 from parser.schema.schema import BaseDocument


--- a/application/parser/token_func.py
+++ b/application/parser/token_func.py
@@ -1,9 +1,9 @@
 import re
-import tiktoken
-
-from typing import List
-from parser.schema.base import Document
 from math import ceil
+from typing import List
+
+import tiktoken
+from parser.schema.base import Document


 def separate_header_and_body(text):
@@ -13,6 +13,7 @@ def separate_header_and_body(text):
    body = text[len(header):]
    return header, body

+
 def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) -> List[Document]:
    docs = []
    current_group = None
@@ -23,7 +24,8 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
        if current_group is None:
            current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
                                     extra_info=doc.extra_info)
-        elif len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
+        elif len(tiktoken.get_encoding("cl100k_base").encode(
+                current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
            current_group.text += " " + doc.text
        else:
            docs.append(current_group)
@@ -35,6 +37,7 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)

    return docs

+
 def split_documents(documents: List[Document], max_tokens: int) -> List[Document]:
    docs = []
    for doc in documents:
@@ -54,17 +57,18 @@ def split_documents(documents: List[Document], max_tokens: int) -> List[Document
                docs.append(new_doc)
    return docs

+
 def group_split(documents: List[Document], max_tokens: int = 2000, min_tokens: int = 150, token_check: bool = True):
-    if token_check == False:
+    if not token_check:
        return documents
    print("Grouping small documents")
    try:
        documents = group_documents(documents=documents, min_tokens=min_tokens, max_tokens=max_tokens)
-    except:
+    except Exception:
        print("Grouping failed, try running without token_check")
    print("Separating large documents")
    try:
        documents = split_documents(documents=documents, max_tokens=max_tokens)
-    except:
+    except Exception:
        print("Grouping failed, try running without token_check")
    return documents
--- a/application/prompts/chat_combine_prompt.txt
+++ b/application/prompts/chat_combine_prompt.txt
@@ -1,4 +1,9 @@
 You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible.
-Use the following pieces of context to help answer the users question.
+Use the following pieces of context to help answer the users question. If its not relevant to the question, provide friendly responses.
+You have access to chat history, and can use it to help answer the question.
+When using code examples, use the following format:
+```(language)
+(code)
+```
 ----------------
 {summaries}
--- a/application/prompts/chat_reduce_prompt.txt
+++ b/application/prompts/chat_reduce_prompt.txt
@@ -1,3 +1,3 @@
-Use the following portion of a long document to see if any of the text is relevant to answer the question.
-{context}
-Provide all relevant text to the question verbatim. Summarize if needed. If nothing relevant return "-".
+Use the following pieces of context to help answer the users question. If its not relevant to the question, respond with "-"
+----------------
+{context}
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -8,8 +8,8 @@ async-timeout==4.0.2
 attrs==22.2.0
 billiard==3.6.4.0
 blobfile==2.0.1
-boto3==1.26.84
-botocore==1.29.84
+boto3==1.26.102
+botocore==1.29.102
 cffi==1.15.1
 charset-normalizer==3.1.0
 click==8.1.3
@@ -27,8 +27,11 @@ entrypoints==0.4
 faiss-cpu==1.7.3
 filelock==3.9.0
 Flask==2.2.3
+Flask-Cors==3.0.10
 frozenlist==1.3.3
+geojson==2.5.0
 greenlet==2.0.2
+gpt4all==0.1.7
 hub==3.0.1
 huggingface-hub==0.12.1
 humbug==0.2.8
@@ -38,14 +41,17 @@ Jinja2==3.1.2
 jmespath==1.0.1
 joblib==1.2.0
 kombu==5.2.4
-langchain==0.0.118
+langchain==0.0.179
+loguru==0.6.0
 lxml==4.9.2
 MarkupSafe==2.1.2
 marshmallow==3.19.0
 marshmallow-enum==1.5.1
+mpmath==1.3.0
 multidict==6.0.4
 multiprocess==0.70.14
 mypy-extensions==1.0.0
+networkx==3.0
 nltk==3.8.1
 numcodecs==0.11.0
 numpy==1.24.2
@@ -64,29 +70,37 @@ pycryptodomex==3.17
 pydantic==1.10.5
 PyJWT==2.6.0
 pymongo==4.3.3
+pyowm==3.3.0
 PyPDF2==3.0.1
+PySocks==1.7.1
 python-dateutil==2.8.2
 python-dotenv==1.0.0
 python-jose==3.3.0
 pytz==2022.7.1
 PyYAML==6.0
-redis==4.5.2
+redis==4.5.4
 regex==2022.10.31
 requests==2.28.2
 retry==0.9.2
 rsa==4.9
 s3transfer==0.6.0
+scikit-learn==1.2.2
+scipy==1.10.1
+sentence-transformers==2.2.2
+sentencepiece==0.1.97
 six==1.16.0
 SQLAlchemy==1.4.46
+sympy==1.11.1
 tenacity==8.2.2
-tiktoken==0.3.0
-tokenizers==0.13.2
+threadpoolctl==3.1.0
+torch==2.0.0
+torchvision==0.15.1
 tqdm==4.65.0
-transformers==4.26.1
+transformers==4.27.2
+typer==0.7.0
 typing-inspect==0.8.0
 typing_extensions==4.5.0
 urllib3==1.26.14
 vine==5.0.0
 wcwidth==0.2.6
-Werkzeug==2.2.3
 yarl==1.8.2
--- a/application/worker.py
+++ b/application/worker.py
@@ -1,28 +1,31 @@
-import requests
-import nltk
 import os
-
-from parser.file.bulk import SimpleDirectoryReader
-from parser.schema.base import Document
-from parser.open_ai_func import call_openai_api
-from parser.token_func import group_split
-from celery import current_task
-
-
+import shutil
 import string
 import zipfile
-import shutil
+from urllib.parse import urljoin
+
+import nltk
+import requests
+
+from core.settings import settings
+from parser.file.bulk import SimpleDirectoryReader
+from parser.open_ai_func import call_openai_api
+from parser.schema.base import Document
+from parser.token_func import group_split

 try:
    nltk.download('punkt', quiet=True)
    nltk.download('averaged_perceptron_tagger', quiet=True)
 except FileExistsError:
    pass
+
+def metadata_from_filename(title):
+    return {'title': title}
+
 def generate_random_string(length):
    return ''.join([string.ascii_letters[i % 52] for i in range(length)])


-
 def ingest_worker(self, directory, formats, name_job, filename, user):
    # directory = 'inputs' or 'temp'
    # formats = [".rst", ".md"]
@@ -39,12 +42,8 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
    max_tokens = 1250
    full_path = directory + '/' + user + '/' + name_job
    # check if API_URL env variable is set
-    if not os.environ.get('API_URL'):
-        url = 'http://localhost:5001/api/download'
-    else:
-        url = os.environ.get('API_URL') + '/api/download'
    file_data = {'name': name_job, 'file': filename, 'user': user}
-    response = requests.get(url, params=file_data)
+    response = requests.get(urljoin(settings.API_URL, "/api/download"), params=file_data)
    file = response.content

    if not os.path.exists(full_path):
@@ -52,19 +51,17 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
    with open(full_path + '/' + filename, 'wb') as f:
        f.write(file)

-    #check if file is .zip and extract it
+    # check if file is .zip and extract it
    if filename.endswith('.zip'):
        with zipfile.ZipFile(full_path + '/' + filename, 'r') as zip_ref:
            zip_ref.extractall(full_path)
        os.remove(full_path + '/' + filename)

-
-    import time
    self.update_state(state='PROGRESS', meta={'current': 1})

    raw_docs = SimpleDirectoryReader(input_dir=full_path, input_files=input_files, recursive=recursive,
                                     required_exts=formats, num_files_limit=limit,
-                                     exclude_hidden=exclude).load_data()
+                                     exclude_hidden=exclude, file_metadata=metadata_from_filename).load_data()
    raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)

    docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
@@ -72,28 +69,26 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
    call_openai_api(docs, full_path, self)
    self.update_state(state='PROGRESS', meta={'current': 100})

-    if sample == True:
+    if sample:
        for i in range(min(5, len(raw_docs))):
            print(raw_docs[i].text)

    # get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
    # and send them to the server (provide user and name in form)
-    if not os.environ.get('API_URL'):
-        url = 'http://localhost:5001/api/upload_index'
-    else:
-        url = os.environ.get('API_URL') + '/api/upload_index'
    file_data = {'name': name_job, 'user': user}
    files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
             'file_pkl': open(full_path + '/index.pkl', 'rb')}
-    response = requests.post(url, files=files, data=file_data)
+    response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)

-    #deletes remote
-    if not os.environ.get('API_URL'):
-        url = 'http://localhost:5001/api/delete_old?path=' + 'inputs/' + user + '/' + name_job
-    else:
-        url = os.environ.get('API_URL') + '/api/delete_old?path=' + 'inputs/' + user + '/' + name_job
-    response = requests.get(url)
+    response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path=inputs/" + user + "/" + name_job))
    # delete local
    shutil.rmtree(full_path)

-    return {'directory': directory, 'formats': formats, 'name_job': name_job, 'filename': filename, 'user': user}
+    return {
+        'directory': directory,
+        'formats': formats,
+        'name_job': name_job,
+        'filename': filename,
+        'user': user,
+        'limited': False
+    }
--- a/application/wsgi.py
+++ b/application/wsgi.py
@@ -1,4 +1,4 @@
 from app import app

 if __name__ == "__main__":
-    app.run()
+    app.run(debug=True, port=5001)
--- a/docker-compose-dev.yaml
+++ b/docker-compose-dev.yaml
@@ -0,0 +1,20 @@
+# Dev stack: backing services only (Redis, MongoDB), published on the host.
+version: "3.9"
+
+services:
+  redis:
+    image: redis:6-alpine
+    ports:
+      - "6379:6379"
+
+  mongo:
+    image: mongo:6
+    ports:
+      - "27017:27017"
+    volumes:
+      # Named volume mounted at MongoDB's data directory.
+      - mongodb_data_container:/data/db
+
+volumes:
+  # Intentionally empty: Compose falls back to the default volume driver.
+  mongodb_data_container:
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -5,23 +5,26 @@ services:
    build: ./frontend
    environment:
      - VITE_API_HOST=http://localhost:5001
+      - VITE_API_STREAMING=$VITE_API_STREAMING
    ports:
      - "5173:5173"
    depends_on:
-        - backend
+      - backend

  backend:
    build: ./application
    environment:
-      - API_KEY=<your_api_key>
-      - EMBEDDINGS_KEY=<your_api_key>
+      - API_KEY=$OPENAI_API_KEY
+      - EMBEDDINGS_KEY=$OPENAI_API_KEY
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/1
      - MONGO_URI=mongodb://mongo:27017/docsgpt
    ports:
      - "5001:5001"
    volumes:
-      - app_data_container:/app
+      - ./application/indexes:/app/indexes
+      - ./application/inputs:/app/inputs
+      - ./application/vectors:/app/vectors
    depends_on:
        - redis
        - mongo
@@ -30,8 +33,8 @@ services:
    build: ./application
    command: celery -A app.celery worker -l INFO
    environment:
-      - API_KEY=<your_api_key>
-      - EMBEDDINGS_KEY=<your_api_key>
+      - API_KEY=$OPENAI_API_KEY
+      - EMBEDDINGS_KEY=$OPENAI_API_KEY
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/1
      - MONGO_URI=mongodb://mongo:27017/docsgpt
@@ -55,5 +58,4 @@ services:


 volumes:
-  mongodb_data_container:
-  app_data_container:
+  mongodb_data_container:
--- a/extensions/chatwoot/app.py
+++ b/extensions/chatwoot/app.py
@@ -1,18 +1,20 @@
-import requests
-import dotenv
 import os
-import json
 import pprint

+import dotenv
+import requests
+from flask import Flask, request
+
 dotenv.load_dotenv()
 docsgpt_url = os.getenv("docsgpt_url")
 chatwoot_url = os.getenv("chatwoot_url")
 docsgpt_key = os.getenv("docsgpt_key")
 chatwoot_token = os.getenv("chatwoot_token")
-#account_id = os.getenv("account_id")
-#assignee_id = os.getenv("assignee_id")
+# account_id = os.getenv("account_id")
+# assignee_id = os.getenv("assignee_id")
 label_stop = "human-requested"

+
 def send_to_bot(sender, message):
    data = {
        'sender': sender,
@@ -43,7 +45,6 @@ def send_to_chatwoot(account, conversation, message):
    return r.json()


-from flask import Flask, request
 app = Flask(__name__)


@@ -74,7 +75,7 @@ def docsgpt():
    # elif str(assignee) != str(assignee_id):
    #     return "Not the right assignee"

-    if(message_type == "incoming"):
+    if (message_type == "incoming"):
        bot_response = send_to_bot(contact, message)
        create_message = send_to_chatwoot(
            account, conversation, bot_response)
@@ -83,5 +84,6 @@ def docsgpt():

    return create_message

+
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=80)
+    app.run(host='0.0.0.0', port=80)
--- a/extensions/discord/bot.py
+++ b/extensions/discord/bot.py
@@ -10,7 +10,7 @@ dotenv.load_dotenv()

 # Replace 'YOUR_BOT_TOKEN' with your bot's token
 TOKEN = os.getenv("DISCORD_TOKEN")
-PREFIX = '@docsgpt '
+PREFIX = '@DocsGPT'
 BASE_API_URL = 'http://localhost:5001'

 intents = discord.Intents.default()
@@ -20,13 +20,11 @@ bot = commands.Bot(command_prefix=PREFIX, intents=intents)


 def split_string(input_str):
-    pattern = r'<(.*?)>'
-    match = re.search(pattern, input_str)
-
+    pattern = r'^<@!?{0}>\s*'.format(bot.user.id)
+    match = re.match(pattern, input_str)
    if match:
-        content = match.group(1)
-        rest = input_str[:match.start()] + input_str[match.end():]
-        return content, rest.strip()
+        content = input_str[match.end():].strip()
+        return str(bot.user.id), content
    return None, input_str


@@ -59,8 +57,8 @@ async def on_message(message):
    if prefix is None:
        return

-    part_prefix = "@"
-    if part_prefix in prefix:
+    part_prefix = str(bot.user.id)
+    if part_prefix == prefix:
        answer = await fetch_answer(content)
        await message.channel.send(answer)

--- a/extensions/web-widget/README.md
+++ b/extensions/web-widget/README.md
@@ -0,0 +1,25 @@
+# Chat Widget
+
+A simple chat widget that can be easily integrated into any website.
+
+## Installation
+
+1. Host the `widget.html`, `styles.css`, and `script.js` files from the `src` folder on your own server or a Content Delivery Network (CDN). Make sure to note the URLs for these files.
+
+2. Update the URLs in the `dist/chat-widget.js` file to match the locations of your hosted files:
+
+   ```javascript
+   fetch("https://your-server-or-cdn.com/path/to/widget.html"),
+   fetch("https://your-server-or-cdn.com/path/to/styles.css"),
+   fetch("https://your-server-or-cdn.com/path/to/script.js"),
+    ```
+   
+3. Host the `dist/chat-widget.js` file on your own server or a Content Delivery Network (CDN). Make sure to note the URL for this file.
+
+
+## Integration
+
+To integrate the chat widget into a website, add the following script tag to the HTML file, replacing URL_TO_CHAT_WIDGET_JS with the actual URL of your hosted chat-widget.js file:
+```html
+<script src="URL_TO_CHAT_WIDGET_JS"></script>
+```
--- a/extensions/web-widget/dist/chat-widget.js
+++ b/extensions/web-widget/dist/chat-widget.js
@@ -0,0 +1,41 @@
+(async function () {
+  // Fetch the HTML, CSS, and JavaScript from your server or CDN
+  const [htmlRes, jsRes] = await Promise.all([
+    fetch("https://s3-eu-west-2.amazonaws.com/arc53data/widget.html"),
+    // fetch("https://s3-eu-west-2.amazonaws.com/arc53data/tailwind.css"),
+    fetch("https://s3-eu-west-2.amazonaws.com/arc53data/script.js"),
+  ]);
+
+  const html = await htmlRes.text();
+  //const css = await cssRes.text();
+  const js = await jsRes.text();
+
+  // create a new link element
+  const link = document.createElement("link");
+
+  //set the rel, href, type, and integrity attributes
+  link.rel = "stylesheet";
+  link.href = "https://cdn.tailwindcss.com/";
+  link.type = "text/css";
+  link.integrity = "sha384-PDOmVviaTm8N1W35y1NSmo80w6GPaGhbDuOBAF/5hRffaeGc6yOwIo1qAt4gqLGA%";
+
+  // get the document head and append the link element to it
+  // document.head.appendChild(link);
+
+
+
+  // Create a style element for the CSS
+  // const style = document.createElement("style");
+  // style.innerHTML = css;
+  // document.head.appendChild(style);
+
+  // Create a container for the chat widget and inject the HTML
+  const chatWidgetContainer = document.createElement("div");
+  chatWidgetContainer.innerHTML = html;
+  document.body.appendChild(chatWidgetContainer);
+
+  // Execute the JavaScript code
+  const script = document.createElement("script");
+  script.innerHTML = js;
+  document.body.appendChild(script);
+})();
--- a/extensions/web-widget/dist/output.css
+++ b/extensions/web-widget/dist/output.css
@@ -0,0 +1,807 @@
+/*
+! tailwindcss v3.3.1 | MIT License | https://tailwindcss.com
+*/
+
+/*
+1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)
+2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116)
+*/
+
+*,
+::before,
+::after {
+  box-sizing: border-box;
+  /* 1 */
+  border-width: 0;
+  /* 2 */
+  border-style: solid;
+  /* 2 */
+  border-color: #e5e7eb;
+  /* 2 */
+}
+
+::before,
+::after {
+  --tw-content: '';
+}
+
+/*
+1. Use a consistent sensible line-height in all browsers.
+2. Prevent adjustments of font size after orientation changes in iOS.
+3. Use a more readable tab size.
+4. Use the user's configured `sans` font-family by default.
+5. Use the user's configured `sans` font-feature-settings by default.
+6. Use the user's configured `sans` font-variation-settings by default.
+*/
+
+html {
+  line-height: 1.5;
+  /* 1 */
+  -webkit-text-size-adjust: 100%;
+  /* 2 */
+  -moz-tab-size: 4;
+  /* 3 */
+  -o-tab-size: 4;
+     tab-size: 4;
+  /* 3 */
+  font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
+  /* 4 */
+  font-feature-settings: normal;
+  /* 5 */
+  font-variation-settings: normal;
+  /* 6 */
+}
+
+/*
+1. Remove the margin in all browsers.
+2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.
+*/
+
+body {
+  margin: 0;
+  /* 1 */
+  line-height: inherit;
+  /* 2 */
+}
+
+/*
+1. Add the correct height in Firefox.
+2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)
+3. Ensure horizontal rules are visible by default.
+*/
+
+hr {
+  height: 0;
+  /* 1 */
+  color: inherit;
+  /* 2 */
+  border-top-width: 1px;
+  /* 3 */
+}
+
+/*
+Add the correct text decoration in Chrome, Edge, and Safari.
+*/
+
+abbr:where([title]) {
+  -webkit-text-decoration: underline dotted;
+          text-decoration: underline dotted;
+}
+
+/*
+Remove the default font size and weight for headings.
+*/
+
+h1,
+h2,
+h3,
+h4,
+h5,
+h6 {
+  font-size: inherit;
+  font-weight: inherit;
+}
+
+/*
+Reset links to optimize for opt-in styling instead of opt-out.
+*/
+
+a {
+  color: inherit;
+  text-decoration: inherit;
+}
+
+/*
+Add the correct font weight in Edge and Safari.
+*/
+
+b,
+strong {
+  font-weight: bolder;
+}
+
+/*
+1. Use the user's configured `mono` font family by default.
+2. Correct the odd `em` font sizing in all browsers.
+*/
+
+code,
+kbd,
+samp,
+pre {
+  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
+  /* 1 */
+  font-size: 1em;
+  /* 2 */
+}
+
+/*
+Add the correct font size in all browsers.
+*/
+
+small {
+  font-size: 80%;
+}
+
+/*
+Prevent `sub` and `sup` elements from affecting the line height in all browsers.
+*/
+
+sub,
+sup {
+  font-size: 75%;
+  line-height: 0;
+  position: relative;
+  vertical-align: baseline;
+}
+
+sub {
+  bottom: -0.25em;
+}
+
+sup {
+  top: -0.5em;
+}
+
+/*
+1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)
+2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)
+3. Remove gaps between table borders by default.
+*/
+
+table {
+  text-indent: 0;
+  /* 1 */
+  border-color: inherit;
+  /* 2 */
+  border-collapse: collapse;
+  /* 3 */
+}
+
+/*
+1. Change the font styles in all browsers.
+2. Remove the margin in Firefox and Safari.
+3. Remove default padding in all browsers.
+*/
+
+button,
+input,
+optgroup,
+select,
+textarea {
+  font-family: inherit;
+  /* 1 */
+  font-size: 100%;
+  /* 1 */
+  font-weight: inherit;
+  /* 1 */
+  line-height: inherit;
+  /* 1 */
+  color: inherit;
+  /* 1 */
+  margin: 0;
+  /* 2 */
+  padding: 0;
+  /* 3 */
+}
+
+/*
+Remove the inheritance of text transform in Edge and Firefox.
+*/
+
+button,
+select {
+  text-transform: none;
+}
+
+/*
+1. Correct the inability to style clickable types in iOS and Safari.
+2. Remove default button styles.
+*/
+
+button,
+[type='button'],
+[type='reset'],
+[type='submit'] {
+  -webkit-appearance: button;
+  /* 1 */
+  background-color: transparent;
+  /* 2 */
+  background-image: none;
+  /* 2 */
+}
+
+/*
+Use the modern Firefox focus style for all focusable elements.
+*/
+
+:-moz-focusring {
+  outline: auto;
+}
+
+/*
+Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737)
+*/
+
+:-moz-ui-invalid {
+  box-shadow: none;
+}
+
+/*
+Add the correct vertical alignment in Chrome and Firefox.
+*/
+
+progress {
+  vertical-align: baseline;
+}
+
+/*
+Correct the cursor style of increment and decrement buttons in Safari.
+*/
+
+::-webkit-inner-spin-button,
+::-webkit-outer-spin-button {
+  height: auto;
+}
+
+/*
+1. Correct the odd appearance in Chrome and Safari.
+2. Correct the outline style in Safari.
+*/
+
+[type='search'] {
+  -webkit-appearance: textfield;
+  /* 1 */
+  outline-offset: -2px;
+  /* 2 */
+}
+
+/*
+Remove the inner padding in Chrome and Safari on macOS.
+*/
+
+::-webkit-search-decoration {
+  -webkit-appearance: none;
+}
+
+/*
+1. Correct the inability to style clickable types in iOS and Safari.
+2. Change font properties to `inherit` in Safari.
+*/
+
+::-webkit-file-upload-button {
+  -webkit-appearance: button;
+  /* 1 */
+  font: inherit;
+  /* 2 */
+}
+
+/*
+Add the correct display in Chrome and Safari.
+*/
+
+summary {
+  display: list-item;
+}
+
+/*
+Removes the default spacing and border for appropriate elements.
+*/
+
+blockquote,
+dl,
+dd,
+h1,
+h2,
+h3,
+h4,
+h5,
+h6,
+hr,
+figure,
+p,
+pre {
+  margin: 0;
+}
+
+fieldset {
+  margin: 0;
+  padding: 0;
+}
+
+legend {
+  padding: 0;
+}
+
+ol,
+ul,
+menu {
+  list-style: none;
+  margin: 0;
+  padding: 0;
+}
+
+/*
+Prevent resizing textareas horizontally by default.
+*/
+
+textarea {
+  resize: vertical;
+}
+
+/*
+1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)
+2. Set the default placeholder color to the user's configured gray 400 color.
+*/
+
+input::-moz-placeholder, textarea::-moz-placeholder {
+  opacity: 1;
+  /* 1 */
+  color: #9ca3af;
+  /* 2 */
+}
+
+input::placeholder,
+textarea::placeholder {
+  opacity: 1;
+  /* 1 */
+  color: #9ca3af;
+  /* 2 */
+}
+
+/*
+Set the default cursor for buttons.
+*/
+
+button,
+[role="button"] {
+  cursor: pointer;
+}
+
+/*
+Make sure disabled buttons don't get the pointer cursor.
+*/
+
+:disabled {
+  cursor: default;
+}
+
+/*
+1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)
+2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)
+   This can trigger a poorly considered lint error in some tools but is included by design.
+*/
+
+img,
+svg,
+video,
+canvas,
+audio,
+iframe,
+embed,
+object {
+  display: block;
+  /* 1 */
+  vertical-align: middle;
+  /* 2 */
+}
+
+/*
+Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14)
+*/
+
+img,
+video {
+  max-width: 100%;
+  height: auto;
+}
+
+/* Make elements with the HTML hidden attribute stay hidden by default */
+
+[hidden] {
+  display: none;
+}
+
+*, ::before, ::after {
+  --tw-border-spacing-x: 0;
+  --tw-border-spacing-y: 0;
+  --tw-translate-x: 0;
+  --tw-translate-y: 0;
+  --tw-rotate: 0;
+  --tw-skew-x: 0;
+  --tw-skew-y: 0;
+  --tw-scale-x: 1;
+  --tw-scale-y: 1;
+  --tw-pan-x:  ;
+  --tw-pan-y:  ;
+  --tw-pinch-zoom:  ;
+  --tw-scroll-snap-strictness: proximity;
+  --tw-ordinal:  ;
+  --tw-slashed-zero:  ;
+  --tw-numeric-figure:  ;
+  --tw-numeric-spacing:  ;
+  --tw-numeric-fraction:  ;
+  --tw-ring-inset:  ;
+  --tw-ring-offset-width: 0px;
+  --tw-ring-offset-color: #fff;
+  --tw-ring-color: rgb(59 130 246 / 0.5);
+  --tw-ring-offset-shadow: 0 0 #0000;
+  --tw-ring-shadow: 0 0 #0000;
+  --tw-shadow: 0 0 #0000;
+  --tw-shadow-colored: 0 0 #0000;
+  --tw-blur:  ;
+  --tw-brightness:  ;
+  --tw-contrast:  ;
+  --tw-grayscale:  ;
+  --tw-hue-rotate:  ;
+  --tw-invert:  ;
+  --tw-saturate:  ;
+  --tw-sepia:  ;
+  --tw-drop-shadow:  ;
+  --tw-backdrop-blur:  ;
+  --tw-backdrop-brightness:  ;
+  --tw-backdrop-contrast:  ;
+  --tw-backdrop-grayscale:  ;
+  --tw-backdrop-hue-rotate:  ;
+  --tw-backdrop-invert:  ;
+  --tw-backdrop-opacity:  ;
+  --tw-backdrop-saturate:  ;
+  --tw-backdrop-sepia:  ;
+}
+
+::backdrop {
+  --tw-border-spacing-x: 0;
+  --tw-border-spacing-y: 0;
+  --tw-translate-x: 0;
+  --tw-translate-y: 0;
+  --tw-rotate: 0;
+  --tw-skew-x: 0;
+  --tw-skew-y: 0;
+  --tw-scale-x: 1;
+  --tw-scale-y: 1;
+  --tw-pan-x:  ;
+  --tw-pan-y:  ;
+  --tw-pinch-zoom:  ;
+  --tw-scroll-snap-strictness: proximity;
+  --tw-ordinal:  ;
+  --tw-slashed-zero:  ;
+  --tw-numeric-figure:  ;
+  --tw-numeric-spacing:  ;
+  --tw-numeric-fraction:  ;
+  --tw-ring-inset:  ;
+  --tw-ring-offset-width: 0px;
+  --tw-ring-offset-color: #fff;
+  --tw-ring-color: rgb(59 130 246 / 0.5);
+  --tw-ring-offset-shadow: 0 0 #0000;
+  --tw-ring-shadow: 0 0 #0000;
+  --tw-shadow: 0 0 #0000;
+  --tw-shadow-colored: 0 0 #0000;
+  --tw-blur:  ;
+  --tw-brightness:  ;
+  --tw-contrast:  ;
+  --tw-grayscale:  ;
+  --tw-hue-rotate:  ;
+  --tw-invert:  ;
+  --tw-saturate:  ;
+  --tw-sepia:  ;
+  --tw-drop-shadow:  ;
+  --tw-backdrop-blur:  ;
+  --tw-backdrop-brightness:  ;
+  --tw-backdrop-contrast:  ;
+  --tw-backdrop-grayscale:  ;
+  --tw-backdrop-hue-rotate:  ;
+  --tw-backdrop-invert:  ;
+  --tw-backdrop-opacity:  ;
+  --tw-backdrop-saturate:  ;
+  --tw-backdrop-sepia:  ;
+}
+
+.fixed {
+  position: fixed;
+}
+
+.absolute {
+  position: absolute;
+}
+
+.relative {
+  position: relative;
+}
+
+.inset-y-0 {
+  top: 0px;
+  bottom: 0px;
+}
+
+.bottom-5 {
+  bottom: 1.25rem;
+}
+
+.left-5 {
+  left: 1.25rem;
+}
+
+.right-2 {
+  right: 0.5rem;
+}
+
+.z-50 {
+  z-index: 50;
+}
+
+.m-0 {
+  margin: 0px;
+}
+
+.-mx-2 {
+  margin-left: -0.5rem;
+  margin-right: -0.5rem;
+}
+
+.mt-1 {
+  margin-top: 0.25rem;
+}
+
+.flex {
+  display: flex;
+}
+
+.hidden {
+  display: none;
+}
+
+.w-full {
+  width: 100%;
+}
+
+.flex-1 {
+  flex: 1 1 0%;
+}
+
+.transform {
+  transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));
+}
+
+.items-center {
+  align-items: center;
+}
+
+.justify-center {
+  justify-content: center;
+}
+
+.gap-2 {
+  gap: 0.5rem;
+}
+
+.divide-y > :not([hidden]) ~ :not([hidden]) {
+  --tw-divide-y-reverse: 0;
+  border-top-width: calc(1px * calc(1 - var(--tw-divide-y-reverse)));
+  border-bottom-width: calc(1px * var(--tw-divide-y-reverse));
+}
+
+.rounded-md {
+  border-radius: 0.375rem;
+}
+
+.rounded-b {
+  border-bottom-right-radius: 0.25rem;
+  border-bottom-left-radius: 0.25rem;
+}
+
+.border {
+  border-width: 1px;
+}
+
+.bg-transparent {
+  background-color: transparent;
+}
+
+.bg-gradient-to-br {
+  background-image: linear-gradient(to bottom right, var(--tw-gradient-stops));
+}
+
+.from-gray-100\/80 {
+  --tw-gradient-from: rgb(243 244 246 / 0.8) var(--tw-gradient-from-position);
+  --tw-gradient-from-position:  ;
+  --tw-gradient-to: rgb(243 244 246 / 0)  var(--tw-gradient-from-position);
+  --tw-gradient-to-position:  ;
+  --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
+}
+
+.via-white {
+  --tw-gradient-via-position:  ;
+  --tw-gradient-to: rgb(255 255 255 / 0)  var(--tw-gradient-to-position);
+  --tw-gradient-to-position:  ;
+  --tw-gradient-stops: var(--tw-gradient-from), #fff var(--tw-gradient-via-position), var(--tw-gradient-to);
+}
+
+.to-white {
+  --tw-gradient-to: #fff var(--tw-gradient-to-position);
+  --tw-gradient-to-position:  ;
+}
+
+.p-3 {
+  padding: 0.75rem;
+}
+
+.px-2 {
+  padding-left: 0.5rem;
+  padding-right: 0.5rem;
+}
+
+.px-5 {
+  padding-left: 1.25rem;
+  padding-right: 1.25rem;
+}
+
+.py-3 {
+  padding-top: 0.75rem;
+  padding-bottom: 0.75rem;
+}
+
+.pl-5 {
+  padding-left: 1.25rem;
+}
+
+.pr-8 {
+  padding-right: 2rem;
+}
+
+.font-sans {
+  font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
+}
+
+.text-sm {
+  font-size: 0.875rem;
+  line-height: 1.25rem;
+}
+
+.text-xs {
+  font-size: 0.75rem;
+  line-height: 1rem;
+}
+
+.font-bold {
+  font-weight: 700;
+}
+
+.text-gray-400 {
+  --tw-text-opacity: 1;
+  color: rgb(156 163 175 / var(--tw-text-opacity));
+}
+
+.text-gray-600 {
+  --tw-text-opacity: 1;
+  color: rgb(75 85 99 / var(--tw-text-opacity));
+}
+
+.text-gray-700 {
+  --tw-text-opacity: 1;
+  color: rgb(55 65 81 / var(--tw-text-opacity));
+}
+
+.text-gray-800 {
+  --tw-text-opacity: 1;
+  color: rgb(31 41 55 / var(--tw-text-opacity));
+}
+
+.shadow {
+  --tw-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
+  --tw-shadow-colored: 0 1px 3px 0 var(--tw-shadow-color), 0 1px 2px -1px var(--tw-shadow-color);
+  box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow);
+}
+
+.backdrop-blur-sm {
+  --tw-backdrop-blur: blur(4px);
+  -webkit-backdrop-filter: var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);
+          backdrop-filter: var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);
+}
+
+.transition {
+  transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, -webkit-backdrop-filter;
+  transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, backdrop-filter;
+  transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, backdrop-filter, -webkit-backdrop-filter;
+  transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
+  transition-duration: 150ms;
+}
+
+.delay-200 {
+  transition-delay: 200ms;
+}
+
+.duration-300 {
+  transition-duration: 300ms;
+}
+
+.hover\:bg-gray-100:hover {
+  --tw-bg-opacity: 1;
+  background-color: rgb(243 244 246 / var(--tw-bg-opacity));
+}
+
+.focus\:outline-none:focus {
+  outline: 2px solid transparent;
+  outline-offset: 2px;
+}
+
+@media (prefers-color-scheme: dark) {
+  .dark\:divide-gray-700 > :not([hidden]) ~ :not([hidden]) {
+    --tw-divide-opacity: 1;
+    border-color: rgb(55 65 81 / var(--tw-divide-opacity));
+  }
+
+  .dark\:border-gray-700 {
+    --tw-border-opacity: 1;
+    border-color: rgb(55 65 81 / var(--tw-border-opacity));
+  }
+
+  .dark\:from-gray-900\/80 {
+    --tw-gradient-from: rgb(17 24 39 / 0.8) var(--tw-gradient-from-position);
+    --tw-gradient-from-position:  ;
+    --tw-gradient-to: rgb(17 24 39 / 0)  var(--tw-gradient-from-position);
+    --tw-gradient-to-position:  ;
+    --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
+  }
+
+  .dark\:via-gray-900 {
+    --tw-gradient-via-position:  ;
+    --tw-gradient-to: rgb(17 24 39 / 0)  var(--tw-gradient-to-position);
+    --tw-gradient-to-position:  ;
+    --tw-gradient-stops: var(--tw-gradient-from), #111827 var(--tw-gradient-via-position), var(--tw-gradient-to);
+  }
+
+  .dark\:to-gray-900 {
+    --tw-gradient-to: #111827 var(--tw-gradient-to-position);
+    --tw-gradient-to-position:  ;
+  }
+
+  .dark\:text-gray-200 {
+    --tw-text-opacity: 1;
+    color: rgb(229 231 235 / var(--tw-text-opacity));
+  }
+
+  .dark\:text-gray-300 {
+    --tw-text-opacity: 1;
+    color: rgb(209 213 219 / var(--tw-text-opacity));
+  }
+
+  .dark\:text-gray-500 {
+    --tw-text-opacity: 1;
+    color: rgb(107 114 128 / var(--tw-text-opacity));
+  }
+
+  .dark\:text-white {
+    --tw-text-opacity: 1;
+    color: rgb(255 255 255 / var(--tw-text-opacity));
+  }
+
+  .dark\:hover\:bg-gray-800\/70:hover {
+    background-color: rgb(31 41 55 / 0.7);
+  }
+}
+
+@media (min-width: 768px) {
+  .md\:pl-0 {
+    padding-left: 0px;
+  }
+}
--- a/extensions/web-widget/index.html
+++ b/extensions/web-widget/index.html
@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Chat Widget Test</title>
+    <link href="dist/output.css" rel="stylesheet">
+</head>
+<body>
+  <script src="dist/chat-widget.js"></script>
+</body>
+</html>
--- a/extensions/web-widget/package-lock.json
+++ b/extensions/web-widget/package-lock.json
--- a/extensions/web-widget/package.json
+++ b/extensions/web-widget/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "web-widget",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "devDependencies": {
+    "tailwindcss": "^3.3.1"
+  }
+}
--- a/extensions/web-widget/src/html/widget.html
+++ b/extensions/web-widget/src/html/widget.html
@@ -0,0 +1,58 @@
+<div id="docsgpt-widget" class="dark fixed bottom-5 left-5 pl-5 md:pl-0 z-50">
+<style>
+  @keyframes dotBounce {
+    0%, 80%, 100% {
+      transform: translateY(0);
+    }
+    40% {
+      transform: translateY(-5px);
+    }
+  }
+
+  .dot-animation {
+    display: inline-block;
+    animation: dotBounce 1s infinite ease-in-out;
+  }
+
+  .delay-200 {
+    animation-delay: 200ms;
+  }
+
+  .delay-400 {
+    animation-delay: 400ms;
+  }
+</style>
+
+
+<div class="divide-y dark:divide-gray-700 rounded-md border dark:border-gray-700 bg-gradient-to-br from-gray-100/80 via-white to-white dark:from-gray-900/80 dark:via-gray-900 dark:to-gray-900 font-sans shadow backdrop-blur-sm" style="width: 18rem; transform: translateY(0%) translateZ(0px);"><div>
+    <div class="flex items-center gap-2 p-3">
+        <div id="docsgpt-init-message" class="flex-1">
+            <h3 class="text-sm font-bold text-gray-700 dark:text-gray-200">Looking for help with documentation?</h3>
+            <p class="mt-1 text-xs text-gray-400 dark:text-gray-500">DocsGPT AI assistant will help you with docs</p>
+        </div>
+        <div id="docsgpt-answer" class="hidden">
+            <p class="mt-1 text-xs text-gray-600 dark:text-gray-300">Come cool  answer</p>
+        </div>
+
+    </div>
+</div>
+    <div class="w-full">
+        <button id="ask-docsgpt" class="flex w-full justify-center px-5 py-3 text-sm text-gray-800 font-bold dark:text-white transition duration-300 hover:bg-gray-100 rounded-b dark:hover:bg-gray-800/70">
+            Ask DocsGPT
+        </button>
+
+        <form id="docsgpt-chat-form" class="relative w-full m-0 hidden" style="opacity: 1;" data-projection-id="1">
+            <input id="docsgpt-chat-input" type="text" class="w-full bg-transparent px-5 py-3 pr-8 text-sm text-gray-700 dark:text-white focus:outline-none" placeholder="What do you want to do?" value="">
+            <button class="absolute inset-y-0 right-2 -mx-2 px-2" type="submit" style="opacity: 0;" data-projection-id="2">
+
+            </button>
+        </form>
+        <p id="docsgpt-chat-processing" class="hidden flex w-full justify-center px-5 py-3 text-sm text-gray-800 font-bold dark:text-white transition duration-300 rounded-b animate-fadeIn animate-2s">
+          Processing<span class="dot-animation">.</span><span class="dot-animation delay-200">.</span><span class="dot-animation delay-400">.</span>
+        </p>
+
+
+
+    </div>
+</div>
+</div>
--- a/extensions/web-widget/src/input.css
+++ b/extensions/web-widget/src/input.css
@@ -0,0 +1,3 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
--- a/extensions/web-widget/src/js/script.js
+++ b/extensions/web-widget/src/js/script.js
@@ -0,0 +1,56 @@
+// URL of the DocsGPT answer endpoint that questions are POSTed to.
+const API_ENDPOINT = "http://localhost:5001/api/answer"; // Replace with your API endpoint
+
+// Widget elements, looked up once by the ids used in the widget markup.
+// Introductory message shown until an answer is available.
+const widgetInitMessage = document.getElementById("docsgpt-init-message");
+// Answer container and the <p> inside it that receives the reply.
+const widgetAnswerMessage = document.getElementById("docsgpt-answer");
+const widgetAnswerMessageP = widgetAnswerMessage.querySelector("p");
+// Button that opens the question form.
+const askDocsGPTButton = document.getElementById("ask-docsgpt");
+// Question form and its text input.
+const chatInput = document.getElementById("docsgpt-chat-input");
+const chatForm = document.getElementById("docsgpt-chat-form");
+// "Processing..." indicator displayed while a request is in flight.
+const chatProcessing = document.getElementById("docsgpt-chat-processing");
+
+async function sendMessage(message) {
+  const requestData = {
+    "question": message,
+    "active_docs": "default",
+    "api_key": "token",
+    "embeddings_key": "token",
+    "model": "default",
+    "history": null,
+  }
+  const response = await fetch(API_ENDPOINT, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify(requestData),
+  });
+  const data = await response.json();
+  return data.answer;
+}
+
+askDocsGPTButton.addEventListener("click", () => {
+  askDocsGPTButton.classList.add("hidden");
+  chatForm.classList.remove("hidden");
+  chatForm.focus();
+  widgetInitMessage.classList.remove("hidden");
+  widgetAnswerMessage.classList.add("hidden");
+
+
+});
+
+chatForm.addEventListener("submit", async (e) => {
+  e.preventDefault();
+  const message = chatInput.value.trim();
+  if (!message) return;
+
+  chatInput.value = "";
+  chatForm.classList.add("hidden");
+  chatProcessing.classList.remove("hidden");
+
+const reply = await sendMessage(message);
+chatProcessing.classList.add("hidden");
+
+// inside <p> tag
+widgetAnswerMessageP.innerHTML = reply;
+widgetAnswerMessage.classList.remove("hidden");
+widgetInitMessage.classList.add("hidden");
+askDocsGPTButton.classList.remove("hidden");
+});
--- a/extensions/web-widget/tailwind.config.js
+++ b/extensions/web-widget/tailwind.config.js
@@ -0,0 +1,10 @@
+/** @type {import('tailwindcss').Config} */
+module.exports = {
+  // Files Tailwind scans for class names when generating the stylesheet.
+  content: ["./src/**/*.{html,js}"],
+  // No theme customisations are defined.
+  theme: {
+    extend: {},
+  },
+  // No plugins are registered.
+  plugins: [],
+}
+
+
--- a/frontend/.env.development
+++ b/frontend/.env.development
@@ -1,2 +1,2 @@
 # Please put appropriate value
-VITE_API_HOST = http://localhost:5001
+VITE_API_HOST=http://localhost:5001
--- a/frontend/.env.production
+++ b/frontend/.env.production
@@ -1 +1 @@
-VITE_API_HOST = https://docsapi.arc53.com
+VITE_API_HOST = https://gptcloud.arc53.com
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -24,12 +24,15 @@
    "react": "^18.2.0",
    "react-dom": "^18.2.0",
    "react-dropzone": "^14.2.3",
+    "react-markdown": "^8.0.7",
    "react-redux": "^8.0.5",
-    "react-router-dom": "^6.8.1"
+    "react-router-dom": "^6.8.1",
+    "react-syntax-highlighter": "^15.5.0"
  },
  "devDependencies": {
    "@types/react": "^18.0.27",
    "@types/react-dom": "^18.0.10",
+    "@types/react-syntax-highlighter": "^15.5.6",
    "@typescript-eslint/eslint-plugin": "^5.51.0",
    "@typescript-eslint/parser": "^5.51.0",
    "@vitejs/plugin-react": "^3.1.0",
--- a/frontend/src/Navigation.tsx
+++ b/frontend/src/Navigation.tsx
@@ -38,9 +38,8 @@ export default function Navigation({
  const [isDocsListOpen, setIsDocsListOpen] = useState(false);

  const isApiKeySet = useSelector(selectApiKeyStatus);
-  const [apiKeyModalState, setApiKeyModalState] = useState<ActiveState>(
-    isApiKeySet ? 'INACTIVE' : 'ACTIVE',
-  );
+  const [apiKeyModalState, setApiKeyModalState] =
+    useState<ActiveState>('INACTIVE');

  const isSelectedDocsSet = useSelector(selectSelectedDocsStatus);
  const [selectedDocsModalState, setSelectedDocsModalState] =
@@ -148,7 +147,7 @@ export default function Navigation({
                src={Arrow2}
                alt="arrow"
                className={`${
-                  isDocsListOpen ? 'rotate-0' : '-rotate-90'
+                  isDocsListOpen ? 'rotate-0' : 'rotate-180'
                } mr-3 w-3 transition-all`}
              />
            </div>
--- a/frontend/src/conversation/Conversation.tsx
+++ b/frontend/src/conversation/Conversation.tsx
@@ -71,19 +71,15 @@ export default function Conversation() {
  };

  return (
-    <div className="flex justify-center p-6">
+    <div className="flex justify-center p-4">
      {queries.length > 0 && (
-        <div className="mt-20 flex w-10/12 flex-col transition-all md:w-3/4">
+        <div className="mt-20 flex flex-col transition-all md:w-3/4">
          {queries.map((query, index) => {
            return (
              <Fragment key={index}>
                <ConversationBubble
                  ref={endMessageRef}
-                  className={`${
-                    index === queries.length - 1 && status === 'loading'
-                      ? 'mb-24'
-                      : 'mb-7'
-                  }`}
+                  className={'mb-7'}
                  key={`${index}QUESTION`}
                  message={query.prompt}
                  type="QUESTION"
--- a/frontend/src/conversation/ConversationBubble.tsx
+++ b/frontend/src/conversation/ConversationBubble.tsx
@@ -4,6 +4,9 @@ import { FEEDBACK, MESSAGE_TYPE } from './conversationModels';
 import Alert from './../assets/alert.svg';
 import { ReactComponent as Like } from './../assets/like.svg';
 import { ReactComponent as Dislike } from './../assets/dislike.svg';
+import ReactMarkdown from 'react-markdown';
+import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
+import { vscDarkPlus } from 'react-syntax-highlighter/dist/cjs/styles/prism';

 const ConversationBubble = forwardRef<
  HTMLDivElement,
@@ -19,14 +22,26 @@ const ConversationBubble = forwardRef<
  ref,
 ) {
  const [showFeedback, setShowFeedback] = useState(false);
+  // Renders a markdown list: <ol> with decimal markers when `ordered` is set,
+  // otherwise <ul> with disc bullets.
+  const List = ({
+    ordered,
+    children,
+  }: {
+    ordered?: boolean;
+    children: React.ReactNode;
+  }) => {
+    const Tag = ordered ? 'ol' : 'ul';
+    // Both class names are spelled out in full so Tailwind can detect them.
+    const marker = ordered ? 'list-decimal' : 'list-disc';
+    return <Tag className={`list-inside ${marker}`}>{children}</Tag>;
+  };
  let bubble;

  if (type === 'QUESTION') {
    bubble = (
      <div ref={ref} className={`flex flex-row-reverse self-end ${className}`}>
        <Avatar className="mt-4 text-2xl" avatar="🧑‍💻"></Avatar>
-        <div className="mr-2 ml-10 flex items-center rounded-3xl bg-blue-1000 py-5 px-5 text-white">
-          <p className="whitespace-pre-wrap break-words">{message}</p>
+        <div className="mr-2 ml-10 flex items-center rounded-3xl bg-blue-1000 p-3.5 text-white">
+          <ReactMarkdown className="whitespace-pre-wrap break-words">
+            {message}
+          </ReactMarkdown>
        </div>
      </div>
    );
@@ -40,7 +55,7 @@ const ConversationBubble = forwardRef<
      >
        <Avatar className="mt-4 text-2xl" avatar="🦖"></Avatar>
        <div
-          className={`ml-2 mr-5 flex items-center rounded-3xl bg-gray-1000 py-5 px-5 ${
+          className={`ml-2 mr-5 flex items-center rounded-3xl bg-gray-1000 p-3.5 ${
            type === 'ERROR'
              ? ' rounded-lg border border-red-2000 bg-red-1000 p-2 text-red-3000'
              : ''
@@ -49,7 +64,37 @@ const ConversationBubble = forwardRef<
          {type === 'ERROR' && (
            <img src={Alert} alt="alert" className="mr-2 inline" />
          )}
-          <p className="whitespace-pre-wrap break-words">{message}</p>
+          <ReactMarkdown
+            className="whitespace-pre-wrap break-words"
+            components={{
+              code({ node, inline, className, children, ...props }) {
+                const match = /language-(\w+)/.exec(className || '');
+
+                return !inline && match ? (
+                  <SyntaxHighlighter
+                    PreTag="div"
+                    language={match[1]}
+                    {...props}
+                    style={vscDarkPlus}
+                  >
+                    {String(children).replace(/\n$/, '')}
+                  </SyntaxHighlighter>
+                ) : (
+                  <code className={className ? className : ''} {...props}>
+                    {children}
+                  </code>
+                );
+              },
+              ul({ node, children }) {
+                return <List>{children}</List>;
+              },
+              ol({ node, children }) {
+                return <List ordered>{children}</List>;
+              },
+            }}
+          >
+            {message}
+          </ReactMarkdown>
        </div>
        <div
          className={`mr-2 flex items-center justify-center ${
--- a/frontend/src/conversation/conversationApi.ts
+++ b/frontend/src/conversation/conversationApi.ts
@@ -7,6 +7,7 @@ export function fetchAnswerApi(
  question: string,
  apiKey: string,
  selectedDocs: Doc,
+  history: Array<any> = [],
 ): Promise<Answer> {
  let namePath = selectedDocs.name;
  if (selectedDocs.language === namePath) {
@@ -37,7 +38,7 @@ export function fetchAnswerApi(
      question: question,
      api_key: apiKey,
      embeddings_key: apiKey,
-      history: localStorage.getItem('chatHistory'),
+      history: history,
      active_docs: docPath,
    }),
  })
@@ -45,7 +46,7 @@ export function fetchAnswerApi(
      if (response.ok) {
        return response.json();
      } else {
-        Promise.reject(response);
+        return Promise.reject(new Error(response.statusText));
      }
    })
    .then((data) => {
@@ -54,6 +55,85 @@ export function fetchAnswerApi(
    });
 }

+/**
+ * Streams an answer from the `/stream` endpoint over Server-Sent Events.
+ *
+ * Every message is forwarded to `onEvent` unchanged. The returned promise
+ * settles exactly once: it resolves after a message whose JSON payload has
+ * `type === 'end'`, and rejects if the connection reports an error first.
+ * The EventSource is closed in both cases so the browser does not reconnect.
+ *
+ * @param question - user question sent as the `question` query parameter
+ * @param apiKey - sent as both `api_key` and `embeddings_key`
+ * @param selectedDocs - documentation set used to build `active_docs`
+ * @param history - prior queries, JSON-encoded into the `history` parameter
+ * @param onEvent - callback invoked for each message event
+ * @returns a placeholder Answer (empty `answer`/`result`); the streamed text
+ *   is only delivered through `onEvent`
+ */
+export function fetchAnswerSteaming(
+  question: string,
+  apiKey: string,
+  selectedDocs: Doc,
+  history: Array<any> = [],
+  onEvent: (event: MessageEvent) => void,
+): Promise<Answer> {
+  let namePath = selectedDocs.name;
+  if (selectedDocs.language === namePath) {
+    namePath = '.project';
+  }
+
+  let docPath = 'default';
+  if (selectedDocs.location === 'local') {
+    docPath = 'local' + '/' + selectedDocs.name + '/';
+  } else if (selectedDocs.location === 'remote') {
+    docPath =
+      selectedDocs.language +
+      '/' +
+      namePath +
+      '/' +
+      selectedDocs.version +
+      '/' +
+      selectedDocs.model +
+      '/';
+  }
+
+  return new Promise<Answer>((resolve, reject) => {
+    const url = new URL(apiHost + '/stream');
+    url.searchParams.append('question', question);
+    url.searchParams.append('api_key', apiKey);
+    url.searchParams.append('embeddings_key', apiKey);
+    url.searchParams.append('active_docs', docPath);
+    url.searchParams.append('history', JSON.stringify(history));
+
+    const eventSource = new EventSource(url.href);
+
+    eventSource.onmessage = (event) => {
+      onEvent(event);
+
+      // Detect the terminal message here as well, so the stream can be closed
+      // and the promise resolved; non-JSON payloads are simply not terminal.
+      let finished = false;
+      try {
+        finished = JSON.parse(event.data).type === 'end';
+      } catch (parseError) {
+        finished = false;
+      }
+      if (finished) {
+        eventSource.close();
+        resolve({ answer: '', query: question, result: '' });
+      }
+    };
+
+    eventSource.onerror = () => {
+      console.log('Connection failed.');
+      eventSource.close();
+      // No-op if the promise already resolved on the 'end' message.
+      reject(new Error('Connection failed.'));
+    };
+  });
+}
+
 export function sendFeedback(
  prompt: string,
  response: string,
--- a/frontend/src/conversation/conversationSlice.ts
+++ b/frontend/src/conversation/conversationSlice.ts
@@ -1,27 +1,65 @@
 import { createAsyncThunk, createSlice, PayloadAction } from '@reduxjs/toolkit';
 import store from '../store';
-import { fetchAnswerApi } from './conversationApi';
-import { Answer, ConversationState, Query } from './conversationModels';
+import { fetchAnswerApi, fetchAnswerSteaming } from './conversationApi';
+import { Answer, ConversationState, Query, Status } from './conversationModels';

 const initialState: ConversationState = {
  queries: [],
  status: 'idle',
 };

-export const fetchAnswer = createAsyncThunk<
-  Answer,
-  { question: string },
-  { state: RootState }
->('fetchAnswer', async ({ question }, { getState }) => {
-  const state = getState();
+const API_STREAMING = import.meta.env.VITE_API_STREAMING === 'true';

-  const answer = await fetchAnswerApi(
-    question,
-    state.preference.apiKey,
-    state.preference.selectedDocs!,
-  );
-  return answer;
-});
+export const fetchAnswer = createAsyncThunk<Answer, { question: string }>(
+  'fetchAnswer',
+  async ({ question }, { dispatch, getState }) => {
+    const state = getState() as RootState; // snapshot read once, not refreshed
+    if (state.preference) {
+      if (API_STREAMING) {
+        await fetchAnswerSteaming(
+          question,
+          state.preference.apiKey,
+          state.preference.selectedDocs!,
+          state.conversation.queries, // passed as the `history` argument
+          (event) => {
+            const data = JSON.parse(event.data);
+
+            // a payload with type 'end' marks the end of the stream
+            if (data.type === 'end') {
+              // stream finished: move the status from 'loading' back to 'idle'
+              dispatch(conversationSlice.actions.setStatus('idle'));
+            } else {
+              const result = data.answer; // appended by updateStreamingQuery
+              dispatch(
+                updateStreamingQuery({
+                  index: state.conversation.queries.length - 1, // last in snapshot
+                  query: { response: result },
+                }),
+              );
+            }
+          },
+        );
+      } else {
+        const answer = await fetchAnswerApi(
+          question,
+          state.preference.apiKey,
+          state.preference.selectedDocs!,
+          state.conversation.queries, // passed as the `history` argument
+        );
+        if (answer) {
+          dispatch(
+            updateQuery({
+              index: state.conversation.queries.length - 1, // last in snapshot
+              query: { response: answer.answer },
+            }),
+          );
+          dispatch(conversationSlice.actions.setStatus('idle'));
+        }
+      }
+    }
+    return { answer: '', query: question, result: '' }; // placeholder payload
+  },
+);

 export const conversationSlice = createSlice({
  name: 'conversation',
@@ -30,6 +68,28 @@ export const conversationSlice = createSlice({
    addQuery(state, action: PayloadAction<Query>) {
      state.queries.push(action.payload);
    },
+    /**
+     * Applies one streamed update to the query at `index`. A string `response`
+     * (including an empty chunk) is appended to the text received so far; a
+     * payload without a string `response` is shallow-merged into the query.
+     */
+    updateStreamingQuery(
+      state,
+      action: PayloadAction<{ index: number; query: Partial<Query> }>,
+    ) {
+      const { index, query } = action.payload;
+      if (typeof query.response === 'string') {
+        // Test the type, not truthiness: an empty chunk must not fall through
+        // to the merge branch and overwrite the accumulated response with ''.
+        state.queries[index].response =
+          (state.queries[index].response || '') + query.response;
+      } else {
+        state.queries[index] = {
+          ...state.queries[index],
+          ...query,
+        };
+      }
+    },
    updateQuery(
      state,
      action: PayloadAction<{ index: number; query: Partial<Query> }>,
@@ -40,17 +93,15 @@ export const conversationSlice = createSlice({
        ...action.payload.query,
      };
    },
+    setStatus(state, { payload }: PayloadAction<Status>) {
+      state.status = payload; // replace the conversation status as given
+    },
  },
  extraReducers(builder) {
    builder
      .addCase(fetchAnswer.pending, (state) => {
        state.status = 'loading';
      })
-      .addCase(fetchAnswer.fulfilled, (state, action) => {
-        state.status = 'idle';
-        state.queries[state.queries.length - 1].response =
-          action.payload.answer;
-      })
      .addCase(fetchAnswer.rejected, (state, action) => {
        state.status = 'failed';
        state.queries[state.queries.length - 1].error =
@@ -65,5 +116,6 @@ export const selectQueries = (state: RootState) => state.conversation.queries;

 export const selectStatus = (state: RootState) => state.conversation.status;

-export const { addQuery, updateQuery } = conversationSlice.actions;
+export const { addQuery, updateQuery, updateStreamingQuery } =
+  conversationSlice.actions;
 export default conversationSlice.reducer;
--- a/frontend/src/preferences/preferenceSlice.ts
+++ b/frontend/src/preferences/preferenceSlice.ts
@@ -13,8 +13,18 @@ interface Preference {
 }

 const initialState: Preference = {
-  apiKey: '',
-  selectedDocs: null,
+  apiKey: 'xxx',
+  selectedDocs: {
+    name: 'default',
+    language: 'default',
+    location: 'default',
+    version: 'default',
+    description: 'default',
+    fullName: 'default',
+    dat: 'default',
+    docLink: 'default',
+    model: 'openai_text-embedding-ada-002',
+  } as Doc,
  sourceDocs: null,
 };

@@ -29,7 +39,7 @@ export const prefSlice = createSlice({
      state.selectedDocs = action.payload;
    },
    setSourceDocs: (state, action) => {
-      state.sourceDocs?.push(...action.payload);
+      state.sourceDocs = action.payload;
    },
  },
 });
--- a/frontend/src/upload/Upload.tsx
+++ b/frontend/src/upload/Upload.tsx
@@ -19,20 +19,27 @@ export default function Upload({
    type: 'UPLOAD' | 'TRAINIING';
    percentage: number;
    taskId?: string;
+    failed?: boolean;
  }>();

  function Progress({
    title,
    isCancellable = false,
+    isFailed = false,
  }: {
    title: string;
    isCancellable?: boolean;
+    isFailed?: boolean;
  }) {
    return (
      <div className="mt-5 flex flex-col items-center gap-2">
        <p className="text-xl tracking-[0.15px]">{title}...</p>
        <p className="text-sm text-gray-2000">This may take several minutes</p>
+        <p className={`ml-5 text-xl text-red-400 ${isFailed ? '' : 'hidden'}`}>
+          Over the token limit, please consider uploading smaller document
+        </p>
        <p className="mt-10 text-2xl">{progress?.percentage || 0}%</p>
+
        <div className="mb-10 w-[50%]">
          <div className="h-1 w-[100%] bg-blue-4000"></div>
          <div
@@ -40,6 +47,7 @@ export default function Upload({
            style={{ width: `${progress?.percentage || 0}%` }}
          ></div>
        </div>
+
        <button
          onClick={() => {
            setDocName('');
@@ -71,11 +79,28 @@ export default function Upload({
            .then((data) => data.json())
            .then((data) => {
              if (data.status == 'SUCCESS') {
-                getDocs().then((data) => dispatch(setSourceDocs(data)));
-                setProgress(
-                  (progress) => progress && { ...progress, percentage: 100 },
-                );
-              } else {
+                if (data.result.limited === true) {
+                  getDocs().then((data) => dispatch(setSourceDocs(data)));
+                  setProgress(
+                    (progress) =>
+                      progress && {
+                        ...progress,
+                        percentage: 100,
+                        failed: true,
+                      },
+                  );
+                } else {
+                  getDocs().then((data) => dispatch(setSourceDocs(data)));
+                  setProgress(
+                    (progress) =>
+                      progress && {
+                        ...progress,
+                        percentage: 100,
+                        failed: false,
+                      },
+                  );
+                }
+              } else if (data.status == 'PROGRESS') {
                setProgress(
                  (progress) =>
                    progress && {
@@ -91,6 +116,7 @@ export default function Upload({
      <Progress
        title="Training is in progress"
        isCancellable={progress?.percentage === 100}
+        isFailed={progress?.failed === true}
      ></Progress>
    );
  }
@@ -125,10 +151,18 @@ export default function Upload({

  const { getRootProps, getInputProps, isDragActive } = useDropzone({
    onDrop,
-    multiple: true,
+    multiple: false,
    onDragEnter: doNothing,
    onDragOver: doNothing,
    onDragLeave: doNothing,
+    maxSize: 25000000,
+    accept: {
+      'application/pdf': ['.pdf'],
+      'text/plain': ['.txt'],
+      'text/x-rst': ['.rst'],
+      'text/x-markdown': ['.md'],
+      'application/zip': ['.zip'],
+    },
  });

  let view;
@@ -139,7 +173,10 @@ export default function Upload({
  } else {
    view = (
      <>
-        <p className="mb-7 text-xl text-jet">Upload New Documentation</p>
+        <p className="text-xl text-jet">Upload New Documentation</p>
+        <p className="mb-3 text-xs text-gray-4000">
+          Please upload .pdf, .txt, .rst, .md, .zip limited to 25mb
+        </p>
        <input
          type="text"
          className="h-10 w-[60%] rounded-md border-2 border-gray-5000 px-3 outline-none"
--- a/scripts/code_docs_gen.py
+++ b/scripts/code_docs_gen.py
@@ -1,20 +1,13 @@
+import ast
+import json
 from pathlib import Path
-from langchain.text_splitter import CharacterTextSplitter
-import faiss
-from langchain.vectorstores import FAISS
-from langchain.embeddings import OpenAIEmbeddings
+
+import dotenv
 from langchain.llms import OpenAI
 from langchain.prompts import PromptTemplate
-import pickle
-import dotenv
-import tiktoken
-import sys
-from argparse import ArgumentParser
-import ast

 dotenv.load_dotenv()

-
 ps = list(Path("inputs").glob("**/*.py"))
 data = []
 sources = []
@@ -24,13 +17,6 @@ for p in ps:
    sources.append(p)


-
-# with open('inputs/client.py', 'r') as f:
-#     tree = ast.parse(f.read())
-
-# print(tree)
-
-
 def get_functions_in_class(node):
    functions = []
    functions_code = []
@@ -64,21 +50,9 @@ for code in data:
    c1 += 1

 # save the structure dict as json
-import json
 with open('structure_dict.json', 'w') as f:
    json.dump(structure_dict, f)

-
-# llm = OpenAI(temperature=0)
-# prompt = PromptTemplate(
-#     input_variables=["code"],
-#     template="Code: {code}, Documentation: ",
-# )
-#
-# print(prompt.format(code="print('hello world')"))
-# print(llm(prompt.format(code="print('hello world')")))
-
-
 if not Path("outputs").exists():
    Path("outputs").mkdir()

@@ -119,8 +93,3 @@ for source, classes in structure_dict.items():
            else:
                with open(f"outputs/{source_w}", "a") as f:
                    f.write(f"\n\nFunction: {functions[function]}, \nDocumentation: {response}")
-
-
-
-
-
--- a/scripts/ingest.py
+++ b/scripts/ingest.py
@@ -1,21 +1,20 @@
 import os
 import sys
-import nltk
-import dotenv
-import typer
-
 from collections import defaultdict
 from typing import List, Optional

-from parser.file.bulk import SimpleDirectoryReader
-from parser.schema.base import Document
-from parser.open_ai_func import call_openai_api, get_user_permission
-from parser.py2doc import transform_to_docs
-from parser.py2doc import extract_functions_and_classes as extract_py
-from parser.js2doc import extract_functions_and_classes as extract_js
-from parser.java2doc import extract_functions_and_classes as extract_java
-from parser.token_func import group_split
+import dotenv
+import nltk
+import typer

+from parser.file.bulk import SimpleDirectoryReader
+from parser.java2doc import extract_functions_and_classes as extract_java
+from parser.js2doc import extract_functions_and_classes as extract_js
+from parser.open_ai_func import call_openai_api, get_user_permission
+from parser.py2doc import extract_functions_and_classes as extract_py
+from parser.py2doc import transform_to_docs
+from parser.schema.base import Document
+from parser.token_func import group_split

 dotenv.load_dotenv()

@@ -25,28 +24,32 @@ nltk.download('punkt', quiet=True)
 nltk.download('averaged_perceptron_tagger', quiet=True)


-#Splits all files in specified folder to documents
+def metadata_from_filename(title):
+    return dict(title=title)  # wrap the given value under the 'title' key
+
+# Splits all files in specified folder to documents
@app.command()
 def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
-                                                   help="Whether to skip price confirmation"),
+                                    help="Whether to skip price confirmation"),
           dir: Optional[List[str]] = typer.Option(["inputs"],
                                                   help="""List of paths to directory for index creation.
                                                        E.g. --dir inputs --dir inputs2"""),
           file: Optional[List[str]] = typer.Option(None,
-                                                   help="""File paths to use (Optional; overrides dir).
+                                                    help="""File paths to use (Optional; overrides dir).
                                                        E.g. --file inputs/1.md --file inputs/2.md"""),
           recursive: Optional[bool] = typer.Option(True, help="Whether to recursively search in subdirectories."),
           limit: Optional[int] = typer.Option(None, help="Maximum number of files to read."),
           formats: Optional[List[str]] = typer.Option([".rst", ".md"],
-                                                   help="""List of required extensions (list with .)
-                                                        Currently supported: .rst, .md, .pdf, .docx, .csv, .epub, .html, .mdx"""),
+                                                       help="""List of required extensions (list with .)
+                                                        Currently supported: 
+                                                        .rst, .md, .pdf, .docx, .csv, .epub, .html, .mdx"""),
           exclude: Optional[bool] = typer.Option(True, help="Whether to exclude hidden files (dotfiles)."),
-           sample: Optional[bool] = typer.Option(False, help="Whether to output sample of the first 5 split documents."),
+           sample: Optional[bool] = typer.Option(False,
+                                                 help="Whether to output sample of the first 5 split documents."),
           token_check: Optional[bool] = typer.Option(True, help="Whether to group small documents and split large."),
           min_tokens: Optional[int] = typer.Option(150, help="Minimum number of tokens to not group."),
           max_tokens: Optional[int] = typer.Option(2000, help="Maximum number of tokens to not split."),
           ):
-
    """
        Creates index from specified location or files.
        By default /inputs folder is used, .rst and .md are parsed.
@@ -55,23 +58,23 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
    def process_one_docs(directory, folder_name):
        raw_docs = SimpleDirectoryReader(input_dir=directory, input_files=file, recursive=recursive,
                                         required_exts=formats, num_files_limit=limit,
-                                         exclude_hidden=exclude).load_data()
+                                         exclude_hidden=exclude, file_metadata=metadata_from_filename).load_data()

        # Here we split the documents, as needed, into smaller chunks.
        # We do this due to the context limits of the LLMs.
-        raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
-        #Old method
+        raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens,
+                               token_check=token_check)
+        # Old method
        # text_splitter = RecursiveCharacterTextSplitter()
        # docs = text_splitter.split_documents(raw_docs)

-        #Sample feature
-        if sample == True:
+        # Sample feature
+        if sample:
            for i in range(min(5, len(raw_docs))):
                print(raw_docs[i].text)

        docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]

-
        # Here we check for command line arguments for bot calls.
        # If no argument exists or the yes is not True, then the
        # user permission is requested to call the API.
@@ -98,12 +101,11 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,

@app.command()
 def convert(dir: Optional[str] = typer.Option("inputs",
-                                                   help="""Path to directory to make documentation for.
+                                              help="""Path to directory to make documentation for.
                                                        E.g. --dir inputs """),
            formats: Optional[str] = typer.Option("py",
-                                                        help="""Required language. 
+                                                  help="""Required language. 
                                                        py, js, java supported for now""")):
-
    """
            Creates documentation linked to original functions from specified location.
            By default /inputs folder is used, .py is parsed.
@@ -117,7 +119,7 @@ def convert(dir: Optional[str] = typer.Option("inputs",
    else:
        raise Exception("Sorry, language not supported yet")
    transform_to_docs(functions_dict, classes_dict, formats, dir)
+
+
 if __name__ == "__main__":
-  app()
-
-
+    app()
--- a/scripts/old/ingest_rst.py
+++ b/scripts/old/ingest_rst.py
@@ -1,38 +1,42 @@
-from pathlib import Path
-from langchain.text_splitter import CharacterTextSplitter
-import faiss
-from langchain.vectorstores import FAISS
-from langchain.embeddings import OpenAIEmbeddings
 import pickle
-import dotenv
-import tiktoken
 import sys
 from argparse import ArgumentParser
+from pathlib import Path
+
+import dotenv
+import faiss
+import tiktoken
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS
+

 def num_tokens_from_string(string: str, encoding_name: str) -> int:
-# Function to convert string to tokens and estimate user cost.
+    # Function to convert string to tokens and estimate user cost.
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
-    total_price = ((num_tokens/1000) * 0.0004)
+    total_price = ((num_tokens / 1000) * 0.0004)
    return num_tokens, total_price

+
 def call_openai_api():
-# Function to create a vector store from the documents and save it to disk.
+    # Function to create a vector store from the documents and save it to disk.
    store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
    faiss.write_index(store.index, "docs.index")
    store.index = None
    with open("faiss_store.pkl", "wb") as f:
        pickle.dump(store, f)

+
 def get_user_permission():
-# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
+    # Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
    # Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
    docs_content = (" ".join(docs))
    tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
    # Here we print the number of tokens and the approx user cost with some visually appealing formatting.
    print(f"Number of Tokens = {format(tokens, ',d')}")
    print(f"Approx Cost = ${format(total_price, ',.2f')}")
-    #Here we check for user permission before calling the API.
+    # Here we check for user permission before calling the API.
    user_input = input("Price Okay? (Y/N) \n").lower()
    if user_input == "y":
        call_openai_api()
@@ -41,7 +45,8 @@ def get_user_permission():
    else:
        print("The API was not called. No money was spent.")

-#Load .env file
+
+# Load .env file
 dotenv.load_dotenv()

 ap = ArgumentParser("Script for training DocsGPT on .rst documentation files.")
--- a/scripts/old/ingest_rst_sphinx.py
+++ b/scripts/old/ingest_rst_sphinx.py
@@ -1,71 +1,75 @@
 import os
 import pickle
-import dotenv
-import tiktoken
-import sys
-import faiss
 import shutil
+import sys
+from argparse import ArgumentParser
 from pathlib import Path
-from langchain.vectorstores import FAISS
+
+import dotenv
+import faiss
+import tiktoken
 from langchain.embeddings import OpenAIEmbeddings
 from langchain.text_splitter import CharacterTextSplitter
+from langchain.vectorstores import FAISS
 from sphinx.cmd.build import main as sphinx_main
-from argparse import ArgumentParser
+

 def convert_rst_to_txt(src_dir, dst_dir):
-  # Check if the source directory exists
-  if not os.path.exists(src_dir):
-    raise Exception("Source directory does not exist")
-  # Walk through the source directory
-  for root, dirs, files in os.walk(src_dir):
-    for file in files:
-      # Check if the file has .rst extension
-      if file.endswith(".rst"):
-        # Construct the full path of the file
-        src_file = os.path.join(root, file.replace(".rst", ""))
-        # Convert the .rst file to .txt file using sphinx-build
-        args = f". -b text -D extensions=sphinx.ext.autodoc " \
-               f"-D master_doc={src_file} " \
-               f"-D source_suffix=.rst " \
-               f"-C {dst_dir} "
-        sphinx_main(args.split())
-      elif file.endswith(".md"):
-        # Rename the .md file to .rst file
-        src_file = os.path.join(root, file)
-        dst_file = os.path.join(root, file.replace(".md", ".rst"))
-        os.rename(src_file, dst_file)
-        # Convert the .rst file to .txt file using sphinx-build
-        args = f". -b text -D extensions=sphinx.ext.autodoc " \
-                f"-D master_doc={dst_file} " \
-                f"-D source_suffix=.rst " \
-                f"-C {dst_dir} "
-        sphinx_main(args.split())
+    # Check if the source directory exists
+    if not os.path.exists(src_dir):
+        raise Exception("Source directory does not exist")
+    # Walk through the source directory
+    for root, dirs, files in os.walk(src_dir):
+        for file in files:
+            # Check if the file has .rst extension
+            if file.endswith(".rst"):
+                # Construct the full path of the file
+                src_file = os.path.join(root, file.replace(".rst", ""))
+                # Convert the .rst file to .txt file using sphinx-build
+                args = f". -b text -D extensions=sphinx.ext.autodoc " \
+                       f"-D master_doc={src_file} " \
+                       f"-D source_suffix=.rst " \
+                       f"-C {dst_dir} "
+                sphinx_main(args.split())
+            elif file.endswith(".md"):
+                # Rename the .md file to .rst file
+                src_file = os.path.join(root, file)
+                dst_file = os.path.join(root, file.replace(".md", ".rst"))
+                os.rename(src_file, dst_file)
+                # Convert the .rst file to .txt file using sphinx-build
+                args = f". -b text -D extensions=sphinx.ext.autodoc " \
+                       f"-D master_doc={dst_file} " \
+                       f"-D source_suffix=.rst " \
+                       f"-C {dst_dir} "
+                sphinx_main(args.split())


 def num_tokens_from_string(string: str, encoding_name: str) -> int:
-# Function to convert string to tokens and estimate user cost.
+    # Function to convert string to tokens and estimate user cost.
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
-    total_price = ((num_tokens/1000) * 0.0004)
+    total_price = ((num_tokens / 1000) * 0.0004)
    return num_tokens, total_price

+
 def call_openai_api():
-# Function to create a vector store from the documents and save it to disk.
+    # Function to create a vector store from the documents and save it to disk.
    store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
    faiss.write_index(store.index, "docs.index")
    store.index = None
    with open("faiss_store.pkl", "wb") as f:
        pickle.dump(store, f)

+
 def get_user_permission():
-# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
+    # Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
    # Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
    docs_content = (" ".join(docs))
    tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
    # Here we print the number of tokens and the approx user cost with some visually appealing formatting.
    print(f"Number of Tokens = {format(tokens, ',d')}")
    print(f"Approx Cost = ${format(total_price, ',.2f')}")
-    #Here we check for user permission before calling the API.
+    # Here we check for user permission before calling the API.
    user_input = input("Price Okay? (Y/N) \n").lower()
    if user_input == "y":
        call_openai_api()
@@ -74,6 +78,7 @@ def get_user_permission():
    else:
        print("The API was not called. No money was spent.")

+
 ap = ArgumentParser("Script for training DocsGPT on Sphinx documentation")
 ap.add_argument("-i", "--inputs",
                type=str,
@@ -81,17 +86,17 @@ ap.add_argument("-i", "--inputs",
                help="Directory containing documentation files")
 args = ap.parse_args()

-#Load .env file
+# Load .env file
 dotenv.load_dotenv()

-#Directory to vector
+# Directory to vector
 src_dir = args.inputs
 dst_dir = "tmp"

 convert_rst_to_txt(src_dir, dst_dir)

 # Here we load in the data in the format that Notion exports it in.
-ps = list(Path("tmp/"+ src_dir).glob("**/*.txt"))
+ps = list(Path("tmp/" + src_dir).glob("**/*.txt"))

 # parse all child directories
 data = []
--- a/scripts/parser/file/base.py
+++ b/scripts/parser/file/base.py
@@ -3,7 +3,6 @@ from abc import abstractmethod
 from typing import Any, List

 from langchain.docstore.document import Document as LCDocument
-
 from parser.schema.base import Document


--- a/scripts/parser/file/bulk.py
+++ b/scripts/parser/file/bulk.py
@@ -1,8 +1,5 @@
 """Simple reader that reads files of different formats from a directory."""
 import logging
-from pathlib import Path
-from typing import Callable, Dict, List, Optional, Union
-
 from parser.file.base import BaseReader
 from parser.file.base_parser import BaseParser
 from parser.file.docs_parser import DocxParser, PDFParser
@@ -12,6 +9,8 @@ from parser.file.markdown_parser import MarkdownParser
 from parser.file.rst_parser import RstParser
 from parser.file.tabular_parser import PandasCSVParser
 from parser.schema.base import Document
+from pathlib import Path
+from typing import Callable, Dict, List, Optional, Union

 DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
    ".pdf": PDFParser(),
@@ -52,17 +51,17 @@ class SimpleDirectoryReader(BaseReader):
    """

    def __init__(
-        self,
-        input_dir: Optional[str] = None,
-        input_files: Optional[List] = None,
-        exclude_hidden: bool = True,
-        errors: str = "ignore",
-        recursive: bool = True,
-        required_exts: Optional[List[str]] = None,
-        file_extractor: Optional[Dict[str, BaseParser]] = None,
-        num_files_limit: Optional[int] = None,
-        file_metadata: Optional[Callable[[str], Dict]] = None,
-        chunk_size_max: int = 2048,
+            self,
+            input_dir: Optional[str] = None,
+            input_files: Optional[List] = None,
+            exclude_hidden: bool = True,
+            errors: str = "ignore",
+            recursive: bool = True,
+            required_exts: Optional[List[str]] = None,
+            file_extractor: Optional[Dict[str, BaseParser]] = None,
+            num_files_limit: Optional[int] = None,
+            file_metadata: Optional[Callable[[str], Dict]] = None,
+            chunk_size_max: int = 2048,
    ) -> None:
        """Initialize with parameters."""
        super().__init__()
@@ -103,8 +102,8 @@ class SimpleDirectoryReader(BaseReader):
            elif self.exclude_hidden and input_file.name.startswith("."):
                continue
            elif (
-                self.required_exts is not None
-                and input_file.suffix not in self.required_exts
+                    self.required_exts is not None
+                    and input_file.suffix not in self.required_exts
            ):
                continue
            else:
@@ -115,7 +114,7 @@ class SimpleDirectoryReader(BaseReader):
            new_input_files.extend(sub_input_files)

        if self.num_files_limit is not None and self.num_files_limit > 0:
-            new_input_files = new_input_files[0 : self.num_files_limit]
+            new_input_files = new_input_files[0: self.num_files_limit]

        # print total number of files added
        logging.debug(
@@ -151,10 +150,15 @@ class SimpleDirectoryReader(BaseReader):
                    data = f.read()
            if isinstance(data, List):
                data_list.extend(data)
+                if self.file_metadata is not None:
+                    for _ in range(len(data)):
+                        metadata_list.append(self.file_metadata(str(input_file)))
            else:
                data_list.append(str(data))
-            if self.file_metadata is not None:
-                metadata_list.append(self.file_metadata(str(input_file)))
+                if self.file_metadata is not None:
+                    metadata_list.append(self.file_metadata(str(input_file)))
+
+            

        if concatenate:
            return [Document("\n".join(data_list))]
--- a/scripts/parser/file/html_parser.py
+++ b/scripts/parser/file/html_parser.py
@@ -9,6 +9,7 @@ from typing import Dict, Union

 from parser.file.base_parser import BaseParser

+
 class HTMLParser(BaseParser):
    """HTML parser."""

@@ -23,21 +24,20 @@ class HTMLParser(BaseParser):
            Union[str, List[str]]: a string or a List of strings.
        """
        try:
-            import unstructured
+            from unstructured.partition.html import partition_html
+            from unstructured.staging.base import convert_to_isd
+            from unstructured.cleaners.core import clean
        except ImportError:
            raise ValueError("unstructured package is required to parse HTML files.")
-        from unstructured.partition.html import partition_html
-        from unstructured.staging.base import convert_to_isd
-        from unstructured.cleaners.core import clean

        # Using the unstructured library to convert the html to isd format
        # isd sample : isd = [
-                            #   {"text": "My Title", "type": "Title"},
-                            #   {"text": "My Narrative", "type": "NarrativeText"}
-                            # ]
+        #   {"text": "My Title", "type": "Title"},
+        #   {"text": "My Narrative", "type": "NarrativeText"}
+        # ]
        with open(file, "r", encoding="utf-8") as fp:
            elements = partition_html(file=fp)
-            isd = convert_to_isd(elements)  
+            isd = convert_to_isd(elements)

        # Removing non ascii charactwers from isd_el['text']
        for isd_el in isd:
@@ -46,15 +46,15 @@ class HTMLParser(BaseParser):
        # Removing all the \n characters from isd_el['text'] using regex and replace with single space
        # Removing all the extra spaces  from isd_el['text'] using regex and replace with single space
        for isd_el in isd:
-            isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE|re.DOTALL)
-            isd_el['text'] = re.sub(r"\s{2,}"," ", isd_el['text'], flags=re.MULTILINE|re.DOTALL)
+            isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
+            isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)

        # more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
        for isd_el in isd:
-            clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True )
+            clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)

        # Creating a list of all the indexes of isd_el['type'] = 'Title'
-        title_indexes = [i for i,isd_el in enumerate(isd) if isd_el['type'] == 'Title']
+        title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']

        # Creating 'Chunks' - List of lists of strings 
        # each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title'
@@ -64,19 +64,20 @@ class HTMLParser(BaseParser):
        Chunks = [[]]
        final_chunks = list(list())

-        for i,isd_el in enumerate(isd):
+        for i, isd_el in enumerate(isd):
            if i in title_indexes:
                Chunks.append([])
            Chunks[-1].append(isd_el['text'])

-        # Removing all the chunks with sum of lenth of all the strings in the chunk < 25 #TODO: This value can be an user defined variable
+        # Removing all the chunks with sum of length of all the strings in the chunk < 25
+        # TODO: This value can be a user defined variable
        for chunk in Chunks:
            # sum of lenth of all the strings in the chunk
            sum = 0
            sum += len(str(chunk))
            if sum < 25:
                Chunks.remove(chunk)
-            else :         
+            else:
                # appending all the approved chunks to final_chunks as a single string       
                final_chunks.append(" ".join([str(item) for item in chunk]))
        return final_chunks
--- a/scripts/parser/file/markdown_parser.py
+++ b/scripts/parser/file/markdown_parser.py
@@ -7,8 +7,8 @@ import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union, cast

-from parser.file.base_parser import BaseParser
 import tiktoken
+from parser.file.base_parser import BaseParser


 class MarkdownParser(BaseParser):
@@ -20,13 +20,13 @@ class MarkdownParser(BaseParser):
    """

    def __init__(
-        self,
-        *args: Any,
-        remove_hyperlinks: bool = True,
-        remove_images: bool = True,
-        max_tokens: int = 2048,
-        # remove_tables: bool = True,
-        **kwargs: Any,
+            self,
+            *args: Any,
+            remove_hyperlinks: bool = True,
+            remove_images: bool = True,
+            max_tokens: int = 2048,
+            # remove_tables: bool = True,
+            **kwargs: Any,
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
@@ -35,8 +35,8 @@ class MarkdownParser(BaseParser):
        self._max_tokens = max_tokens
        # self._remove_tables = remove_tables

-
-    def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str], current_text: str):
+    def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str],
+                          current_text: str):
        """Append to tups chunk."""
        num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(current_text))
        if num_tokens > self._max_tokens:
@@ -46,6 +46,7 @@ class MarkdownParser(BaseParser):
        else:
            tups.append((current_header, current_text))
        return tups
+
    def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
        """Convert a markdown file to a dictionary.

@@ -115,7 +116,7 @@ class MarkdownParser(BaseParser):
        return {}

    def parse_tups(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
    ) -> List[Tuple[Optional[str], str]]:
        """Parse file into tuples."""
        with open(filepath, "r") as f:
@@ -130,7 +131,7 @@ class MarkdownParser(BaseParser):
        return markdown_tups

    def parse_file(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
    ) -> Union[str, List[str]]:
        """Parse file into string."""
        tups = self.parse_tups(filepath, errors=errors)
--- a/scripts/parser/file/rst_parser.py
+++ b/scripts/parser/file/rst_parser.py
@@ -5,10 +5,10 @@ Contains parser for md files.
 """
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, cast
+from typing import Any, Dict, List, Optional, Tuple, Union

 from parser.file.base_parser import BaseParser
-import tiktoken
+

 class RstParser(BaseParser):
    """reStructuredText parser.
@@ -19,17 +19,17 @@ class RstParser(BaseParser):
    """

    def __init__(
-        self,
-        *args: Any,
-        remove_hyperlinks: bool = True,
-        remove_images: bool = True,
-        remove_table_excess: bool = True,
-        remove_interpreters: bool = True,
-        remove_directives: bool = True,
-        remove_whitespaces_excess: bool = True,
-        #Be carefull with remove_characters_excess, might cause data loss
-        remove_characters_excess: bool = True,
-        **kwargs: Any,
+            self,
+            *args: Any,
+            remove_hyperlinks: bool = True,
+            remove_images: bool = True,
+            remove_table_excess: bool = True,
+            remove_interpreters: bool = True,
+            remove_directives: bool = True,
+            remove_whitespaces_excess: bool = True,
+            # Be careful with remove_characters_excess, might cause data loss
+            remove_characters_excess: bool = True,
+            **kwargs: Any,
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
@@ -41,7 +41,6 @@ class RstParser(BaseParser):
        self._remove_whitespaces_excess = remove_whitespaces_excess
        self._remove_characters_excess = remove_characters_excess

-
    def rst_to_tups(self, rst_text: str) -> List[Tuple[Optional[str], str]]:
        """Convert a reStructuredText file to a dictionary.

@@ -56,7 +55,8 @@ class RstParser(BaseParser):

        for i, line in enumerate(lines):
            header_match = re.match(r"^[^\S\n]*[-=]+[^\S\n]*$", line)
-            if header_match and i > 0 and (len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
+            if header_match and i > 0 and (
+                    len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
                if current_header is not None:
                    if current_text == "" or None:
                        continue
@@ -72,7 +72,7 @@ class RstParser(BaseParser):

        rst_tups.append((current_header, current_text))

-        #TODO: Format for rst
+        # TODO: Format for rst
        #
        # if current_header is not None:
        #     # pass linting, assert keys are defined
@@ -136,7 +136,7 @@ class RstParser(BaseParser):
        return {}

    def parse_tups(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
    ) -> List[Tuple[Optional[str], str]]:
        """Parse file into tuples."""
        with open(filepath, "r") as f:
@@ -159,7 +159,7 @@ class RstParser(BaseParser):
        return rst_tups

    def parse_file(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
    ) -> Union[str, List[str]]:
        """Parse file into string."""
        tups = self.parse_tups(filepath, errors=errors)
--- a/scripts/parser/file/tabular_parser.py
+++ b/scripts/parser/file/tabular_parser.py
@@ -77,13 +77,13 @@ class PandasCSVParser(BaseParser):
    """

    def __init__(
-        self,
-        *args: Any,
-        concat_rows: bool = True,
-        col_joiner: str = ", ",
-        row_joiner: str = "\n",
-        pandas_config: dict = {},
-        **kwargs: Any
+            self,
+            *args: Any,
+            concat_rows: bool = True,
+            col_joiner: str = ", ",
+            row_joiner: str = "\n",
+            pandas_config: dict = {},
+            **kwargs: Any
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
--- a/scripts/parser/java2doc.py
+++ b/scripts/parser/java2doc.py
@@ -1,6 +1,8 @@
 import os
+
 import javalang

+
 def find_files(directory):
    files_list = []
    for root, dirs, files in os.walk(directory):
@@ -9,6 +11,7 @@ def find_files(directory):
                files_list.append(os.path.join(root, file))
    return files_list

+
 def extract_functions(file_path):
    with open(file_path, "r") as file:
        java_code = file.read()
@@ -28,6 +31,7 @@ def extract_functions(file_path):
            methods[method_name] = method_source_code
    return methods

+
 def extract_classes(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
@@ -47,6 +51,7 @@ def extract_classes(file_path):
            classes[class_name] = class_string
    return classes

+
 def extract_functions_and_classes(directory):
    files = find_files(directory)
    functions_dict = {}
@@ -58,4 +63,4 @@ def extract_functions_and_classes(directory):
        classes = extract_classes(file)
        if classes:
            classes_dict[file] = classes
-    return functions_dict, classes_dict
+    return functions_dict, classes_dict
--- a/scripts/parser/js2doc.py
+++ b/scripts/parser/js2doc.py
@@ -1,6 +1,7 @@
 import os
-import esprima
+
 import escodegen
+import esprima


 def find_files(directory):
@@ -11,6 +12,7 @@ def find_files(directory):
                files_list.append(os.path.join(root, file))
    return files_list

+
 def extract_functions(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
@@ -26,7 +28,6 @@ def extract_functions(file_path):
                        func_name = declaration.id.name if declaration.id else '<anonymous>'
                        functions[func_name] = escodegen.generate(declaration.init)
            elif node.type == 'ClassDeclaration':
-                class_name = node.id.name
                for subnode in node.body.body:
                    if subnode.type == 'MethodDefinition':
                        func_name = subnode.key.name
@@ -38,6 +39,7 @@ def extract_functions(file_path):
                                functions[func_name] = escodegen.generate(declaration.init)
        return functions

+
 def extract_classes(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
@@ -53,6 +55,7 @@ def extract_classes(file_path):
                classes[class_name] = ", ".join(function_names)
    return classes

+
 def extract_functions_and_classes(directory):
    files = find_files(directory)
    functions_dict = {}
--- a/scripts/parser/open_ai_func.py
+++ b/scripts/parser/open_ai_func.py
@@ -1,32 +1,32 @@
 import os
-import faiss
-import pickle
+
 import tiktoken
-from langchain.vectorstores import FAISS
 from langchain.embeddings import OpenAIEmbeddings
-
-#from langchain.embeddings import HuggingFaceEmbeddings
-#from langchain.embeddings import HuggingFaceInstructEmbeddings
-#from langchain.embeddings import CohereEmbeddings
-
+from langchain.vectorstores import FAISS
 from retry import retry


+# from langchain.embeddings import HuggingFaceEmbeddings
+# from langchain.embeddings import HuggingFaceInstructEmbeddings
+# from langchain.embeddings import CohereEmbeddings
+

 def num_tokens_from_string(string: str, encoding_name: str) -> int:
-# Function to convert string to tokens and estimate user cost.
+    # Function to convert string to tokens and estimate user cost.
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
-    total_price = ((num_tokens/1000) * 0.0004)
+    total_price = ((num_tokens / 1000) * 0.0004)
    return num_tokens, total_price

+
@retry(tries=10, delay=60)
 def store_add_texts_with_retry(store, i):
+    """Add a single document to ``store``.
+
+    The function is wrapped by ``retry(tries=10, delay=60)``, so a call that
+    raises is attempted again by the decorator.
+
+    Args:
+        store: vector store object exposing ``add_texts``.
+        i: document providing ``page_content`` and ``metadata``.
+    """
     store.add_texts([i.page_content], metadatas=[i.metadata])
-    #store_pine.add_texts([i.page_content], metadatas=[i.metadata])
+    # store_pine.add_texts([i.page_content], metadatas=[i.metadata])
+

 def call_openai_api(docs, folder_name):
-# Function to create a vector store from the documents and save it to disk.
+    # Function to create a vector store from the documents and save it to disk.

    # create output folder if it doesn't exist
    if not os.path.exists(f"outputs/{folder_name}"):
@@ -37,21 +37,22 @@ def call_openai_api(docs, folder_name):
    # remove the first element from docs
    docs.pop(0)
    # cut first n docs if you want to restart
-    #docs = docs[:n]
+    # docs = docs[:n]
    c1 = 0
    # pinecone.init(
    #     api_key="",  # find at app.pinecone.io
    #     environment="us-east1-gcp"  # next to api key in console
    # )
-    #index_name = "pandas"
+    # index_name = "pandas"
    store = FAISS.from_documents(docs_test, OpenAIEmbeddings())
-    #store_pine = Pinecone.from_documents(docs_test, OpenAIEmbeddings(), index_name=index_name)
+    # store_pine = Pinecone.from_documents(docs_test, OpenAIEmbeddings(), index_name=index_name)

    # Uncomment for MPNet embeddings
    # model_name = "sentence-transformers/all-mpnet-base-v2"
    # hf = HuggingFaceEmbeddings(model_name=model_name)
    # store = FAISS.from_documents(docs_test, hf)
-    for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format='{l_bar}{bar}| Time Left: {remaining}'):
+    for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs),
+                  bar_format='{l_bar}{bar}| Time Left: {remaining}'):
        try:
            store_add_texts_with_retry(store, i)
        except Exception as e:
@@ -64,20 +65,20 @@ def call_openai_api(docs, folder_name):
        c1 += 1
    store.save_local(f"outputs/{folder_name}")

+
 def get_user_permission(docs, folder_name):
-# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
+    # Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
    # Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
-    #docs_content = (" ".join(docs))
+    # docs_content = (" ".join(docs))
    docs_content = ""
    for doc in docs:
        docs_content += doc.page_content

-
    tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
    # Here we print the number of tokens and the approx user cost with some visually appealing formatting.
    print(f"Number of Tokens = {format(tokens, ',d')}")
    print(f"Approx Cost = ${format(total_price, ',.2f')}")
-    #Here we check for user permission before calling the API.
+    # Here we check for user permission before calling the API.
    user_input = input("Price Okay? (Y/N) \n").lower()
    if user_input == "y":
        call_openai_api(docs, folder_name)
--- a/scripts/parser/py2doc.py
+++ b/scripts/parser/py2doc.py
@@ -1,10 +1,12 @@
-import os
 import ast
-import tiktoken
+import os
 from pathlib import Path
+
+import tiktoken
 from langchain.llms import OpenAI
 from langchain.prompts import PromptTemplate

+
 def find_files(directory):
    files_list = []
    for root, dirs, files in os.walk(directory):
@@ -13,6 +15,7 @@ def find_files(directory):
                files_list.append(os.path.join(root, file))
    return files_list

+
 def extract_functions(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
@@ -25,6 +28,7 @@ def extract_functions(file_path):
                functions[func_name] = func_def
    return functions

+
 def extract_classes(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
@@ -40,6 +44,7 @@ def extract_classes(file_path):
                classes[class_name] = ", ".join(function_names)
    return classes

+
 def extract_functions_and_classes(directory):
    files = find_files(directory)
    functions_dict = {}
@@ -53,11 +58,12 @@ def extract_functions_and_classes(directory):
            classes_dict[file] = classes
    return functions_dict, classes_dict

+
 def parse_functions(functions_dict, formats, dir):
    c1 = len(functions_dict)
    for i, (source, functions) in enumerate(functions_dict.items(), start=1):
        print(f"Processing file {i}/{c1}")
-        source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
+        source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
        subfolders = "/".join(source_w.split("/")[:-1])
        Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
        for j, (name, function) in enumerate(functions.items(), start=1):
@@ -70,18 +76,19 @@ def parse_functions(functions_dict, formats, dir):
            response = llm(prompt.format(code=function))
            mode = "a" if Path(f"outputs/{source_w}").exists() else "w"
            with open(f"outputs/{source_w}", mode) as f:
-                f.write(f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
+                f.write(
+                    f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")


 def parse_classes(classes_dict, formats, dir):
    c1 = len(classes_dict)
    for i, (source, classes) in enumerate(classes_dict.items()):
-        print(f"Processing file {i+1}/{c1}")
-        source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
+        print(f"Processing file {i + 1}/{c1}")
+        source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
        subfolders = "/".join(source_w.split("/")[:-1])
        Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
        for name, function_names in classes.items():
-            print(f"Processing Class {i+1}/{c1}")
+            print(f"Processing Class {i + 1}/{c1}")
            prompt = PromptTemplate(
                input_variables=["class_name", "functions_names"],
                template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ",
@@ -92,6 +99,7 @@ def parse_classes(classes_dict, formats, dir):
            with open(f"outputs/{source_w}", "a" if Path(f"outputs/{source_w}").exists() else "w") as f:
                f.write(f"\n\n# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}")

+
 def transform_to_docs(functions_dict, classes_dict, formats, dir):
    docs_content = ''.join([str(key) + str(value) for key, value in functions_dict.items()])
    docs_content += ''.join([str(key) + str(value) for key, value in classes_dict.items()])
@@ -110,4 +118,4 @@ def transform_to_docs(functions_dict, classes_dict, formats, dir):
        parse_classes(classes_dict, formats, dir)
        print("All done!")
    else:
-        print("The API was not called. No money was spent.")
+        print("The API was not called. No money was spent.")
--- a/scripts/parser/schema/base.py
+++ b/scripts/parser/schema/base.py
@@ -2,7 +2,6 @@
 from dataclasses import dataclass

 from langchain.docstore.document import Document as LCDocument
-
 from parser.schema.schema import BaseDocument


--- a/scripts/parser/token_func.py
+++ b/scripts/parser/token_func.py
@@ -1,9 +1,9 @@
 import re
-import tiktoken
-
-from typing import List
-from parser.schema.base import Document
 from math import ceil
+from typing import List
+
+import tiktoken
+from parser.schema.base import Document


 def separate_header_and_body(text):
@@ -13,6 +13,7 @@ def separate_header_and_body(text):
    body = text[len(header):]
    return header, body

+
 def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) -> List[Document]:
    docs = []
    current_group = None
@@ -23,7 +24,8 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
        if current_group is None:
            current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
                                     extra_info=doc.extra_info)
-        elif len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
+        elif len(tiktoken.get_encoding("cl100k_base").encode(
+                current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
            current_group.text += " " + doc.text
        else:
            docs.append(current_group)
@@ -35,6 +37,7 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)

    return docs

+
 def split_documents(documents: List[Document], max_tokens: int) -> List[Document]:
    docs = []
    for doc in documents:
@@ -54,17 +57,31 @@ def split_documents(documents: List[Document], max_tokens: int) -> List[Document
                 docs.append(new_doc)
     return docs
 
+
 def group_split(documents: List[Document], max_tokens: int = 2000, min_tokens: int = 150, token_check: bool = True):
-    if token_check == False:
+    """Group small documents and then split oversized ones.
+
+    Args:
+        documents: documents to regroup.
+        max_tokens: forwarded to group_documents and split_documents.
+        min_tokens: forwarded to group_documents.
+        token_check: when falsy, ``documents`` is returned untouched.
+
+    Returns:
+        The processed documents. A stage that raises is reported on stdout
+        and its input is carried forward unchanged.
+    """
+    if not token_check:
         return documents
     print("Grouping small documents")
     try:
         documents = group_documents(documents=documents, min_tokens=min_tokens, max_tokens=max_tokens)
-    except:
+    except Exception:
         print("Grouping failed, try running without token_check")
     print("Separating large documents")
     try:
         documents = split_documents(documents=documents, max_tokens=max_tokens)
-    except:
-        print("Grouping failed, try running without token_check")
+    except Exception:
+        # Name the stage that failed so it can be told apart from a grouping error.
+        print("Splitting failed, try running without token_check")
     return documents
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -33,7 +33,7 @@ esutils==1.0.1
 et-xmlfile==1.1.0
 faiss-cpu==1.7.3
 filelock==3.9.0
-Flask==2.2.2
+Flask==2.2.5
 frozenlist==1.3.3
 greenlet==2.0.2
 gunicorn==20.1.0
@@ -88,7 +88,7 @@ python-magic==0.4.27
 python-pptx==0.6.21
 pytz==2022.7.1
 PyYAML==6.0
-redis==4.5.1
+redis==4.5.4
 regex==2022.10.31
 requests==2.28.2
 retry==0.9.2
@@ -124,8 +124,7 @@ typing-inspect==0.8.0
 typing_extensions==4.4.0
 unstructured==0.4.11
 urllib3==1.26.14
-Werkzeug==2.2.3
 wrapt==1.14.1
 XlsxWriter==3.0.8
 xxhash==3.2.0
-yarl==1.8.2
+yarl==1.8.2
--- a/setup.sh
+++ b/setup.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# Build the DocsGPT images and start the stack as individual Docker
+# containers: redis, mongo, backend, worker and frontend.
+#
+# Reads OPENAI_API_KEY from the environment and passes it to the backend and
+# worker containers as both API_KEY and EMBEDDINGS_KEY.
+
+# Run from the script's own directory so the relative paths below resolve.
+cd "$(dirname "$0")" || exit
+
+# Warn when the key is missing, but keep going: this matches the behaviour of
+# the original script, which started the containers with an empty key.
+if [ -z "$OPENAI_API_KEY" ]; then
+  echo "Warning: OPENAI_API_KEY is not set; backend and worker will start with an empty API key." >&2
+fi
+
+# Create the required directories on the host machine if they don't exist
+# (mkdir -p does nothing for directories that are already present).
+mkdir -p ./application/indexes ./application/inputs ./application/vectors
+
+# Build frontend and backend images
+docker build -t frontend_image ./frontend
+docker build -t backend_image ./application
+
+# Run redis and mongo services
+docker run -d --name redis -p 6379:6379 redis:6-alpine
+docker run -d --name mongo -p 27017:27017 -v mongodb_data_container:/data/db mongo:6
+
+# Run backend and worker services.
+# Expansions are quoted so that a checkout path or key containing spaces or
+# glob characters reaches docker as a single argument.
+docker run -d --name backend -p 5001:5001 \
+  --link redis:redis --link mongo:mongo \
+  -v "$(pwd)/application/indexes:/app/indexes" \
+  -v "$(pwd)/application/inputs:/app/inputs" \
+  -v "$(pwd)/application/vectors:/app/vectors" \
+  -e "API_KEY=$OPENAI_API_KEY" \
+  -e "EMBEDDINGS_KEY=$OPENAI_API_KEY" \
+  -e CELERY_BROKER_URL=redis://redis:6379/0 \
+  -e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
+  -e MONGO_URI=mongodb://mongo:27017/docsgpt \
+  backend_image
+
+docker run -d --name worker \
+  --link redis:redis --link mongo:mongo \
+  -e "API_KEY=$OPENAI_API_KEY" \
+  -e "EMBEDDINGS_KEY=$OPENAI_API_KEY" \
+  -e CELERY_BROKER_URL=redis://redis:6379/0 \
+  -e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
+  -e MONGO_URI=mongodb://mongo:27017/docsgpt \
+  -e API_URL=http://backend:5001 \
+  backend_image \
+  celery -A app.celery worker -l INFO
+
+# Run frontend service
+docker run -d --name frontend -p 5173:5173 \
+  -e VITE_API_HOST=http://localhost:5001 \
+  frontend_image
+
Author	SHA1	Message	Date
Alex	577d58c92b	less token less issues	2023-06-03 16:31:10 +01:00
Alex	899777632b	Update README.md	2023-06-03 16:09:10 +01:00
Alex	bbf55ca46e	Merge pull request #250 from tardigrde/main	2023-06-01 14:56:48 +01:00
Alex	3f88b04c4a	Update app.py	2023-05-31 23:49:41 +01:00
Alex	f8910ba136	Added history in streaming convo + fixed little bug with message margins on loading state	2023-05-31 23:47:16 +01:00
Alex	6c95d8b13e	Merge pull request #251 from arc53/feature/streaming Feature/streaming	2023-05-31 22:30:57 +01:00
Alex	e6bccaaf4e	Update app.py	2023-05-31 22:20:47 +01:00
Alex	3b8039a580	Merge branch 'main' into feature/streaming	2023-05-31 22:15:53 +01:00
Alex	fae3f55010	Working streaming	2023-05-31 17:44:20 +01:00
Alex	20c877f75b	working fe	2023-05-31 15:42:17 +01:00
Alex	8380858a82	some fixes	2023-05-30 20:00:41 +01:00
Alex	d2358c399d	working version	2023-05-30 19:43:06 +01:00
Alex	c3af8a77af	working streams	2023-05-29 17:55:43 +01:00
Levente Csőke	bc5a0b030b	Update .env-template to OPENAI_API_KEY	2023-05-26 08:57:11 +02:00
Alex	0b94f1717f	Merge pull request #246 from arc53/feature/gpt4all Feature/gpt4all	2023-05-25 19:42:20 +01:00
Alex	aaa1249a41	model fix + env var	2023-05-25 19:33:37 +01:00
Alex	ffaa22c49b	reverse history order to use latest history firts Co-Authored-By: Pavel <32868631+pabik@users.noreply.github.com>	2023-05-25 16:40:11 +01:00
Alex	0b78480977	init	2023-05-25 15:14:47 +01:00
Alex	6b6737613a	Merge pull request #243 from nazihkalo/main updating the bulk ingest file metadata logic	2023-05-20 16:02:32 +01:00
Nazih Kalo	da5d62cc1c	updating the bulk ingest file metadata to account for parsers that output lists	2023-05-19 10:29:18 -07:00
Alex	6a68b63192	history fix	2023-05-19 13:09:41 +01:00
Alex	ff2e79fe7b	streaming experiments	2023-05-18 23:52:59 +01:00
Alex	1800e51b19	Merge pull request #241 from arc53/feature/history Feature/history	2023-05-18 18:50:35 +01:00
Alex	ba9c505249	accidentaly deleted frontend container	2023-05-18 18:45:15 +01:00
Alex	bc9f1c17ed	History Co-Authored-By: riccardofresi <89981746+riccardofresi@users.noreply.github.com>	2023-05-18 18:42:23 +01:00
Alex	74845aed64	history init	2023-05-18 14:27:13 +01:00
Alex	e49dd0cc6a	metadata on ingestion	2023-05-17 21:41:24 +01:00
Alex	27c45ae24a	Merge pull request #236 from larinam/fixbuild_github_token fix workflow: upgrade "build and push" action version to the latest	2023-05-16 12:04:27 +01:00
Anton Larin	364a14adaf	fix workflow: upgrade "build and push" action version to the latest	2023-05-16 08:02:13 +02:00
Alex	5c560b1dd5	Merge pull request #235 from larinam/fixbuild_github_token fix workflow: adjust permissions according to documentation	2023-05-15 23:17:53 +01:00
Anton Larin	28b8b88332	fix workflow: adjust permissions according to documentation https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#publishing-a-package-using-an-action	2023-05-15 21:22:06 +02:00
Alex	e39ef0cc9e	Merge pull request #234 from larinam/fixbuild_github_token fix workflow: login to GHCR according to the GH documentation	2023-05-15 18:17:48 +01:00
Anton Larin	8098d3fec8	fix workflow nad login to GHCR according to the GH documentation https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions	2023-05-15 18:55:40 +02:00
Alex	059ffe09ea	Merge pull request #232 from larinam/lint Lint	2023-05-15 13:53:09 +01:00
Alex	36a845c29e	Merge pull request #231 from larinam/main Proper PEP8 formatting	2023-05-15 13:45:52 +01:00
GH Action - Upstream Sync	ce6f0dab56	Merge branch 'main' of https://github.com/arc53/DocsGPT	2023-05-15 12:05:18 +00:00
Alex	f200ab10a4	Merge pull request #233 from arc53/dependabot/pip/scripts/flask-2.2.5 Bump flask from 2.2.2 to 2.2.5 in /scripts	2023-05-15 12:50:30 +01:00
Alex	3001688e0e	Update requirements.txt	2023-05-15 12:46:39 +01:00
GH Action - Upstream Sync	a73774099e	Merge branch 'main' of https://github.com/arc53/DocsGPT	2023-05-15 11:03:45 +00:00
dependabot[bot]	b28676d52c	Bump flask from 2.2.2 to 2.2.5 in /scripts Bumps [flask](https://github.com/pallets/flask) from 2.2.2 to 2.2.5. - [Release notes](https://github.com/pallets/flask/releases) - [Changelog](https://github.com/pallets/flask/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/flask/compare/2.2.2...2.2.5) --- updated-dependencies: - dependency-name: flask dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com>	2023-05-15 11:00:19 +00:00
Alex	eef012b4d1	Merge pull request #225 from arc53/dependabot/pip/application/flask-2.3.2 Bump flask from 2.2.3 to 2.3.2 in /application	2023-05-15 11:58:54 +01:00
Alex	1417a1c020	Update requirements.txt	2023-05-15 11:49:41 +01:00
Anton Larin	962becb9a5	Linting * validate python formatting on every build with Ruff * fix lint warnings	2023-05-13 10:36:17 +02:00
Anton Larin	168648e789	Proper PEP8 formatting	2023-05-12 12:02:25 +02:00
Alex	7f56f57778	better markdown styling	2023-05-06 15:22:23 +01:00
Alex	6cadddc2fc	Merge pull request #223 from Zillibub/main Moved env variables to the pydantic settings file	2023-05-02 11:07:52 +01:00
dependabot[bot]	15fd54eac4	Bump flask from 2.2.3 to 2.3.2 in /application Bumps [flask](https://github.com/pallets/flask) from 2.2.3 to 2.3.2. - [Release notes](https://github.com/pallets/flask/releases) - [Changelog](https://github.com/pallets/flask/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/flask/compare/2.2.3...2.3.2) --- updated-dependencies: - dependency-name: flask dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com>	2023-05-02 00:27:05 +00:00
Serj	31350e6302	Set celery and mongo urls as default	2023-04-30 11:03:09 +01:00
Serj	8742cdae0a	Refactored url join	2023-04-30 10:46:52 +01:00
Serj	4efcb388ff	Added settings usage to the worker	2023-04-29 15:58:02 +01:00
Serj	2d92e95c8a	Added settings usage to the worker	2023-04-29 15:56:32 +01:00
Serj	47e5d5684a	Replace other env variables in the file	2023-04-29 15:50:02 +01:00
Serj	b723e14d98	Added embeddings name variable	2023-04-29 15:46:09 +01:00
Serj	c9d24b8f42	Added llm model variable	2023-04-29 15:44:47 +01:00
Serj	43622e7ab1	Added settings file	2023-04-29 15:40:55 +01:00
Serge Kozloff	5cfc185ba5	Merge pull request #2 from arc53/main d	2023-04-29 15:39:46 +01:00
Alex	4be2635fbe	Merge pull request #221 from darth-pika-hu/main Create setup.sh	2023-04-27 21:36:29 +01:00
Darth Pika	0beafb8391	Update setup.sh This script includes the necessary changes to use container linking and updated environment variables for the `backend` and `worker` containers. Make sure you have the `./frontend` and `./application` directories in the correct locations before running the script.	2023-04-27 12:39:03 -07:00
Darth Pika	1d2654b9fa	Update setup.sh Create required directories on the host machine if they don't exist.	2023-04-27 12:02:11 -07:00
Darth Pika	a4bc3673e7	Create setup.sh Added a bash script to help with installation issues.	2023-04-27 11:40:25 -07:00
Alex	fa080537e8	Merge pull request #220 from Zillibub/main Updated readme for development run	2023-04-27 12:27:20 +01:00
Serj	bdf67a7db7	Added dev docker compose file	2023-04-26 19:05:50 +01:00
Serge Kozloff	db4cdc901c	Merge pull request #1 from arc53/main t	2023-04-26 18:55:39 +01:00
Serj	16a540b89b	Expand readme and added port in wsgi	2023-04-26 18:54:59 +01:00
Alex	e00ec9ac3f	Update chat_combine_prompt.txt	2023-04-26 15:01:46 +01:00
Alex	fc760afdfc	Update chat_combine_prompt.txt	2023-04-26 14:54:26 +01:00
Alex	cb47bcdb0e	Update ConversationBubble.tsx	2023-04-26 13:35:05 +01:00
Alex	8d62559ca8	Merge pull request #219 from arc53/feature/code-highlighting code highlighting	2023-04-26 10:30:39 +01:00
Alex	dbe9c4dc18	init	2023-04-25 17:01:44 +01:00
Serj	1609b4562d	Added mongo db start	2023-04-24 19:22:42 +01:00
Serj	b6cadb1d65	Removed spaces	2023-04-24 18:46:05 +01:00
Serj	7aafac5b5e	Expanded developer start a little bit	2023-04-24 18:39:53 +01:00
Pavel	36f0aacb19	Merge pull request #218 from arc53/feature/web-widget web widget	2023-04-23 15:12:18 +01:00
Alex	0c1a6a918d	web widget	2023-04-23 15:07:55 +01:00
Alex	d1f5ff4dba	Merge pull request #214 from SAMZONG/main	2023-04-18 15:12:54 +01:00
samzong	77e6df2a1c	add auto sync fork for workflow Signed-off-by: samzong <samzong.lu@gmail.com>	2023-04-18 04:24:10 +00:00
Alex	119c037f24	Merge pull request #209 from arc53/dot-env .env	2023-04-11 22:50:19 +01:00
Alex	97fe1abfd8	.env Co-Authored-By: Subhadip N <subhadip@get-deck.com>	2023-04-11 22:49:47 +01:00
Alex	3a0163f0fb	Merge pull request #202 from yuchen9/feat/ui-enhancement feat: ui enhancement	2023-04-07 11:17:12 +01:00
Chen	d3fab69155	feat: ui enhancement	2023-04-06 23:54:16 +08:00
Alex	9395d2c091	celery load	2023-04-06 12:16:30 +01:00
Alex	b9efb98280	Update README.md	2023-04-04 14:12:35 +01:00
Alex	60bb264663	async calls	2023-04-03 14:37:09 +01:00
Alex	316dd2f165	Merge pull request #197 from arc53/dependabot/pip/application/redis-4.5.4 Bump redis from 4.5.3 to 4.5.4 in /application	2023-04-03 13:01:13 +01:00
dependabot[bot]	8a0f700563	Bump redis from 4.5.3 to 4.5.4 in /application Bumps [redis](https://github.com/redis/redis-py) from 4.5.3 to 4.5.4. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v4.5.3...v4.5.4) --- updated-dependencies: - dependency-name: redis dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com>	2023-03-31 14:31:03 +00:00
Alex	3d0c6eafec	gpt4- compatable	2023-03-31 10:45:40 +01:00
Alex	46e055833b	Merge pull request #196 from arc53/dependabot/pip/scripts/redis-4.5.4 Bump redis from 4.5.3 to 4.5.4 in /scripts	2023-03-30 12:52:15 +01:00
dependabot[bot]	80dfdd1cb9	Bump redis from 4.5.3 to 4.5.4 in /scripts Bumps [redis](https://github.com/redis/redis-py) from 4.5.3 to 4.5.4. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v4.5.3...v4.5.4) --- updated-dependencies: - dependency-name: redis dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com>	2023-03-30 11:49:21 +00:00
Alex	db21678b74	Merge pull request #192 from arc53/dependabot/pip/scripts/redis-4.5.3 Bump redis from 4.5.1 to 4.5.3 in /scripts	2023-03-30 12:48:58 +01:00
Alex	09c7fe0565	Merge pull request #193 from arc53/dependabot/pip/application/redis-4.5.3 Bump redis from 4.5.2 to 4.5.3 in /application	2023-03-30 12:48:35 +01:00
Alex	b6dfb2c856	map_reduce	2023-03-30 12:44:25 +01:00
Alex	ab46ba521f	different prompts	2023-03-29 18:36:58 +01:00
Alex	4a7670f2aa	Update app.py	2023-03-29 17:32:00 +01:00
Alex	9ba86bc174	Update preferenceSlice.ts	2023-03-28 10:19:42 +01:00
Pavel	2ebe5e051c	discord bot fix Stop random answers	2023-03-28 01:51:54 +04:00
dependabot[bot]	24e98abd15	Bump redis from 4.5.2 to 4.5.3 in /application Bumps [redis](https://github.com/redis/redis-py) from 4.5.2 to 4.5.3. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v4.5.2...v4.5.3) --- updated-dependencies: - dependency-name: redis dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com>	2023-03-27 21:36:39 +00:00
dependabot[bot]	b7f1a94ba4	Bump redis from 4.5.1 to 4.5.3 in /scripts Bumps [redis](https://github.com/redis/redis-py) from 4.5.1 to 4.5.3. - [Release notes](https://github.com/redis/redis-py/releases) - [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES) - [Commits](https://github.com/redis/redis-py/compare/v4.5.1...v4.5.3) --- updated-dependencies: - dependency-name: redis dependency-type: direct:production ... Signed-off-by: dependabot[bot] <support@github.com>	2023-03-27 21:34:50 +00:00
Alex	70bc7465c9	Merge pull request #191 from arc53/features/little-fixes Features/little fixes	2023-03-27 22:28:55 +01:00
Alex	65c2568427	Update app.py	2023-03-27 22:23:36 +01:00
Alex	186e7bf402	update for better runs + storage sync	2023-03-27 22:07:26 +01:00
Alex	e6f1c7d0c3	mobile more space	2023-03-27 21:50:54 +01:00
Alex	87ad9a3190	Update Upload.tsx	2023-03-27 21:48:44 +01:00
Alex	0ed45f8754	fix pending status	2023-03-27 21:48:16 +01:00
Alex	116e4401c4	Update .env.production	2023-03-27 21:44:22 +01:00
Alex	c3c0e643d2	Update chat_combine_prompt.txt	2023-03-27 21:42:06 +01:00
Alex	d5522e7c08	prep things	2023-03-27 19:29:10 +01:00
Alex	658b14ba26	failed upload	2023-03-27 19:22:06 +01:00
Alex	38f8469d0b	Update Navigation.tsx	2023-03-27 19:11:57 +01:00