init2

2026-01-28 09:50:33 +00:00 · 2023-02-03 12:45:29 +00:00
parent 2135b8420f
commit b71a9bf5ee
14 changed files with 1683 additions and 0 deletions
--- a/application/.env_sample
+++ b/application/.env_sample
@@ -0,0 +1 @@
+OPENAI_API_KEY=your_api_key
--- a/application/Dockerfile
+++ b/application/Dockerfile
@@ -0,0 +1,11 @@
+FROM python:3.9
+
+WORKDIR /app
+
+# Install dependencies before copying the rest of the source so that this
+# layer stays cached until requirements.txt itself changes.
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . /app
+
+# NOTE(review): FLASK_ENV is deprecated as of Flask 2.2 (the version pinned in
+# requirements.txt). It still switches on debug mode here, which is intended
+# for local development only.
+ENV FLASK_APP=app.py
+ENV FLASK_ENV=development
+
+# Port used by `flask run` by default.
+EXPOSE 5000
+
+# Bind to all interfaces so the server is reachable from outside the container.
+CMD ["flask", "run", "--host=0.0.0.0"]
--- a/application/app.py
+++ b/application/app.py
@@ -0,0 +1,61 @@
+import os
+import pickle
+import dotenv
+import datetime
+from flask import Flask, request, render_template
+# os.environ["LANGCHAIN_HANDLER"] = "langchain"
+import faiss
+from langchain import OpenAI
+from langchain.chains import VectorDBQAWithSourcesChain
+from langchain.prompts import PromptTemplate
+
+# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
+dotenv.load_dotenv()
+
+# Load the FAISS index, the prompt template and the pickled vector store.
+# All three paths are resolved relative to the current working directory.
+index = faiss.read_index("docs.index")
+
+# The template contains non-ASCII characters, so decode it explicitly as UTF-8
+# instead of relying on the platform's default encoding.
+with open("combine_prompt.txt", "r", encoding="utf-8") as f:
+    template = f.read()
+
+# SECURITY: pickle.load can execute arbitrary code while loading. Only ship a
+# faiss_store.pkl that was produced by a trusted ingestion run.
+with open("faiss_store.pkl", "rb") as f:
+    store = pickle.load(f)
+
+app = Flask(__name__)
+
+
+@app.route("/")
+def home():
+    """Serve the chat page rendered from the ``index.html`` template."""
+    page = render_template("index.html")
+    return page
+
+
+@app.route("/api/answer", methods=["POST"])
+def api_answer():
+    """Answer a question using the indexed documentation.
+
+    Expects a JSON body of the form ``{"question": "<text>"}``.
+
+    Returns:
+        The chain's result dict (serialized to JSON by Flask) with the
+        ``answer`` field post-processed for the frontend, or a JSON error
+        object with status 400 when the body is not a JSON object carrying a
+        non-empty ``question`` string.
+    """
+    # silent=True yields None instead of raising, so every malformed request
+    # (not JSON, not an object, or no usable "question") gets the same JSON
+    # 400 response instead of an unhandled KeyError/TypeError.
+    data = request.get_json(silent=True)
+    question = data.get("question") if isinstance(data, dict) else None
+    if not isinstance(question, str) or not question.strip():
+        return {"error": "Request body must be JSON with a non-empty 'question' string."}, 400
+
+    # The FAISS index is loaded separately from the pickled store; attach it.
+    store.index = index
+    # create a prompt template
+    c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template)
+    # create a chain with the prompt template and the store
+    chain = VectorDBQAWithSourcesChain.from_llm(llm=OpenAI(temperature=0), vectorstore=store, combine_prompt=c_prompt)
+    # fetch the answer
+    result = chain({"question": question})
+
+    # some formatting for the frontend: the literal two-character sequence
+    # backslash-n becomes an HTML line break and the "SOURCES:" label is dropped
+    result['answer'] = result['answer'].replace("\\n", "<br>").replace("SOURCES:", "")
+
+    return result
+
+
+# handling CORS
+@app.after_request
+def after_request(response):
+    """Add the CORS headers below to the response and return it."""
+    cors_headers = (
+        ('Access-Control-Allow-Origin', '*'),
+        ('Access-Control-Allow-Headers', 'Content-Type,Authorization'),
+        ('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS'),
+    )
+    for header_name, header_value in cors_headers:
+        response.headers.add(header_name, header_value)
+    return response
+
+
+# Start the Flask server with debug mode enabled when this file is run as a
+# script.
+if __name__ == "__main__":
+    app.run(debug=True)
--- a/application/combine_prompt.txt
+++ b/application/combine_prompt.txt
@@ -0,0 +1,27 @@
+You are a DocsGPT bot assistant by Arc53 that provides help with programming libraries. You give thorough answers with code examples.
+Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
+ALWAYS return a "SOURCES" part in your answer.
+
+QUESTION: How to merge tables in pandas?
+=========
+Content: pandas provides various facilities for easily combining together Series or DataFrame with various kinds of set logic for the indexes and relational algebra functionality in the case of join / merge-type operations.
+Source: 28-pl
+Content: pandas provides a single function, merge(), as the entry point for all standard database join operations between DataFrame or named Series objects: \n\npandas.merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False, validate=None)
+Source: 30-pl
+=========
+FINAL ANSWER: To merge two tables in pandas, you can use the pd.merge() function. The basic syntax is: \n\npd.merge(left, right, on, how) \n\nwhere left and right are the two tables to merge, on is the column to merge on, and how is the type of merge to perform. \n\nFor example, to merge the two tables df1 and df2 on the column 'id', you can use: \n\npd.merge(df1, df2, on='id', how='inner')
+SOURCES: 28-pl 30-pl
+
+QUESTION: How to eat vegetables using pandas?
+=========
+Content: ExtensionArray.repeat(repeats, axis=None) Returns a new ExtensionArray where each element of the current ExtensionArray is repeated consecutively a given number of times. \n\nParameters: repeats int or array of ints. The number of repetitions for each element. This should be a positive integer. Repeating 0 times will return an empty array. axis (0 or ‘index’, 1 or ‘columns’), default 0 The axis along which to repeat values. Currently only axis=0 is supported.
+Source: 0-pl
+=========
+FINAL ANSWER: You can't eat vegetables using pandas. You can only eat them using your mouth.
+SOURCES:
+
+QUESTION: {question}
+=========
+{summaries}
+=========
+FINAL ANSWER:
--- a/application/docs.index
+++ b/application/docs.index
--- a/application/faiss_store.pkl
+++ b/application/faiss_store.pkl
--- a/application/package-lock.json
+++ b/application/package-lock.json
--- a/application/package.json
+++ b/application/package.json
@@ -0,0 +1,5 @@
+{
+  "devDependencies": {
+    "tailwindcss": "^3.2.4"
+  }
+}
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -0,0 +1,34 @@
+aiohttp==3.8.3
+aiosignal==1.3.1
+async-timeout==4.0.2
+attrs==22.2.0
+charset-normalizer==2.1.1
+click==8.1.3
+dataclasses-json==0.5.7
+faiss-cpu==1.7.3
+Flask==2.2.2
+frozenlist==1.3.3
+greenlet==2.0.2
+idna==3.4
+itsdangerous==2.1.2
+Jinja2==3.1.2
+langchain==0.0.76
+MarkupSafe==2.1.2
+marshmallow==3.19.0
+marshmallow-enum==1.5.1
+multidict==6.0.4
+mypy-extensions==0.4.3
+numpy==1.24.1
+openai==0.26.4
+packaging==23.0
+pydantic==1.10.4
+python-dotenv==0.21.1
+PyYAML==6.0
+requests==2.28.2
+SQLAlchemy==1.4.46
+tqdm==4.64.1
+typing-inspect==0.8.0
+typing_extensions==4.4.0
+urllib3==1.26.14
+Werkzeug==2.2.2
+yarl==1.8.2
--- a/application/static/src/chat.js
+++ b/application/static/src/chat.js
@@ -0,0 +1,44 @@
+var el = document.getElementById('message-form');
+if (el) {
+    // Append one chat bubble to the message list and scroll it into view.
+    // `content` is inserted as plain text unless `asHtml` is true.
+    const appendMessage = function (bubbleClasses, tagName, content, asHtml) {
+        const bubble = document.createElement('div');
+        bubble.className = bubbleClasses;
+        const body = document.createElement(tagName);
+        body.className = 'text-sm';
+        if (asHtml) {
+            body.innerHTML = content;
+        } else {
+            body.textContent = content;
+        }
+        bubble.appendChild(body);
+        document.getElementById("messages").appendChild(bubble);
+        const chatWindow = document.getElementById("chat-container");
+        chatWindow.scrollTop = chatWindow.scrollHeight;
+    };
+
+    // Put the submit button back into its idle state.
+    const resetSubmitButton = function () {
+        const button = document.getElementById("button-submit");
+        button.innerHTML = 'Send';
+        button.disabled = false;
+    };
+
+    el.addEventListener("submit", function (event) {
+        console.log("submitting")
+        event.preventDefault()
+        const input = document.getElementById("message-input");
+        const message = input.value;
+        // The user's text is untrusted: render it as text, never as markup.
+        appendMessage('bg-blue-500 text-white p-2 rounded-lg mb-2 self-end', 'p', message, false);
+        input.value = "";
+        const button = document.getElementById("button-submit");
+        button.innerHTML = '<i class="fa fa-circle-o-notch fa-spin"></i> Thinking...';
+        button.disabled = true;
+
+        fetch('/api/answer', {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json',
+            },
+            body: JSON.stringify({question: message}),
+        })
+            .then(response => {
+                // Treat HTTP error statuses as failures instead of rendering them as answers.
+                if (!response.ok) {
+                    throw new Error('Request failed with status ' + response.status);
+                }
+                return response.json();
+            })
+            .then(data => {
+                console.log('Success:', data);
+                // The answer is inserted as HTML because the backend encodes line breaks as <br>.
+                // NOTE(review): the answer text is model-generated; consider sanitizing it.
+                appendMessage('bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start', 'code', data.answer, true);
+                resetSubmitButton();
+            })
+            .catch((error) => {
+                console.error('Error:', error);
+                resetSubmitButton();
+            });
+
+
+    });
+}
--- a/application/static/src/input.css
+++ b/application/static/src/input.css
@@ -0,0 +1,56 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+
+/* Scrollable, fixed-height area that holds the chat messages */
+#chat-container {
+  height: 44rem;
+  background-color: white;
+  padding: 10px;
+  overflow: auto;
+}
+
+/* Hand-written rules for utility-style class names, declared after the
+   Tailwind layers above */
+.bg-gray-200 {
+  background-color: #edf2f7;
+}
+
+.bg-gray-900 {
+  background-color: #1a202c;
+}
+
+.rounded-lg {
+  border-radius: 0.5rem;
+}
+
+.shadow {
+  box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12), 0 1px 2px rgba(0, 0, 0, 0.24);
+}
+
+.text-gray-700 {
+  color: #4a5568;
+}
+
+.text-sm {
+  font-size: 0.875rem;
+}
+
+.p-4 {
+  padding: 1.5rem;
+}
+
+/* Circular spinner: a grey ring with a blue top edge, rotated by `spin` */
+.loader {
+  border: 16px solid #f3f3f3; /* Light grey */
+  border-top: 16px solid #3498db; /* Blue */
+  border-radius: 50%;
+  width: 120px;
+  height: 120px;
+  animation: spin 2s linear infinite;
+}
+
+/* One full clockwise turn */
+@keyframes spin {
+  0% { transform: rotate(0deg); }
+  100% { transform: rotate(360deg); }
+}
+
+
--- a/application/tailwind.config.js
+++ b/application/tailwind.config.js
@@ -0,0 +1,8 @@
+/** @type {import('tailwindcss').Config} */
+module.exports = {
+  // Files Tailwind scans for class names: the HTML templates and the
+  // front-end scripts under static/src.
+  content: ["./templates/**/*.html", "./static/src/**/*.js"],
+  // No theme extensions are defined.
+  theme: {
+    extend: {},
+  },
+  // No plugins are enabled.
+  plugins: [],
+}
--- a/application/templates/index.html
+++ b/application/templates/index.html
@@ -0,0 +1,65 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <title>DocsGPT 🦖 Preview</title>
+    <link href="{{url_for('static',filename='dist/css/output.css')}}" rel="stylesheet">
+    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
+
+
+  </head>
+  <body>
+    <header class="bg-white p-2 flex justify-between items-center">
+      <h1 class="text-lg font-medium">DocsGPT</h1>
+      <a href="https://github.com/arc53/docsgpt" class="text-blue-500 hover:text-blue-800">About</a>
+    </header>
+    <div class="flex">
+      <div class="w-3/4">
+        <div class="w-full flex flex-col">
+          <div id="chat-container">
+
+            <div id="messages" class="w-full flex flex-col" >
+              <div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
+                <p class="text-sm">Hello, ask me anything about this library. I'm here to help.</p>
+              </div>
+              <div class="bg-blue-500 text-white p-2 rounded-lg mb-2 self-end">
+                <p class="text-sm">How to create API key for Api gateway?</p>
+              </div>
+              <div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
+                <p class="text-sm">Import the boto3 library and create a client for the API Gateway service:</p>
+
+
+              </div>
+              <div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
+                <code class="text-sm">client = boto3.client('apigateway')</code>
+
+
+              </div>
+              <div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
+                <p class="text-sm">Create an API key:</p>
+
+              </div>
+              <div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
+                <code class="text-sm">response = client.create_api_key(<br>name='API_KEY_NAME',<br>description='API key description',<br>enabled=True)<br>api_key = response['value']</code>
+
+              </div>
+          </div>
+        </div>
+
+        <div class=" flex mt-4 mb-2">
+        <form id="message-form">
+          <input id="message-input" class="bg-white p-2 rounded-lg ml-2 w-[46rem]" type="text" placeholder="Type your message here...">
+          <button id="button-submit" class="bg-blue-500 text-white p-2 rounded-lg ml-2 mr-2 ml-2" type="submit">Send</button>
+        </form>
+        </div>
+
+
+    </div>
+        </div>
+        <div class="w-1/4 p-2">
+          <p class="text-sm">This is a chatbot that uses GPT-3, Faiss and <a href="https://github.com/hwchase17/langchain" class="text-blue-500 hover:text-blue-800">LangChain</a> to answer questions.</p>
+          <p class="text-sm">The source code is available on <a href="https://github.com/arc53/docsgpt" class="text-blue-500 hover:text-blue-800">GitHub</a>.</p>
+        </div>
+    </div>
+  <script src="{{url_for('static',filename='src/chat.js')}}"></script>
+  </body>
+</html>
--- a/scripts/ingest_rst.py
+++ b/scripts/ingest_rst.py
@@ -0,0 +1,40 @@
+from pathlib import Path
+from langchain.text_splitter import CharacterTextSplitter
+import faiss
+from langchain.vectorstores import FAISS
+from langchain.embeddings import OpenAIEmbeddings
+import pickle
+import dotenv
+import os
+
+# Load environment variables from a local .env file.
+dotenv.load_dotenv()
+
+
+# Collect every reStructuredText file under pandasdocs/, including all child
+# directories.
+ps = list(Path("pandasdocs/").glob("**/*.rst"))
+
+data = []
+sources = []
+for p in ps:
+    # Decode explicitly as UTF-8 rather than with the platform default encoding.
+    with open(p, encoding="utf-8") as f:
+        data.append(f.read())
+    # Keep the source as a plain string: a pickled Path object is tied to the
+    # OS flavour that created it, which would make faiss_store.pkl unloadable
+    # on a different platform.
+    sources.append(str(p))
+
+# Here we split the documents, as needed, into smaller chunks.
+# We do this due to the context limits of the LLMs.
+text_splitter = CharacterTextSplitter(chunk_size=1500, separator="\n")
+docs = []
+metadatas = []
+for text, source in zip(data, sources):
+    splits = text_splitter.split_text(text)
+    docs.extend(splits)
+    # One independent dict per chunk; list multiplication would make every
+    # chunk of a document share a single dict object.
+    metadatas.extend({"source": source} for _ in splits)
+
+
+# Here we create a vector store from the documents and save it to disk.
+# The raw FAISS index is written to its own file and detached from the store
+# before the store is pickled, so two files are produced: docs.index and
+# faiss_store.pkl.
+store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
+faiss.write_index(store.index, "docs.index")
+store.index = None
+with open("faiss_store.pkl", "wb") as f:
+    pickle.dump(store, f)