diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index 5f04678d..3755eb5b 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -1,20 +1,44 @@
+import asyncio
 import os
 from flask import Blueprint, request, jsonify, Response
 import requests
 import json
 import datetime
+import logging
+import traceback
+from celery.result import AsyncResult
-from langchain.chat_models import AzureChatOpenAI
 from pymongo import MongoClient
 from bson.objectid import ObjectId
-from werkzeug.utils import secure_filename
-import http.client
+from transformers import GPT2TokenizerFast
+
+from langchain import FAISS
+from langchain import VectorDBQA, Cohere, OpenAI
+from langchain.chains import LLMChain, ConversationalRetrievalChain
+from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
+from langchain.chains.question_answering import load_qa_chain
+from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
+from langchain.embeddings import (
+    OpenAIEmbeddings,
+    HuggingFaceHubEmbeddings,
+    CohereEmbeddings,
+    HuggingFaceInstructEmbeddings,
+)
+from langchain.prompts import PromptTemplate
+from langchain.prompts.chat import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+    AIMessagePromptTemplate,
+)
+from langchain.schema import HumanMessage, AIMessage
-from application.app import (logger, count_tokens, chat_combine_template, gpt_model,
-                             api_key_set, embeddings_key_set, get_docsearch, get_vectorstore)
 from application.core.settings import settings
-from application.llm.openai import OpenAILLM
+from application.llm.openai import OpenAILLM, AzureOpenAILLM
+from application.error import bad_request
+
+logger = logging.getLogger(__name__)
 
 mongo = MongoClient(settings.MONGO_URI)
 db = mongo["docsgpt"]
@@ -22,28 +46,118 @@
 conversations_collection = db["conversations"]
 vectors_collection = db["vectors"]
 
 answer = Blueprint('answer', __name__)
 
+if settings.LLM_NAME == "gpt4":
+    gpt_model = 'gpt-4'
+else:
+    gpt_model = 'gpt-3.5-turbo'
+
+if settings.SELF_HOSTED_MODEL:
+    from langchain.llms import HuggingFacePipeline
+    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
+    model_id = settings.LLM_NAME  # hf model id (Arc53/docsgpt-7b-falcon, Arc53/docsgpt-14b)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(model_id)
+    pipe = pipeline(
+        "text-generation", model=model,
+        tokenizer=tokenizer, max_new_tokens=2000,
+        device_map="auto", eos_token_id=tokenizer.eos_token_id
+    )
+    hf = HuggingFacePipeline(pipeline=pipe)
+
+# load the prompts
+current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+with open(os.path.join(current_dir, "prompts", "combine_prompt.txt"), "r") as f:
+    template = f.read()
+
+with open(os.path.join(current_dir, "prompts", "combine_prompt_hist.txt"), "r") as f:
+    template_hist = f.read()
+
+with open(os.path.join(current_dir, "prompts", "question_prompt.txt"), "r") as f:
+    template_quest = f.read()
+
+with open(os.path.join(current_dir, "prompts", "chat_combine_prompt.txt"), "r") as f:
+    chat_combine_template = f.read()
+
+with open(os.path.join(current_dir, "prompts", "chat_reduce_prompt.txt"), "r") as f:
+    chat_reduce_template = f.read()
+
+api_key_set = settings.API_KEY is not None
+embeddings_key_set = settings.EMBEDDINGS_KEY is not None
+
+
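+# Async helpers: the conversational chain is awaited through chain.arun(), so
+# a fresh event loop is created per request from the synchronous Flask view.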
+async def async_generate(chain, question, chat_history):
+    result = await chain.arun({"question": question, "chat_history": chat_history})
+    return result
+
+
+def count_tokens(string):
+    tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
+    return len(tokenizer(string)['input_ids'])
+
+
+def run_async_chain(chain, question, chat_history):
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    result = {}
+    try:
+        answer = loop.run_until_complete(async_generate(chain, question, chat_history))
+    finally:
+        loop.close()
+    result["answer"] = answer
+    return result
+
+
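+# Resolve the request's "active_docs" value to an on-disk FAISS index path:
+# "local/default" (or a missing key) maps to the bundled default index under
+# "application/", "local/<name>" maps to "application/indexes/local/<name>",
+# and anything else maps to "application/vectors/<user>/<name>".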
+def get_vectorstore(data):
+    if "active_docs" in data:
+        if data["active_docs"].split("/")[0] == "local":
+            if data["active_docs"].split("/")[1] == "default":
+                vectorstore = ""
+            else:
+                vectorstore = "indexes/" + data["active_docs"]
+        else:
+            vectorstore = "vectors/" + data["active_docs"]
+        if data["active_docs"] == "default":
+            vectorstore = ""
+    else:
+        vectorstore = ""
+    vectorstore = os.path.join("application", vectorstore)
+    return vectorstore
+
+
+def get_docsearch(vectorstore, embeddings_key):
+    if settings.EMBEDDINGS_NAME == "openai_text-embedding-ada-002":
+        if is_azure_configured():
+            os.environ["OPENAI_API_TYPE"] = "azure"
+            openai_embeddings = OpenAIEmbeddings(model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME)
+        else:
+            openai_embeddings = OpenAIEmbeddings(openai_api_key=embeddings_key)
+        docsearch = FAISS.load_local(vectorstore, openai_embeddings)
+    elif settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
+        docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
+    elif settings.EMBEDDINGS_NAME == "huggingface_hkunlp/instructor-large":
+        docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
+    elif settings.EMBEDDINGS_NAME == "cohere_medium":
+        docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
+    return docsearch
+
+
 def is_azure_configured():
     return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
+
+
 def complete_stream(question, docsearch, chat_history, api_key, conversation_id):
-
-    # openai.api_key = api_key
     if is_azure_configured():
-        # logger.debug("in Azure")
-        # openai.api_type = "azure"
-        # openai.api_version = settings.OPENAI_API_VERSION
-        # openai.api_base = settings.OPENAI_API_BASE
-        # llm = AzureChatOpenAI(
-        #     openai_api_key=api_key,
-        #     openai_api_base=settings.OPENAI_API_BASE,
-        #     openai_api_version=settings.OPENAI_API_VERSION,
-        #     deployment_name=settings.AZURE_DEPLOYMENT_NAME,
-        # )
-        llm = OpenAILLM(api_key=api_key)
+        logger.debug("in Azure")
+        # AzureOpenAILLM (added below in application/llm/openai.py) exposes the
+        # same gen()/gen_stream() interface this generator relies on.
+        llm = AzureOpenAILLM(
+            openai_api_key=api_key,
+            openai_api_base=settings.OPENAI_API_BASE,
+            openai_api_version=settings.OPENAI_API_VERSION,
+            deployment_name=settings.AZURE_DEPLOYMENT_NAME,
+        )
     else:
         logger.debug("plain OpenAI")
         llm = OpenAILLM(api_key=api_key)
-        # llm = ChatOpenAI(openai_api_key=api_key)
+
     docs = docsearch.similarity_search(question, k=2)
     # join all page_content together with a newline
     docs_together = "\n".join([doc.page_content for doc in docs])
@@ -71,33 +185,20 @@ def complete_stream(question, docsearch, chat_history, api_key, conversation_id)
             messages_combine.append({"role": "user", "content": i["prompt"]})
             messages_combine.append({"role": "system", "content": i["response"]})
     messages_combine.append({"role": "user", "content": question})
-    # completion = openai.ChatCompletion.create(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
-    #                                           messages=messages_combine, stream=True, max_tokens=500, temperature=0)
-    import sys
-    print(api_key)
-    reponse_full = ""
-    # for line in completion:
-    #     if "content" in line["choices"][0]["delta"]:
-    #         # check if the delta contains content
-    #         data = json.dumps({"answer": str(line["choices"][0]["delta"]["content"])})
-    #         reponse_full += str(line["choices"][0]["delta"]["content"])
-    #         yield f"data: {data}\n\n"
-    #         reponse_full = ""
-    print(llm)
+
+    response_full = ""
     completion = llm.gen_stream(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
-                                messages=messages_combine)
+                                messages=messages_combine)
     for line in completion:
-        data = json.dumps({"answer": str(line)})
-        reponse_full += str(line)
+        # each chunk is framed as JSON for the SSE payload below
+        data = json.dumps({"answer": str(line)})
+        response_full += str(line)
         yield f"data: {data}\n\n"
-
     # save conversation to database
     if conversation_id is not None:
         conversations_collection.update_one(
             {"_id": ObjectId(conversation_id)},
-            {"$push": {"queries": {"prompt": question, "response": reponse_full, "sources": source_log_docs}}},
+            {"$push": {"queries": {"prompt": question, "response": response_full, "sources": source_log_docs}}},
         )
 
     else:
@@ -107,19 +208,18 @@ def complete_stream(question, docsearch, chat_history, api_key, conversation_id)
                                  "words, respond ONLY with the summary, use the same "
                                  "language as the system \n\nUser: " + question + "\n\n" + "AI: " +
-                                 reponse_full},
+                                 response_full},
                             {"role": "user", "content": "Summarise following conversation in no more than 3 words, "
                                                         "respond ONLY with the summary, use the same language as the "
                                                         "system"}]
-        # completion = openai.ChatCompletion.create(model='gpt-3.5-turbo', engine=settings.AZURE_DEPLOYMENT_NAME,
-        #                                           messages=messages_summary, max_tokens=30, temperature=0)
+
        completion = llm.gen(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
-                             messages=messages_combine, max_tokens=30)
+                             messages=messages_summary, max_tokens=30)
        conversation_id = conversations_collection.insert_one(
            {"user": "local",
             "date": datetime.datetime.utcnow(),
-             "name": completion["choices"][0]["message"]["content"],
-             "queries": [{"prompt": question, "response": reponse_full, "sources": source_log_docs}]}
+             "name": completion,
+             "queries": [{"prompt": question, "response": response_full, "sources": source_log_docs}]}
        ).inserted_id
 
     # send data.type = "end" to indicate that the stream has ended as json
@@ -160,4 +260,165 @@ def stream():
         complete_stream(question, docsearch, chat_history=history,
                         api_key=api_key, conversation_id=conversation_id),
         mimetype="text/event-stream"
-    )
\ No newline at end of file
+    )
+
+
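+# Single-shot counterpart to /stream: run the retrieval chain once, attach
+# the retrieved sources, persist the exchange, and return one JSON payload.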
+@answer.route("/api/answer", methods=["POST"])
+def api_answer():
+    data = request.get_json()
+    question = data["question"]
+    history = data["history"]
+    if "conversation_id" not in data:
+        conversation_id = None
+    else:
+        conversation_id = data["conversation_id"]
+    print("-" * 5)
+    if not api_key_set:
+        api_key = data["api_key"]
+    else:
+        api_key = settings.API_KEY
+    if not embeddings_key_set:
+        embeddings_key = data["embeddings_key"]
+    else:
+        embeddings_key = settings.EMBEDDINGS_KEY
+
+    # use try and except to check for exception
+    try:
+        # check if the vectorstore is set
+        vectorstore = get_vectorstore(data)
+        # loading the index and the store and the prompt template
+        # Note if you have used other embeddings than OpenAI, you need to change the embeddings
+        docsearch = get_docsearch(vectorstore, embeddings_key)
+
+        q_prompt = PromptTemplate(
+            input_variables=["context", "question"], template=template_quest, template_format="jinja2"
+        )
+        if settings.LLM_NAME == "openai_chat":
+            if is_azure_configured():
+                logger.debug("in Azure")
+                llm = AzureChatOpenAI(
+                    openai_api_key=api_key,
+                    openai_api_base=settings.OPENAI_API_BASE,
+                    openai_api_version=settings.OPENAI_API_VERSION,
+                    deployment_name=settings.AZURE_DEPLOYMENT_NAME,
+                )
+            else:
+                logger.debug("plain OpenAI")
+                llm = ChatOpenAI(openai_api_key=api_key, model_name=gpt_model)  # optional parameter: model_name="gpt-4"
+            messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
+            if history:
+                tokens_current_history = 0
+                # count tokens in history
+                history.reverse()
+                for i in history:
+                    if "prompt" in i and "response" in i:
+                        tokens_batch = count_tokens(i["prompt"]) + count_tokens(i["response"])
+                        if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
+                            tokens_current_history += tokens_batch
+                            messages_combine.append(HumanMessagePromptTemplate.from_template(i["prompt"]))
+                            messages_combine.append(AIMessagePromptTemplate.from_template(i["response"]))
+            messages_combine.append(HumanMessagePromptTemplate.from_template("{question}"))
+            p_chat_combine = ChatPromptTemplate.from_messages(messages_combine)
+        elif settings.LLM_NAME == "openai":
+            llm = OpenAI(openai_api_key=api_key, temperature=0)
+        elif settings.SELF_HOSTED_MODEL:
+            llm = hf
+        elif settings.LLM_NAME == "cohere":
+            llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
+        else:
+            raise ValueError("unknown LLM model")
+
+        if settings.LLM_NAME == "openai_chat":
+            question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
+            doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
+            chain = ConversationalRetrievalChain(
+                retriever=docsearch.as_retriever(k=2),
+                question_generator=question_generator,
+                combine_docs_chain=doc_chain,
+            )
+            chat_history = []
+            # result = chain({"question": question, "chat_history": chat_history})
+            # generate async with async generate method
+            result = run_async_chain(chain, question, chat_history)
+        elif settings.SELF_HOSTED_MODEL:
+            question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
+            doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
+            chain = ConversationalRetrievalChain(
+                retriever=docsearch.as_retriever(k=2),
+                question_generator=question_generator,
+                combine_docs_chain=doc_chain,
+            )
+            chat_history = []
+            # result = chain({"question": question, "chat_history": chat_history})
+            # generate async with async generate method
+            result = run_async_chain(chain, question, chat_history)
+
+        else:
+            qa_chain = load_qa_chain(
+                llm=llm, chain_type="map_reduce", combine_prompt=chat_combine_template, question_prompt=q_prompt
+            )
+            chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=3)
+            result = chain({"query": question})
+
+        print(result)
+
+        # some formatting for the frontend
+        if "result" in result:
+            result["answer"] = result["result"]
+        result["answer"] = result["answer"].replace("\\n", "\n")
+        try:
+            result["answer"] = result["answer"].split("SOURCES:")[0]
+        except Exception:
+            pass
+
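+        # Attach the top retrieved chunks so the frontend can render sources.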
+        sources = docsearch.similarity_search(question, k=2)
+        sources_doc = []
+        for doc in sources:
+            if doc.metadata:
+                sources_doc.append({'title': doc.metadata['title'], 'text': doc.page_content})
+            else:
+                sources_doc.append({'title': doc.page_content, 'text': doc.page_content})
+        result['sources'] = sources_doc
+
+        # generate conversationId
+        if conversation_id is not None:
+            conversations_collection.update_one(
+                {"_id": ObjectId(conversation_id)},
+                {"$push": {"queries": {"prompt": question,
+                                       "response": result["answer"], "sources": result['sources']}}},
+            )
+
+        else:
+            # create new conversation
+            # generate summary
+            messages_summary = [AIMessage(content="Summarise following conversation in no more than 3 " +
+                                                  "words, respond ONLY with the summary, use the same " +
+                                                  "language as the system \n\nUser: " + question + "\n\nAI: " +
+                                                  result["answer"]),
+                                HumanMessage(content="Summarise following conversation in no more than 3 words, " +
+                                                     "respond ONLY with the summary, use the same language as the " +
+                                                     "system")]
+
+            # completion = openai.ChatCompletion.create(model='gpt-3.5-turbo', engine=settings.AZURE_DEPLOYMENT_NAME,
+            #                                           messages=messages_summary, max_tokens=30, temperature=0)
+            completion = llm.predict_messages(messages_summary)
+            conversation_id = conversations_collection.insert_one(
+                {"user": "local",
+                 "date": datetime.datetime.utcnow(),
+                 "name": completion.content,
+                 "queries": [{"prompt": question, "response": result["answer"], "sources": result['sources']}]}
+            ).inserted_id
+
+        result["conversation_id"] = str(conversation_id)
+
+        # mock result
+        # result = {
+        #     "answer": "The answer is 42",
+        #     "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"]
+        # }
+        return result
+    except Exception as e:
+        # print whole traceback
+        traceback.print_exc()
+        print(str(e))
+        return bad_request(500, str(e))
diff --git a/application/app.py b/application/app.py
index c4b2c314..55c8e5c3 100644
--- a/application/app.py
+++ b/application/app.py
@@ -1,69 +1,18 @@
-import asyncio
-import datetime
-import json
-import logging
-import os
 import platform
-import traceback
+
 import dotenv
-import openai
-import requests
 from celery import Celery
-from celery.result import AsyncResult
-from flask import Flask, request, send_from_directory, jsonify, Response, redirect
-from langchain import FAISS
-from langchain import VectorDBQA, Cohere, OpenAI
-from langchain.chains import LLMChain, ConversationalRetrievalChain
-from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
-from langchain.chains.question_answering import load_qa_chain
-from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
-from langchain.embeddings import (
-    OpenAIEmbeddings,
-    HuggingFaceHubEmbeddings,
-    CohereEmbeddings,
-    HuggingFaceInstructEmbeddings,
-)
-from langchain.prompts import PromptTemplate
-from langchain.prompts.chat import (
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-    AIMessagePromptTemplate,
-)
-from langchain.schema import HumanMessage, AIMessage
+from flask import Flask, request, redirect
+
 from pymongo import MongoClient
-from werkzeug.utils import secure_filename
 
 from application.core.settings import settings
-from application.error import bad_request
 from application.worker import ingest_worker
-from bson.objectid import ObjectId
 
 from application.api.user.routes import user
 from application.api.answer.routes import answer
-from transformers import GPT2TokenizerFast
 
-# os.environ["LANGCHAIN_HANDLER"] = "langchain"
-logger = logging.getLogger(__name__)
-
-if settings.LLM_NAME == "gpt4":
-    gpt_model = 'gpt-4'
-else:
-    gpt_model = 'gpt-3.5-turbo'
-
-if settings.SELF_HOSTED_MODEL:
-    from langchain.llms import HuggingFacePipeline
-    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-
-    model_id = settings.LLM_NAME  # hf model id (Arc53/docsgpt-7b-falcon, Arc53/docsgpt-14b)
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(model_id)
-    pipe = pipeline(
-        "text-generation", model=model,
-        tokenizer=tokenizer, max_new_tokens=2000,
-        device_map="auto", eos_token_id=tokenizer.eos_token_id
-    )
-    hf = HuggingFacePipeline(pipeline=pipe)
 
 # Redirect PosixPath to WindowsPath on Windows
@@ -76,25 +25,7 @@ if platform.system() == "Windows":
 # loading the .env file
 dotenv.load_dotenv()
 
-# load the prompts
-current_dir = os.path.dirname(os.path.abspath(__file__))
-with open(os.path.join(current_dir, "prompts", "combine_prompt.txt"), "r") as f:
-    template = f.read()
-
-with open(os.path.join(current_dir, "prompts", "combine_prompt_hist.txt"), "r") as f:
-    template_hist = f.read()
-
-with open(os.path.join(current_dir, "prompts", "question_prompt.txt"), "r") as f:
-    template_quest = f.read()
-
-with open(os.path.join(current_dir, "prompts", "chat_combine_prompt.txt"), "r") as f:
-    chat_combine_template = f.read()
-
-with open(os.path.join(current_dir, "prompts", "chat_reduce_prompt.txt"), "r") as f:
-    chat_reduce_template = f.read()
-
-api_key_set = settings.API_KEY is not None
-embeddings_key_set = settings.EMBEDDINGS_KEY is not None
 
 app = Flask(__name__)
 app.register_blueprint(user)
@@ -105,65 +36,9 @@
 app.config["CELERY_RESULT_BACKEND"] = settings.CELERY_RESULT_BACKEND
 app.config["MONGO_URI"] = settings.MONGO_URI
 celery = Celery()
 celery.config_from_object("application.celeryconfig")
-mongo = MongoClient(app.config["MONGO_URI"])
-db = mongo["docsgpt"]
-vectors_collection = db["vectors"]
-conversations_collection = db["conversations"]
-
-
-async def async_generate(chain, question, chat_history):
-    result = await chain.arun({"question": question, "chat_history": chat_history})
-    return result
-
-
-def count_tokens(string):
-
-    tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
-    return len(tokenizer(string)['input_ids'])
-
-
-def run_async_chain(chain, question, chat_history):
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    result = {}
-    try:
-        answer = loop.run_until_complete(async_generate(chain, question, chat_history))
-    finally:
-        loop.close()
-    result["answer"] = answer
-    return result
-
-
-def get_vectorstore(data):
-    if "active_docs" in data:
-        if data["active_docs"].split("/")[0] == "local":
-            if data["active_docs"].split("/")[1] == "default":
-                vectorstore = ""
-            else:
-                vectorstore = "indexes/" + data["active_docs"]
-        else:
-            vectorstore = "vectors/" + data["active_docs"]
-        if data["active_docs"] == "default":
-            vectorstore = ""
-    else:
-        vectorstore = ""
-    vectorstore = os.path.join("application", vectorstore)
-    return vectorstore
-
-
-def get_docsearch(vectorstore, embeddings_key):
-    if settings.EMBEDDINGS_NAME == "openai_text-embedding-ada-002":
-        if is_azure_configured():
-            os.environ["OPENAI_API_TYPE"] = "azure"
-            openai_embeddings = OpenAIEmbeddings(model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME)
-        else:
-            openai_embeddings = OpenAIEmbeddings(openai_api_key=embeddings_key)
-        docsearch = FAISS.load_local(vectorstore, openai_embeddings)
-    elif settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
-        docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
-    elif settings.EMBEDDINGS_NAME == "huggingface_hkunlp/instructor-large":
-        docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
-    elif settings.EMBEDDINGS_NAME == "cohere_medium":
-        docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
-    return docsearch
 
 
 @celery.task(bind=True)
@@ -188,170 +63,8 @@ def home():
 
 
-def is_azure_configured():
-    return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
-
-
-@app.route("/api/answer", methods=["POST"])
-def api_answer():
-    data = request.get_json()
-    question = data["question"]
-    history = data["history"]
-    if "conversation_id" not in data:
-        conversation_id = None
-    else:
-        conversation_id = data["conversation_id"]
-    print("-" * 5)
-    if not api_key_set:
-        api_key = data["api_key"]
-    else:
-        api_key = settings.API_KEY
-    if not embeddings_key_set:
-        embeddings_key = data["embeddings_key"]
-    else:
-        embeddings_key = settings.EMBEDDINGS_KEY
-
-    # use try and except to check for exception
-    try:
-        # check if the vectorstore is set
-        vectorstore = get_vectorstore(data)
-        # loading the index and the store and the prompt template
-        # Note if you have used other embeddings than OpenAI, you need to change the embeddings
-        docsearch = get_docsearch(vectorstore, embeddings_key)
-
-        q_prompt = PromptTemplate(
-            input_variables=["context", "question"], template=template_quest, template_format="jinja2"
-        )
-        if settings.LLM_NAME == "openai_chat":
-            if is_azure_configured():
-                logger.debug("in Azure")
-                llm = AzureChatOpenAI(
-                    openai_api_key=api_key,
-                    openai_api_base=settings.OPENAI_API_BASE,
-                    openai_api_version=settings.OPENAI_API_VERSION,
-                    deployment_name=settings.AZURE_DEPLOYMENT_NAME,
-                )
-            else:
-                logger.debug("plain OpenAI")
-                llm = ChatOpenAI(openai_api_key=api_key, model_name=gpt_model)  # optional parameter: model_name="gpt-4"
-            messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
-            if history:
-                tokens_current_history = 0
-                # count tokens in history
-                history.reverse()
-                for i in history:
-                    if "prompt" in i and "response" in i:
-                        tokens_batch = count_tokens(i["prompt"]) + count_tokens(i["response"])
-                        if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
-                            tokens_current_history += tokens_batch
-                            messages_combine.append(HumanMessagePromptTemplate.from_template(i["prompt"]))
-                            messages_combine.append(AIMessagePromptTemplate.from_template(i["response"]))
-            messages_combine.append(HumanMessagePromptTemplate.from_template("{question}"))
-            p_chat_combine = ChatPromptTemplate.from_messages(messages_combine)
-        elif settings.LLM_NAME == "openai":
-            llm = OpenAI(openai_api_key=api_key, temperature=0)
-        elif settings.SELF_HOSTED_MODEL:
-            llm = hf
-        elif settings.LLM_NAME == "cohere":
-            llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
-        else:
-            raise ValueError("unknown LLM model")
-
-        if settings.LLM_NAME == "openai_chat":
-            question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
-            doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
-            chain = ConversationalRetrievalChain(
-                retriever=docsearch.as_retriever(k=2),
-                question_generator=question_generator,
-                combine_docs_chain=doc_chain,
-            )
-            chat_history = []
-            # result = chain({"question": question, "chat_history": chat_history})
-            # generate async with async generate method
-            result = run_async_chain(chain, question, chat_history)
-        elif settings.SELF_HOSTED_MODEL:
-            question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
-            doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
-            chain = ConversationalRetrievalChain(
-                retriever=docsearch.as_retriever(k=2),
-                question_generator=question_generator,
-                combine_docs_chain=doc_chain,
-            )
-            chat_history = []
-            # result = chain({"question": question, "chat_history": chat_history})
-            # generate async with async generate method
-            result = run_async_chain(chain, question, chat_history)
-
-        else:
-            qa_chain = load_qa_chain(
-                llm=llm, chain_type="map_reduce", combine_prompt=chat_combine_template, question_prompt=q_prompt
-            )
-            chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=3)
-            result = chain({"query": question})
-
-        print(result)
-
-        # some formatting for the frontend
-        if "result" in result:
-            result["answer"] = result["result"]
-        result["answer"] = result["answer"].replace("\\n", "\n")
-        try:
-            result["answer"] = result["answer"].split("SOURCES:")[0]
-        except Exception:
-            pass
-
-        sources = docsearch.similarity_search(question, k=2)
-        sources_doc = []
-        for doc in sources:
-            if doc.metadata:
-                sources_doc.append({'title': doc.metadata['title'], 'text': doc.page_content})
-            else:
-                sources_doc.append({'title': doc.page_content, 'text': doc.page_content})
-        result['sources'] = sources_doc
-
-        # generate conversationId
-        if conversation_id is not None:
-            conversations_collection.update_one(
-                {"_id": ObjectId(conversation_id)},
-                {"$push": {"queries": {"prompt": question,
-                                       "response": result["answer"], "sources": result['sources']}}},
-            )
-
-        else:
-            # create new conversation
-            # generate summary
-            messages_summary = [AIMessage(content="Summarise following conversation in no more than 3 " +
-                                                  "words, respond ONLY with the summary, use the same " +
-                                                  "language as the system \n\nUser: " + question + "\n\nAI: " +
-                                                  result["answer"]),
-                                HumanMessage(content="Summarise following conversation in no more than 3 words, " +
-                                                     "respond ONLY with the summary, use the same language as the " +
-                                                     "system")]
-
-            # completion = openai.ChatCompletion.create(model='gpt-3.5-turbo', engine=settings.AZURE_DEPLOYMENT_NAME,
-            #                                           messages=messages_summary, max_tokens=30, temperature=0)
-            completion = llm.predict_messages(messages_summary)
-            conversation_id = conversations_collection.insert_one(
-                {"user": "local",
-                 "date": datetime.datetime.utcnow(),
-                 "name": completion.content,
-                 "queries": [{"prompt": question, "response": result["answer"], "sources": result['sources']}]}
-            ).inserted_id
-
-        result["conversation_id"] = str(conversation_id)
-
-        # mock result
-        # result = {
-        #     "answer": "The answer is 42",
-        #     "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"]
-        # }
-        return result
-    except Exception as e:
-        # print whole traceback
-        traceback.print_exc()
-        print(str(e))
-        return bad_request(500, str(e))
-
 
 # handling CORS
 @app.after_request
diff --git a/application/llm/openai.py b/application/llm/openai.py
index 981b8394..23e5fab0 100644
--- a/application/llm/openai.py
+++ b/application/llm/openai.py
@@ -15,12 +15,18 @@ class OpenAILLM(BaseLLM):
         openai.api_key = self.api_key
         return openai
 
-    def gen(self, *args, **kwargs):
-        # This is just a stub. In the real implementation, you'd hit the OpenAI API or any other service.
-        return "Non-streaming response from OpenAI."
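+    # Non-streaming completion: returns only the assistant message content.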
+    def gen(self, model, engine, messages, stream=False, **kwargs):
+        openai = self._get_openai()  # apply the API key (and, on the Azure subclass, endpoint settings)
+        response = openai.ChatCompletion.create(
+            model=model,
+            engine=engine,
+            messages=messages,
+            stream=stream,
+            **kwargs
+        )
+
+        return response["choices"][0]["message"]["content"]
 
     def gen_stream(self, model, engine, messages, stream=True, **kwargs):
-        # openai = self._get_openai() # Get the openai module with the API key set
+        openai = self._get_openai()  # get the openai module with the API key set
         response = openai.ChatCompletion.create(
             model=model,
             engine=engine,
@@ -32,3 +38,19 @@ class OpenAILLM(BaseLLM):
         for line in response:
             if "content" in line["choices"][0]["delta"]:
                 yield line["choices"][0]["delta"]["content"]
+
+
+class AzureOpenAILLM(OpenAILLM):
+
+    def __init__(self, openai_api_key, openai_api_base, openai_api_version, deployment_name):
+        super().__init__(openai_api_key)
+        self.api_base = openai_api_base
+        self.api_version = openai_api_version
+        self.deployment_name = deployment_name
+
+    def _get_openai(self):
+        # reuse the base class's key handling, then switch the module to Azure
+        openai = super()._get_openai()
+        openai.api_base = self.api_base
+        openai.api_version = self.api_version
+        openai.api_type = "azure"
+        return openai
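
For reviewers, a minimal usage sketch of the reworked wrapper. Illustrative only: the key, endpoint, API version, and deployment name below are placeholders, and it assumes the 0.x `openai` SDK interface (`openai.ChatCompletion.create`) used throughout this patch.

```python
from application.llm.openai import OpenAILLM, AzureOpenAILLM

# Plain OpenAI: gen() returns the assistant text, gen_stream() yields deltas.
llm = OpenAILLM(api_key="sk-placeholder")
title = llm.gen(
    model="gpt-3.5-turbo",
    engine=None,  # engine is only meaningful against Azure deployments
    messages=[{"role": "user", "content": "Say hello."}],
    max_tokens=30,
)
for chunk in llm.gen_stream(
    model="gpt-3.5-turbo",
    engine=None,
    messages=[{"role": "user", "content": "Say hello."}],
):
    print(chunk, end="", flush=True)

# Azure: the subclass points the shared openai module at the deployment via
# _get_openai() before issuing the same calls.
azure_llm = AzureOpenAILLM(
    openai_api_key="placeholder",
    openai_api_base="https://example.openai.azure.com",
    openai_api_version="2023-03-15-preview",
    deployment_name="my-deployment",
)
```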