Fixed request length bug, changed to a less-used port

This commit is contained in:
Idan
2023-06-23 14:56:14 +03:00
parent 92373b25a9
commit 2404899e28
13 changed files with 92 additions and 50 deletions

View File

@@ -3,7 +3,7 @@ EMBEDDINGS_KEY=your_api_key
CELERY_BROKER_URL=redis://localhost:6379/0
CELERY_RESULT_BACKEND=redis://localhost:6379/1
MONGO_URI=mongodb://localhost:27017/docsgpt
API_URL=http://localhost:5001
API_URL=http://localhost:7091
#For OPENAI on Azure
OPENAI_API_BASE=

View File

@@ -18,6 +18,6 @@ COPY . /app
ENV FLASK_APP=app.py
ENV FLASK_DEBUG=true
EXPOSE 5001
EXPOSE 7091
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:5001", "wsgi:app"]
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "wsgi:app"]

View File

@@ -43,6 +43,7 @@ from worker import ingest_worker
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
logger = logging.getLogger(__name__)
gpt_model = 'gpt-3.5-turbo' # gpt-4
if settings.LLM_NAME == "manifest":
from manifest import Manifest
@@ -195,7 +196,7 @@ def complete_stream(question, docsearch, chat_history, api_key):
messages_combine.append({"role": "user", "content": i["prompt"]})
messages_combine.append({"role": "system", "content": i["response"]})
messages_combine.append({"role": "user", "content": question})
completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", engine=settings.AZURE_DEPLOYMENT_NAME,
completion = openai.ChatCompletion.create(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
messages=messages_combine, stream=True, max_tokens=500, temperature=0)
for line in completion:
@@ -208,26 +209,27 @@ def complete_stream(question, docsearch, chat_history, api_key):
yield f"data: {data}\n\n"
@app.route("/stream", methods=["POST", "GET"])
@app.route("/stream", methods=["POST"])
def stream():
data = request.get_json()
# get parameter from url question
question = request.args.get("question")
history = request.args.get("history")
question = data["question"]
history = data["history"]
# history to json object from string
history = json.loads(history)
# check if active_docs is set
if not api_key_set:
api_key = request.args.get("api_key")
api_key = data["api_key"]
else:
api_key = settings.API_KEY
if not embeddings_key_set:
embeddings_key = request.args.get("embeddings_key")
embeddings_key = data["embeddings_key"]
else:
embeddings_key = settings.EMBEDDINGS_KEY
if "active_docs" in request.args:
vectorstore = get_vectorstore({"active_docs": request.args.get("active_docs")})
if "active_docs" in data:
vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
else:
vectorstore = ""
docsearch = get_docsearch(vectorstore, embeddings_key)
@@ -279,7 +281,7 @@ def api_answer():
)
else:
logger.debug("plain OpenAI")
llm = ChatOpenAI(openai_api_key=api_key) # optional parameter: model_name="gpt-4"
llm = ChatOpenAI(openai_api_key=api_key, model_name=gpt_model) # optional parameter: model_name="gpt-4"
messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
if history:
tokens_current_history = 0
@@ -597,4 +599,4 @@ def after_request(response):
if __name__ == "__main__":
app.run(debug=True, port=5001)
app.run(debug=True, port=7091)

View File

@@ -12,7 +12,7 @@ class Settings(BaseSettings):
MODEL_PATH: str = "./models/gpt4all-model.bin"
TOKENS_MAX_HISTORY: int = 150
API_URL: str = "http://localhost:5001" # backend url for celery worker
API_URL: str = "http://localhost:7091" # backend url for celery worker
API_KEY: str = None # LLM api key
EMBEDDINGS_KEY: str = None # api key for embeddings (if using openai, just copy API_KEY

View File

@@ -1,4 +1,4 @@
from app import app
if __name__ == "__main__":
app.run(debug=True, port=5001)
app.run(debug=True, port=7091)