feat: logging

This commit is contained in:
Alex
2024-08-31 12:30:03 +01:00
parent c0886c2785
commit 4f88b6dc71
5 changed files with 55 additions and 12 deletions

View File

@@ -1,7 +1,7 @@
import asyncio import asyncio
import os import os
import sys import sys
from flask import Blueprint, request, Response from flask import Blueprint, request, Response, current_app
import json import json
import datetime import datetime
import logging import logging
@@ -267,6 +267,10 @@ def stream():
else: else:
retriever_name = source["active_docs"] retriever_name = source["active_docs"]
current_app.logger.info(f"/stream - request_data: {data}, source: {source}",
extra={"data": json.dumps({"request_data": data, "source": source})}
)
prompt = get_prompt(prompt_id) prompt = get_prompt(prompt_id)
retriever = RetrieverCreator.create_retriever( retriever = RetrieverCreator.create_retriever(
@@ -301,7 +305,9 @@ def stream():
mimetype="text/event-stream", mimetype="text/event-stream",
) )
except Exception as e: except Exception as e:
print("\033[91merr", str(e), file=sys.stderr) current_app.logger.error(f"/stream - error: {str(e)} - traceback: {traceback.format_exc()}",
extra={"error": str(e), "traceback": traceback.format_exc()}
)
message = e.args[0] message = e.args[0]
status_code = 400 status_code = 400
# # Custom exceptions with two arguments, index 1 as status code # # Custom exceptions with two arguments, index 1 as status code
@@ -345,7 +351,6 @@ def api_answer():
else: else:
token_limit = settings.DEFAULT_MAX_HISTORY token_limit = settings.DEFAULT_MAX_HISTORY
# use try and except to check for exception
try: try:
# check if the vectorstore is set # check if the vectorstore is set
if "api_key" in data: if "api_key" in data:
@@ -365,6 +370,10 @@ def api_answer():
prompt = get_prompt(prompt_id) prompt = get_prompt(prompt_id)
current_app.logger.info(f"/api/answer - request_data: {data}, source: {source}",
extra={"data": json.dumps({"request_data": data, "source": source})}
)
retriever = RetrieverCreator.create_retriever( retriever = RetrieverCreator.create_retriever(
retriever_name, retriever_name,
question=question, question=question,
@@ -399,9 +408,9 @@ def api_answer():
return result return result
except Exception as e: except Exception as e:
# print whole traceback current_app.logger.error(f"/api/answer - error: {str(e)} - traceback: {traceback.format_exc()}",
traceback.print_exc() extra={"error": str(e), "traceback": traceback.format_exc()}
print(str(e)) )
return bad_request(500, str(e)) return bad_request(500, str(e))
@@ -433,6 +442,10 @@ def api_search():
token_limit = data["token_limit"] token_limit = data["token_limit"]
else: else:
token_limit = settings.DEFAULT_MAX_HISTORY token_limit = settings.DEFAULT_MAX_HISTORY
current_app.logger.info(f"/api/search - request_data: {data}, source: {source}",
extra={"data": json.dumps({"request_data": data, "source": source})}
)
retriever = RetrieverCreator.create_retriever( retriever = RetrieverCreator.create_retriever(
retriever_name, retriever_name,

View File

@@ -6,12 +6,14 @@ from application.core.settings import settings
from application.api.user.routes import user from application.api.user.routes import user
from application.api.answer.routes import answer from application.api.answer.routes import answer
from application.api.internal.routes import internal from application.api.internal.routes import internal
from application.core.logging_config import setup_logging
if platform.system() == "Windows": if platform.system() == "Windows":
import pathlib import pathlib
pathlib.PosixPath = pathlib.WindowsPath pathlib.PosixPath = pathlib.WindowsPath
dotenv.load_dotenv() dotenv.load_dotenv()
setup_logging()
app = Flask(__name__) app = Flask(__name__)
app.register_blueprint(user) app.register_blueprint(user)

View File

@@ -1,9 +1,15 @@
from celery import Celery from celery import Celery
from application.core.settings import settings from application.core.settings import settings
from celery.signals import setup_logging
def make_celery(app_name=__name__): def make_celery(app_name=__name__):
celery = Celery(app_name, broker=settings.CELERY_BROKER_URL, backend=settings.CELERY_RESULT_BACKEND) celery = Celery(app_name, broker=settings.CELERY_BROKER_URL, backend=settings.CELERY_RESULT_BACKEND)
celery.conf.update(settings) celery.conf.update(settings)
return celery return celery
@setup_logging.connect
def config_loggers(*args, **kwargs):
    """Celery ``setup_logging`` signal handler: apply the app's logging config.

    NOTE(review): per Celery's documented behavior, connecting any receiver to
    the ``setup_logging`` signal stops Celery from installing its own logging
    configuration, so the worker uses ours instead — confirm against the Celery
    docs for the pinned version.
    """
    # Local import: the signal imported at module level is also named
    # ``setup_logging``, so importing the project function here avoids
    # shadowing the decorator name.
    from application.core.logging_config import setup_logging
    setup_logging()
celery = make_celery() celery = make_celery()

View File

@@ -0,0 +1,22 @@
from logging.config import dictConfig
def setup_logging():
    """Configure the root logger via ``logging.config.dictConfig``.

    Installs a single ``StreamHandler`` writing to stdout with a
    timestamped ``[time] LEVEL in module: message`` format, and sets the
    root logger level to INFO.
    """
    formatter = {
        "format": "[%(asctime)s] %(levelname)s in %(module)s: %(message)s",
    }
    console_handler = {
        "class": "logging.StreamHandler",
        "stream": "ext://sys.stdout",  # resolved by dictConfig to sys.stdout
        "formatter": "default",
    }
    dictConfig({
        "version": 1,
        "formatters": {"default": formatter},
        "handlers": {"console": console_handler},
        "root": {
            "level": "INFO",
            "handlers": ["console"],
        },
    })

View File

@@ -4,6 +4,7 @@ import string
import zipfile import zipfile
import tiktoken import tiktoken
from urllib.parse import urljoin from urllib.parse import urljoin
import logging
import requests import requests
@@ -14,6 +15,7 @@ from application.parser.open_ai_func import call_openai_api
from application.parser.schema.base import Document from application.parser.schema.base import Document
from application.parser.token_func import group_split from application.parser.token_func import group_split
# Define a function to extract metadata from a given filename. # Define a function to extract metadata from a given filename.
def metadata_from_filename(title): def metadata_from_filename(title):
store = "/".join(title.split("/")[1:3]) store = "/".join(title.split("/")[1:3])
@@ -41,7 +43,7 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
max_depth (int): Maximum allowed depth of recursion to prevent infinite loops. max_depth (int): Maximum allowed depth of recursion to prevent infinite loops.
""" """
if current_depth > max_depth: if current_depth > max_depth:
print(f"Reached maximum recursion depth of {max_depth}") logging.warning(f"Reached maximum recursion depth of {max_depth}")
return return
with zipfile.ZipFile(zip_path, "r") as zip_ref: with zipfile.ZipFile(zip_path, "r") as zip_ref:
@@ -88,16 +90,13 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
max_tokens = 1250 max_tokens = 1250
recursion_depth = 2 recursion_depth = 2
full_path = os.path.join(directory, user, name_job) full_path = os.path.join(directory, user, name_job)
import sys
print(full_path, file=sys.stderr) logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job})
# check if API_URL env variable is set # check if API_URL env variable is set
file_data = {"name": name_job, "file": filename, "user": user} file_data = {"name": name_job, "file": filename, "user": user}
response = requests.get( response = requests.get(
urljoin(settings.API_URL, "/api/download"), params=file_data urljoin(settings.API_URL, "/api/download"), params=file_data
) )
# check if file is in the response
print(response, file=sys.stderr)
file = response.content file = response.content
if not os.path.exists(full_path): if not os.path.exists(full_path):
@@ -137,7 +136,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
if sample: if sample:
for i in range(min(5, len(raw_docs))): for i in range(min(5, len(raw_docs))):
print(raw_docs[i].text) logging.info(f"Sample document {i}: {raw_docs[i]}")
# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl # get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
# and send them to the server (provide user and name in form) # and send them to the server (provide user and name in form)
@@ -180,6 +179,7 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
if not os.path.exists(full_path): if not os.path.exists(full_path):
os.makedirs(full_path) os.makedirs(full_path)
self.update_state(state="PROGRESS", meta={"current": 1}) self.update_state(state="PROGRESS", meta={"current": 1})
logging.info(f"Remote job: {full_path}", extra={"user": user, "job": name_job, "source_data": source_data}) logging.info(f"Remote job: {full_path}", extra={"user": user, "job": name_job, "source_data": source_data})
remote_loader = RemoteCreator.create_loader(loader) remote_loader = RemoteCreator.create_loader(loader)
raw_docs = remote_loader.load_data(source_data) raw_docs = remote_loader.load_data(source_data)