diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py index 893edd3a..a809b4ef 100644 --- a/application/api/answer/routes.py +++ b/application/api/answer/routes.py @@ -1,7 +1,7 @@ import asyncio import os import sys -from flask import Blueprint, request, Response +from flask import Blueprint, request, Response, current_app import json import datetime import logging @@ -267,6 +267,10 @@ def stream(): else: retriever_name = source["active_docs"] + current_app.logger.info(f"/stream - request_data: {data}, source: {source}", + extra={"data": json.dumps({"request_data": data, "source": source})} + ) + prompt = get_prompt(prompt_id) retriever = RetrieverCreator.create_retriever( @@ -301,7 +305,9 @@ def stream(): mimetype="text/event-stream", ) except Exception as e: - print("\033[91merr", str(e), file=sys.stderr) + current_app.logger.error(f"/stream - error: {str(e)} - traceback: {traceback.format_exc()}", + extra={"error": str(e), "traceback": traceback.format_exc()} + ) message = e.args[0] status_code = 400 # # Custom exceptions with two arguments, index 1 as status code @@ -345,7 +351,6 @@ def api_answer(): else: token_limit = settings.DEFAULT_MAX_HISTORY - # use try and except to check for exception try: # check if the vectorstore is set if "api_key" in data: @@ -365,6 +370,10 @@ def api_answer(): prompt = get_prompt(prompt_id) + current_app.logger.info(f"/api/answer - request_data: {data}, source: {source}", + extra={"data": json.dumps({"request_data": data, "source": source})} + ) + retriever = RetrieverCreator.create_retriever( retriever_name, question=question, @@ -399,9 +408,9 @@ def api_answer(): return result except Exception as e: - # print whole traceback - traceback.print_exc() - print(str(e)) + current_app.logger.error(f"/api/answer - error: {str(e)} - traceback: {traceback.format_exc()}", + extra={"error": str(e), "traceback": traceback.format_exc()} + ) return bad_request(500, str(e)) @@ -433,6 +442,10 @@ def api_search(): 
token_limit = data["token_limit"] else: token_limit = settings.DEFAULT_MAX_HISTORY + + current_app.logger.info(f"/api/search - request_data: {data}, source: {source}", + extra={"data": json.dumps({"request_data": data, "source": source})} + ) retriever = RetrieverCreator.create_retriever( retriever_name, diff --git a/application/app.py b/application/app.py index fe8efd12..87d9d42f 100644 --- a/application/app.py +++ b/application/app.py @@ -6,12 +6,14 @@ from application.core.settings import settings from application.api.user.routes import user from application.api.answer.routes import answer from application.api.internal.routes import internal +from application.core.logging_config import setup_logging if platform.system() == "Windows": import pathlib pathlib.PosixPath = pathlib.WindowsPath dotenv.load_dotenv() +setup_logging() app = Flask(__name__) app.register_blueprint(user) diff --git a/application/celery_init.py b/application/celery_init.py index c19c2e75..c5838083 100644 --- a/application/celery_init.py +++ b/application/celery_init.py @@ -1,9 +1,15 @@ from celery import Celery from application.core.settings import settings +from celery.signals import setup_logging def make_celery(app_name=__name__): celery = Celery(app_name, broker=settings.CELERY_BROKER_URL, backend=settings.CELERY_RESULT_BACKEND) celery.conf.update(settings) return celery +@setup_logging.connect +def config_loggers(*args, **kwargs): + from application.core.logging_config import setup_logging + setup_logging() + celery = make_celery() diff --git a/application/core/logging_config.py b/application/core/logging_config.py new file mode 100644 index 00000000..e693cb91 --- /dev/null +++ b/application/core/logging_config.py @@ -0,0 +1,22 @@ +from logging.config import dictConfig + +def setup_logging(): + dictConfig({ + 'version': 1, + 'formatters': { + 'default': { + 'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s', + } + }, + "handlers": { + "console": { + "class": 
"logging.StreamHandler", + "stream": "ext://sys.stdout", + "formatter": "default", + } + }, + 'root': { + 'level': 'INFO', + 'handlers': ['console'], + }, + }) \ No newline at end of file diff --git a/application/worker.py b/application/worker.py index bd1bc15a..3105aabe 100755 --- a/application/worker.py +++ b/application/worker.py @@ -4,6 +4,7 @@ import string import zipfile import tiktoken from urllib.parse import urljoin +import logging import requests @@ -14,6 +15,7 @@ from application.parser.open_ai_func import call_openai_api from application.parser.schema.base import Document from application.parser.token_func import group_split + # Define a function to extract metadata from a given filename. def metadata_from_filename(title): store = "/".join(title.split("/")[1:3]) @@ -41,7 +43,7 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5): max_depth (int): Maximum allowed depth of recursion to prevent infinite loops. """ if current_depth > max_depth: - print(f"Reached maximum recursion depth of {max_depth}") + logging.warning(f"Reached maximum recursion depth of {max_depth}") return with zipfile.ZipFile(zip_path, "r") as zip_ref: @@ -88,16 +90,13 @@ def ingest_worker(self, directory, formats, name_job, filename, user): max_tokens = 1250 recursion_depth = 2 full_path = os.path.join(directory, user, name_job) - import sys - print(full_path, file=sys.stderr) + logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job}) # check if API_URL env variable is set file_data = {"name": name_job, "file": filename, "user": user} response = requests.get( urljoin(settings.API_URL, "/api/download"), params=file_data ) - # check if file is in the response - print(response, file=sys.stderr) file = response.content if not os.path.exists(full_path): @@ -137,7 +136,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user): if sample: for i in range(min(5, len(raw_docs))): - print(raw_docs[i].text) + 
logging.info(f"Sample document {i}: {raw_docs[i]}") # get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl # and send them to the server (provide user and name in form) @@ -180,6 +179,7 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp"): if not os.path.exists(full_path): os.makedirs(full_path) self.update_state(state="PROGRESS", meta={"current": 1}) + logging.info(f"Remote job: {full_path}", extra={"user": user, "job": name_job, "source_data": source_data}) remote_loader = RemoteCreator.create_loader(loader) raw_docs = remote_loader.load_data(source_data)