feat: logging

This commit is contained in:
Alex
2024-08-31 12:30:03 +01:00
parent c0886c2785
commit 4f88b6dc71
5 changed files with 55 additions and 12 deletions

View File

@@ -1,7 +1,7 @@
import asyncio import asyncio
import os import os
import sys import sys
from flask import Blueprint, request, Response from flask import Blueprint, request, Response, current_app
import json import json
import datetime import datetime
import logging import logging
@@ -267,6 +267,10 @@ def stream():
else: else:
retriever_name = source["active_docs"] retriever_name = source["active_docs"]
current_app.logger.info(f"/stream - request_data: {data}, source: {source}",
extra={"data": json.dumps({"request_data": data, "source": source})}
)
prompt = get_prompt(prompt_id) prompt = get_prompt(prompt_id)
retriever = RetrieverCreator.create_retriever( retriever = RetrieverCreator.create_retriever(
@@ -301,7 +305,9 @@ def stream():
mimetype="text/event-stream", mimetype="text/event-stream",
) )
except Exception as e: except Exception as e:
print("\033[91merr", str(e), file=sys.stderr) current_app.logger.error(f"/stream - error: {str(e)} - traceback: {traceback.format_exc()}",
extra={"error": str(e), "traceback": traceback.format_exc()}
)
message = e.args[0] message = e.args[0]
status_code = 400 status_code = 400
# # Custom exceptions with two arguments, index 1 as status code # # Custom exceptions with two arguments, index 1 as status code
@@ -345,7 +351,6 @@ def api_answer():
else: else:
token_limit = settings.DEFAULT_MAX_HISTORY token_limit = settings.DEFAULT_MAX_HISTORY
# use try and except to check for exception
try: try:
# check if the vectorstore is set # check if the vectorstore is set
if "api_key" in data: if "api_key" in data:
@@ -365,6 +370,10 @@ def api_answer():
prompt = get_prompt(prompt_id) prompt = get_prompt(prompt_id)
current_app.logger.info(f"/api/answer - request_data: {data}, source: {source}",
extra={"data": json.dumps({"request_data": data, "source": source})}
)
retriever = RetrieverCreator.create_retriever( retriever = RetrieverCreator.create_retriever(
retriever_name, retriever_name,
question=question, question=question,
@@ -399,9 +408,9 @@ def api_answer():
return result return result
except Exception as e: except Exception as e:
# print whole traceback current_app.logger.error(f"/api/answer - error: {str(e)} - traceback: {traceback.format_exc()}",
traceback.print_exc() extra={"error": str(e), "traceback": traceback.format_exc()}
print(str(e)) )
return bad_request(500, str(e)) return bad_request(500, str(e))
@@ -433,6 +442,10 @@ def api_search():
token_limit = data["token_limit"] token_limit = data["token_limit"]
else: else:
token_limit = settings.DEFAULT_MAX_HISTORY token_limit = settings.DEFAULT_MAX_HISTORY
current_app.logger.info(f"/api/search - request_data: {data}, source: {source}",
extra={"data": json.dumps({"request_data": data, "source": source})}
)
retriever = RetrieverCreator.create_retriever( retriever = RetrieverCreator.create_retriever(
retriever_name, retriever_name,

View File

@@ -6,12 +6,14 @@ from application.core.settings import settings
from application.api.user.routes import user from application.api.user.routes import user
from application.api.answer.routes import answer from application.api.answer.routes import answer
from application.api.internal.routes import internal from application.api.internal.routes import internal
from application.core.logging_config import setup_logging
if platform.system() == "Windows": if platform.system() == "Windows":
import pathlib import pathlib
pathlib.PosixPath = pathlib.WindowsPath pathlib.PosixPath = pathlib.WindowsPath
dotenv.load_dotenv() dotenv.load_dotenv()
setup_logging()
app = Flask(__name__) app = Flask(__name__)
app.register_blueprint(user) app.register_blueprint(user)

View File

@@ -1,9 +1,15 @@
from celery import Celery from celery import Celery
from application.core.settings import settings from application.core.settings import settings
from celery.signals import setup_logging
def make_celery(app_name=__name__): def make_celery(app_name=__name__):
celery = Celery(app_name, broker=settings.CELERY_BROKER_URL, backend=settings.CELERY_RESULT_BACKEND) celery = Celery(app_name, broker=settings.CELERY_BROKER_URL, backend=settings.CELERY_RESULT_BACKEND)
celery.conf.update(settings) celery.conf.update(settings)
return celery return celery
@setup_logging.connect
def config_loggers(*args, **kwargs):
    """Celery ``setup_logging`` signal handler: apply the app's logging config.

    NOTE(review): per Celery's documented behavior, connecting any receiver to
    the ``setup_logging`` signal stops Celery from installing its own logging
    configuration, so the worker uses ours instead — confirm against the Celery
    docs for the pinned version.
    """
    # Local import: the signal imported at module level is also named
    # ``setup_logging``, so importing the project function here avoids
    # shadowing the decorator name.
    from application.core.logging_config import setup_logging
    setup_logging()
celery = make_celery() celery = make_celery()

View File

@@ -0,0 +1,22 @@
from logging.config import dictConfig
def setup_logging():
    """Configure the root logger via ``logging.config.dictConfig``.

    Installs a single ``StreamHandler`` writing to stdout with a
    timestamped ``[time] LEVEL in module: message`` format, and sets the
    root logger level to INFO.
    """
    formatter = {
        "format": "[%(asctime)s] %(levelname)s in %(module)s: %(message)s",
    }
    console_handler = {
        "class": "logging.StreamHandler",
        "stream": "ext://sys.stdout",  # resolved by dictConfig to sys.stdout
        "formatter": "default",
    }
    dictConfig({
        "version": 1,
        "formatters": {"default": formatter},
        "handlers": {"console": console_handler},
        "root": {
            "level": "INFO",
            "handlers": ["console"],
        },
    })

View File

@@ -4,6 +4,7 @@ import string
import zipfile import zipfile
import tiktoken import tiktoken
from urllib.parse import urljoin from urllib.parse import urljoin
import logging
import requests import requests
@@ -14,6 +15,7 @@ from application.parser.open_ai_func import call_openai_api
from application.parser.schema.base import Document from application.parser.schema.base import Document
from application.parser.token_func import group_split from application.parser.token_func import group_split
# Define a function to extract metadata from a given filename. # Define a function to extract metadata from a given filename.
def metadata_from_filename(title): def metadata_from_filename(title):
store = "/".join(title.split("/")[1:3]) store = "/".join(title.split("/")[1:3])
@@ -41,7 +43,7 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
max_depth (int): Maximum allowed depth of recursion to prevent infinite loops. max_depth (int): Maximum allowed depth of recursion to prevent infinite loops.
""" """
if current_depth > max_depth: if current_depth > max_depth:
print(f"Reached maximum recursion depth of {max_depth}") logging.warning(f"Reached maximum recursion depth of {max_depth}")
return return
with zipfile.ZipFile(zip_path, "r") as zip_ref: with zipfile.ZipFile(zip_path, "r") as zip_ref:
@@ -88,16 +90,13 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
max_tokens = 1250 max_tokens = 1250
recursion_depth = 2 recursion_depth = 2
full_path = os.path.join(directory, user, name_job) full_path = os.path.join(directory, user, name_job)
import sys
print(full_path, file=sys.stderr) logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job})
# check if API_URL env variable is set # check if API_URL env variable is set
file_data = {"name": name_job, "file": filename, "user": user} file_data = {"name": name_job, "file": filename, "user": user}
response = requests.get( response = requests.get(
urljoin(settings.API_URL, "/api/download"), params=file_data urljoin(settings.API_URL, "/api/download"), params=file_data
) )
# check if file is in the response
print(response, file=sys.stderr)
file = response.content file = response.content
if not os.path.exists(full_path): if not os.path.exists(full_path):
@@ -137,7 +136,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
if sample: if sample:
for i in range(min(5, len(raw_docs))): for i in range(min(5, len(raw_docs))):
print(raw_docs[i].text) logging.info(f"Sample document {i}: {raw_docs[i]}")
# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl # get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
# and send them to the server (provide user and name in form) # and send them to the server (provide user and name in form)
@@ -180,6 +179,7 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
if not os.path.exists(full_path): if not os.path.exists(full_path):
os.makedirs(full_path) os.makedirs(full_path)
self.update_state(state="PROGRESS", meta={"current": 1}) self.update_state(state="PROGRESS", meta={"current": 1})
logging.info(f"Remote job: {full_path}", extra={"user": user, "job": name_job, "source_data": source_data}) logging.info(f"Remote job: {full_path}", extra={"user": user, "job": name_job, "source_data": source_data})
remote_loader = RemoteCreator.create_loader(loader) remote_loader = RemoteCreator.create_loader(loader)
raw_docs = remote_loader.load_data(source_data) raw_docs = remote_loader.load_data(source_data)