mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
feat: logging
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from flask import Blueprint, request, Response
|
from flask import Blueprint, request, Response, current_app
|
||||||
import json
|
import json
|
||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
@@ -267,6 +267,10 @@ def stream():
|
|||||||
else:
|
else:
|
||||||
retriever_name = source["active_docs"]
|
retriever_name = source["active_docs"]
|
||||||
|
|
||||||
|
current_app.logger.info(f"/stream - request_data: {data}, source: {source}",
|
||||||
|
extra={"data": json.dumps({"request_data": data, "source": source})}
|
||||||
|
)
|
||||||
|
|
||||||
prompt = get_prompt(prompt_id)
|
prompt = get_prompt(prompt_id)
|
||||||
|
|
||||||
retriever = RetrieverCreator.create_retriever(
|
retriever = RetrieverCreator.create_retriever(
|
||||||
@@ -301,7 +305,9 @@ def stream():
|
|||||||
mimetype="text/event-stream",
|
mimetype="text/event-stream",
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("\033[91merr", str(e), file=sys.stderr)
|
current_app.logger.error(f"/stream - error: {str(e)} - traceback: {traceback.format_exc()}",
|
||||||
|
extra={"error": str(e), "traceback": traceback.format_exc()}
|
||||||
|
)
|
||||||
message = e.args[0]
|
message = e.args[0]
|
||||||
status_code = 400
|
status_code = 400
|
||||||
# # Custom exceptions with two arguments, index 1 as status code
|
# # Custom exceptions with two arguments, index 1 as status code
|
||||||
@@ -345,7 +351,6 @@ def api_answer():
|
|||||||
else:
|
else:
|
||||||
token_limit = settings.DEFAULT_MAX_HISTORY
|
token_limit = settings.DEFAULT_MAX_HISTORY
|
||||||
|
|
||||||
# use try and except to check for exception
|
|
||||||
try:
|
try:
|
||||||
# check if the vectorstore is set
|
# check if the vectorstore is set
|
||||||
if "api_key" in data:
|
if "api_key" in data:
|
||||||
@@ -365,6 +370,10 @@ def api_answer():
|
|||||||
|
|
||||||
prompt = get_prompt(prompt_id)
|
prompt = get_prompt(prompt_id)
|
||||||
|
|
||||||
|
current_app.logger.info(f"/api/answer - request_data: {data}, source: {source}",
|
||||||
|
extra={"data": json.dumps({"request_data": data, "source": source})}
|
||||||
|
)
|
||||||
|
|
||||||
retriever = RetrieverCreator.create_retriever(
|
retriever = RetrieverCreator.create_retriever(
|
||||||
retriever_name,
|
retriever_name,
|
||||||
question=question,
|
question=question,
|
||||||
@@ -399,9 +408,9 @@ def api_answer():
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# print whole traceback
|
current_app.logger.error(f"/api/answer - error: {str(e)} - traceback: {traceback.format_exc()}",
|
||||||
traceback.print_exc()
|
extra={"error": str(e), "traceback": traceback.format_exc()}
|
||||||
print(str(e))
|
)
|
||||||
return bad_request(500, str(e))
|
return bad_request(500, str(e))
|
||||||
|
|
||||||
|
|
||||||
@@ -433,6 +442,10 @@ def api_search():
|
|||||||
token_limit = data["token_limit"]
|
token_limit = data["token_limit"]
|
||||||
else:
|
else:
|
||||||
token_limit = settings.DEFAULT_MAX_HISTORY
|
token_limit = settings.DEFAULT_MAX_HISTORY
|
||||||
|
|
||||||
|
current_app.logger.info(f"/api/answer - request_data: {data}, source: {source}",
|
||||||
|
extra={"data": json.dumps({"request_data": data, "source": source})}
|
||||||
|
)
|
||||||
|
|
||||||
retriever = RetrieverCreator.create_retriever(
|
retriever = RetrieverCreator.create_retriever(
|
||||||
retriever_name,
|
retriever_name,
|
||||||
|
|||||||
@@ -6,12 +6,14 @@ from application.core.settings import settings
|
|||||||
from application.api.user.routes import user
|
from application.api.user.routes import user
|
||||||
from application.api.answer.routes import answer
|
from application.api.answer.routes import answer
|
||||||
from application.api.internal.routes import internal
|
from application.api.internal.routes import internal
|
||||||
|
from application.core.logging_config import setup_logging
|
||||||
|
|
||||||
if platform.system() == "Windows":
|
if platform.system() == "Windows":
|
||||||
import pathlib
|
import pathlib
|
||||||
pathlib.PosixPath = pathlib.WindowsPath
|
pathlib.PosixPath = pathlib.WindowsPath
|
||||||
|
|
||||||
dotenv.load_dotenv()
|
dotenv.load_dotenv()
|
||||||
|
setup_logging()
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
app.register_blueprint(user)
|
app.register_blueprint(user)
|
||||||
|
|||||||
@@ -1,9 +1,15 @@
|
|||||||
from celery import Celery
|
from celery import Celery
|
||||||
from application.core.settings import settings
|
from application.core.settings import settings
|
||||||
|
from celery.signals import setup_logging
|
||||||
|
|
||||||
def make_celery(app_name=__name__):
    """Build a Celery application using the broker and backend from settings.

    Args:
        app_name: Name passed to the Celery constructor; defaults to this
            module's ``__name__``.

    Returns:
        The Celery instance, after its ``conf`` has been updated from
        ``settings``.
    """
    celery_app = Celery(
        app_name,
        broker=settings.CELERY_BROKER_URL,
        backend=settings.CELERY_RESULT_BACKEND,
    )
    celery_app.conf.update(settings)
    return celery_app
|
||||||
|
|
||||||
|
@setup_logging.connect
def config_loggers(*args, **kwargs):
    """Apply the application's logging configuration.

    Connected as a receiver of the ``setup_logging`` signal imported from
    ``celery.signals``. Any arguments supplied by the signal are accepted
    and ignored.
    """
    # Imported locally under an alias: at module level the name
    # ``setup_logging`` refers to the Celery signal, not the project helper.
    from application.core.logging_config import (
        setup_logging as apply_logging_config,
    )

    apply_logging_config()
|
||||||
|
|
||||||
celery = make_celery()
|
celery = make_celery()
|
||||||
|
|||||||
22
application/core/logging_config.py
Normal file
22
application/core/logging_config.py
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
from logging.config import dictConfig
|
||||||
|
|
||||||
|
def setup_logging():
    """Configure process-wide logging to write to standard output.

    Installs a single ``StreamHandler`` (bound to ``sys.stdout``) on the root
    logger at ``INFO`` level, formatting records as
    ``[time] LEVEL in module: message``.

    ``disable_existing_loggers`` is set to ``False`` explicitly. With the
    ``dictConfig`` default of ``True``, every non-root logger created before
    this call would be disabled, silently dropping records from loggers that
    modules create at import time.
    """
    dictConfig({
        "version": 1,
        # Keep loggers that already exist (created during imports) enabled.
        "disable_existing_loggers": False,
        "formatters": {
            "default": {
                "format": "[%(asctime)s] %(levelname)s in %(module)s: %(message)s",
            },
        },
        "handlers": {
            "console": {
                "class": "logging.StreamHandler",
                "stream": "ext://sys.stdout",
                "formatter": "default",
            },
        },
        "root": {
            "level": "INFO",
            "handlers": ["console"],
        },
    })
|
||||||
@@ -4,6 +4,7 @@ import string
|
|||||||
import zipfile
|
import zipfile
|
||||||
import tiktoken
|
import tiktoken
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
import logging
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
@@ -14,6 +15,7 @@ from application.parser.open_ai_func import call_openai_api
|
|||||||
from application.parser.schema.base import Document
|
from application.parser.schema.base import Document
|
||||||
from application.parser.token_func import group_split
|
from application.parser.token_func import group_split
|
||||||
|
|
||||||
|
|
||||||
# Define a function to extract metadata from a given filename.
|
# Define a function to extract metadata from a given filename.
|
||||||
def metadata_from_filename(title):
|
def metadata_from_filename(title):
|
||||||
store = "/".join(title.split("/")[1:3])
|
store = "/".join(title.split("/")[1:3])
|
||||||
@@ -41,7 +43,7 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
|
|||||||
max_depth (int): Maximum allowed depth of recursion to prevent infinite loops.
|
max_depth (int): Maximum allowed depth of recursion to prevent infinite loops.
|
||||||
"""
|
"""
|
||||||
if current_depth > max_depth:
|
if current_depth > max_depth:
|
||||||
print(f"Reached maximum recursion depth of {max_depth}")
|
logging.warning(f"Reached maximum recursion depth of {max_depth}")
|
||||||
return
|
return
|
||||||
|
|
||||||
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
||||||
@@ -88,16 +90,13 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
|||||||
max_tokens = 1250
|
max_tokens = 1250
|
||||||
recursion_depth = 2
|
recursion_depth = 2
|
||||||
full_path = os.path.join(directory, user, name_job)
|
full_path = os.path.join(directory, user, name_job)
|
||||||
import sys
|
|
||||||
|
|
||||||
print(full_path, file=sys.stderr)
|
logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job})
|
||||||
# check if API_URL env variable is set
|
# check if API_URL env variable is set
|
||||||
file_data = {"name": name_job, "file": filename, "user": user}
|
file_data = {"name": name_job, "file": filename, "user": user}
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
urljoin(settings.API_URL, "/api/download"), params=file_data
|
urljoin(settings.API_URL, "/api/download"), params=file_data
|
||||||
)
|
)
|
||||||
# check if file is in the response
|
|
||||||
print(response, file=sys.stderr)
|
|
||||||
file = response.content
|
file = response.content
|
||||||
|
|
||||||
if not os.path.exists(full_path):
|
if not os.path.exists(full_path):
|
||||||
@@ -137,7 +136,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
|||||||
|
|
||||||
if sample:
|
if sample:
|
||||||
for i in range(min(5, len(raw_docs))):
|
for i in range(min(5, len(raw_docs))):
|
||||||
print(raw_docs[i].text)
|
logging.info(f"Sample document {i}: {raw_docs[i]}")
|
||||||
|
|
||||||
# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
|
# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
|
||||||
# and send them to the server (provide user and name in form)
|
# and send them to the server (provide user and name in form)
|
||||||
@@ -180,6 +179,7 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
|
|||||||
if not os.path.exists(full_path):
|
if not os.path.exists(full_path):
|
||||||
os.makedirs(full_path)
|
os.makedirs(full_path)
|
||||||
self.update_state(state="PROGRESS", meta={"current": 1})
|
self.update_state(state="PROGRESS", meta={"current": 1})
|
||||||
|
logging.info(f"Remote job: {full_path}", extra={"user": user, "job": name_job, source_data: source_data})
|
||||||
|
|
||||||
remote_loader = RemoteCreator.create_loader(loader)
|
remote_loader = RemoteCreator.create_loader(loader)
|
||||||
raw_docs = remote_loader.load_data(source_data)
|
raw_docs = remote_loader.load_data(source_data)
|
||||||
|
|||||||
Reference in New Issue
Block a user