Merge branch 'main' into feat/agent-menu

This commit is contained in:
Siddhant Rai
2025-04-16 18:35:38 +05:30
21 changed files with 1568 additions and 231 deletions

View File

@@ -48,10 +48,13 @@ class ClassicAgent(BaseAgent):
):
yield {"answer": resp.message.content}
else:
completion = self.llm.gen_stream(
model=self.gpt_model, messages=messages, tools=self.tools
)
for line in completion:
# completion = self.llm.gen_stream(
# model=self.gpt_model, messages=messages, tools=self.tools
# )
# log type of resp
logger.info(f"Response type: {type(resp)}")
logger.info(f"Response: {resp}")
for line in resp:
if isinstance(line, str):
yield {"answer": line}

View File

@@ -15,7 +15,7 @@ class LLMHandler(ABC):
@abstractmethod
def handle_response(self, agent, resp, tools_dict, messages, attachments=None, **kwargs):
pass
def prepare_messages_with_attachments(self, agent, messages, attachments=None):
"""
Prepare messages with attachment content if available.
@@ -33,15 +33,53 @@ class LLMHandler(ABC):
logger.info(f"Preparing messages with {len(attachments)} attachments")
# Check if the LLM has its own custom attachment handling implementation
if hasattr(agent.llm, "prepare_messages_with_attachments") and agent.llm.__class__.__name__ != "BaseLLM":
logger.info(f"Using {agent.llm.__class__.__name__}'s own prepare_messages_with_attachments method")
return agent.llm.prepare_messages_with_attachments(messages, attachments)
supported_types = agent.llm.get_supported_attachment_types()
# Otherwise, append attachment content to the system prompt
supported_attachments = []
unsupported_attachments = []
for attachment in attachments:
mime_type = attachment.get('mime_type')
if not mime_type:
import mimetypes
file_path = attachment.get('path')
if file_path:
mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
else:
unsupported_attachments.append(attachment)
continue
if mime_type in supported_types:
supported_attachments.append(attachment)
else:
unsupported_attachments.append(attachment)
# Process supported attachments with the LLM's custom method
prepared_messages = messages
if supported_attachments:
logger.info(f"Processing {len(supported_attachments)} supported attachments with {agent.llm.__class__.__name__}'s method")
prepared_messages = agent.llm.prepare_messages_with_attachments(messages, supported_attachments)
# Process unsupported attachments with the default method
if unsupported_attachments:
logger.info(f"Processing {len(unsupported_attachments)} unsupported attachments with default method")
prepared_messages = self._append_attachment_content_to_system(prepared_messages, unsupported_attachments)
return prepared_messages
def _append_attachment_content_to_system(self, messages, attachments):
"""
Default method to append attachment content to the system prompt.
Args:
messages (list): List of message dictionaries.
attachments (list): List of attachment dictionaries with content.
Returns:
list: Messages with attachment context added to the system prompt.
"""
prepared_messages = messages.copy()
# Build attachment content string
attachment_texts = []
for attachment in attachments:
logger.info(f"Adding attachment {attachment.get('id')} to context")
@@ -122,12 +160,13 @@ class OpenAILLMHandler(LLMHandler):
return resp
else:
text_buffer = ""
while True:
tool_calls = {}
for chunk in resp:
if isinstance(chunk, str) and len(chunk) > 0:
return
yield chunk
continue
elif hasattr(chunk, "delta"):
chunk_delta = chunk.delta
@@ -206,12 +245,17 @@ class OpenAILLMHandler(LLMHandler):
}
)
tool_calls = {}
if hasattr(chunk_delta, "content") and chunk_delta.content:
# Add to buffer or yield immediately based on your preference
text_buffer += chunk_delta.content
yield text_buffer
text_buffer = ""
if (
hasattr(chunk, "finish_reason")
and chunk.finish_reason == "stop"
):
return
return resp
elif isinstance(chunk, str) and len(chunk) == 0:
continue
@@ -227,7 +271,7 @@ class GoogleLLMHandler(LLMHandler):
from google.genai import types
messages = self.prepare_messages_with_attachments(agent, messages, attachments)
while True:
if not stream:
response = agent.llm.gen(
@@ -298,6 +342,9 @@ class GoogleLLMHandler(LLMHandler):
"content": [function_response_part.to_json_dict()],
}
)
else:
tool_call_found = False
yield result
if not tool_call_found:
return response

View File

@@ -657,6 +657,7 @@ class Answer(Resource):
source_log_docs = []
tool_calls = []
stream_ended = False
thought = ""
for line in complete_stream(
question=question,
@@ -679,6 +680,8 @@ class Answer(Resource):
source_log_docs = event["source"]
elif event["type"] == "tool_calls":
tool_calls = event["tool_calls"]
elif event["type"] == "thought":
thought = event["thought"]
elif event["type"] == "error":
logger.error(f"Error from stream: {event['error']}")
return bad_request(500, event["error"])
@@ -710,6 +713,7 @@ class Answer(Resource):
conversation_id,
question,
response_full,
thought,
source_log_docs,
tool_calls,
llm,
@@ -876,14 +880,7 @@ def get_attachments_content(attachment_ids, user):
)
if attachment_doc:
attachments.append(
{
"id": str(attachment_doc["_id"]),
"content": attachment_doc["content"],
"token_count": attachment_doc.get("token_count", 0),
"path": attachment_doc.get("path", ""),
}
)
attachments.append(attachment_doc)
except Exception as e:
logger.error(f"Error retrieving attachment {attachment_id}: {e}")

View File

@@ -2792,25 +2792,25 @@ class StoreAttachment(Resource):
user = secure_filename(decoded_token.get("sub"))
try:
attachment_id = ObjectId()
original_filename = secure_filename(file.filename)
folder_name = original_filename
save_dir = os.path.join(
current_dir, settings.UPLOAD_FOLDER, user, "attachments", folder_name
current_dir,
settings.UPLOAD_FOLDER,
user,
"attachments",
str(attachment_id),
)
os.makedirs(save_dir, exist_ok=True)
# Create directory structure: user/attachments/filename/
file_path = os.path.join(save_dir, original_filename)
# Handle filename conflicts
if os.path.exists(file_path):
name_parts = os.path.splitext(original_filename)
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
new_filename = f"{name_parts[0]}_{timestamp}{name_parts[1]}"
file_path = os.path.join(save_dir, new_filename)
original_filename = new_filename
file.save(file_path)
file_info = {"folder": folder_name, "filename": original_filename}
file_info = {
"filename": original_filename,
"attachment_id": str(attachment_id),
}
current_app.logger.info(f"Saved file: {file_path}")
# Start async task to process single file

View File

@@ -55,3 +55,12 @@ class BaseLLM(ABC):
def _supports_tools(self):
raise NotImplementedError("Subclass must implement _supports_tools method")
def get_supported_attachment_types(self):
"""
Return a list of MIME types supported by this LLM for file uploads.
Returns:
list: List of supported MIME types
"""
return [] # Default: no attachments supported

View File

@@ -1,5 +1,9 @@
from google import genai
from google.genai import types
import os
import logging
import mimetypes
import json
from application.llm.base import BaseLLM
@@ -9,6 +13,138 @@ class GoogleLLM(BaseLLM):
super().__init__(*args, **kwargs)
self.api_key = api_key
self.user_api_key = user_api_key
self.client = genai.Client(api_key=self.api_key)
def get_supported_attachment_types(self):
"""
Return a list of MIME types supported by Google Gemini for file uploads.
Returns:
list: List of supported MIME types
"""
return [
'application/pdf',
'image/png',
'image/jpeg',
'image/jpg',
'image/webp',
'image/gif'
]
def prepare_messages_with_attachments(self, messages, attachments=None):
"""
Process attachments using Google AI's file API for more efficient handling.
Args:
messages (list): List of message dictionaries.
attachments (list): List of attachment dictionaries with content and metadata.
Returns:
list: Messages formatted with file references for Google AI API.
"""
if not attachments:
return messages
prepared_messages = messages.copy()
# Find the user message to attach files to the last one
user_message_index = None
for i in range(len(prepared_messages) - 1, -1, -1):
if prepared_messages[i].get("role") == "user":
user_message_index = i
break
if user_message_index is None:
user_message = {"role": "user", "content": []}
prepared_messages.append(user_message)
user_message_index = len(prepared_messages) - 1
if isinstance(prepared_messages[user_message_index].get("content"), str):
text_content = prepared_messages[user_message_index]["content"]
prepared_messages[user_message_index]["content"] = [
{"type": "text", "text": text_content}
]
elif not isinstance(prepared_messages[user_message_index].get("content"), list):
prepared_messages[user_message_index]["content"] = []
files = []
for attachment in attachments:
mime_type = attachment.get('mime_type')
if not mime_type:
file_path = attachment.get('path')
if file_path:
mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
if mime_type in self.get_supported_attachment_types():
try:
file_uri = self._upload_file_to_google(attachment)
logging.info(f"GoogleLLM: Successfully uploaded file, got URI: {file_uri}")
files.append({"file_uri": file_uri, "mime_type": mime_type})
except Exception as e:
logging.error(f"GoogleLLM: Error uploading file: {e}")
if 'content' in attachment:
prepared_messages[user_message_index]["content"].append({
"type": "text",
"text": f"[File could not be processed: {attachment.get('path', 'unknown')}]"
})
if files:
logging.info(f"GoogleLLM: Adding {len(files)} files to message")
prepared_messages[user_message_index]["content"].append({
"files": files
})
return prepared_messages
def _upload_file_to_google(self, attachment):
"""
Upload a file to Google AI and return the file URI.
Args:
attachment (dict): Attachment dictionary with path and metadata.
Returns:
str: Google AI file URI for the uploaded file.
"""
if 'google_file_uri' in attachment:
return attachment['google_file_uri']
file_path = attachment.get('path')
if not file_path:
raise ValueError("No file path provided in attachment")
if not os.path.isabs(file_path):
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
file_path = os.path.join(current_dir, "application", file_path)
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
mime_type = attachment.get('mime_type')
if not mime_type:
mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
try:
response = self.client.files.upload(file=file_path)
file_uri = response.uri
from application.core.mongo_db import MongoDB
mongo = MongoDB.get_client()
db = mongo["docsgpt"]
attachments_collection = db["attachments"]
if '_id' in attachment:
attachments_collection.update_one(
{"_id": attachment['_id']},
{"$set": {"google_file_uri": file_uri}}
)
return file_uri
except Exception as e:
logging.error(f"Error uploading file to Google AI: {e}")
raise
def _clean_messages_google(self, messages):
cleaned_messages = []
@@ -26,7 +162,7 @@ class GoogleLLM(BaseLLM):
elif isinstance(content, list):
for item in content:
if "text" in item:
parts.append(types.Part.from_text(item["text"]))
parts.append(types.Part.from_text(text=item["text"]))
elif "function_call" in item:
parts.append(
types.Part.from_function_call(
@@ -41,6 +177,14 @@ class GoogleLLM(BaseLLM):
response=item["function_response"]["response"],
)
)
elif "files" in item:
for file_data in item["files"]:
parts.append(
types.Part.from_uri(
file_uri=file_data["file_uri"],
mime_type=file_data["mime_type"]
)
)
else:
raise ValueError(
f"Unexpected content dictionary format:{item}"
@@ -145,12 +289,26 @@ class GoogleLLM(BaseLLM):
if tools:
cleaned_tools = self._clean_tools_format(tools)
config.tools = cleaned_tools
# Check if we have both tools and file attachments
has_attachments = False
for message in messages:
for part in message.parts:
if hasattr(part, 'file_data') and part.file_data is not None:
has_attachments = True
break
if has_attachments:
break
logging.info(f"GoogleLLM: Starting stream generation. Model: {model}, Messages: {json.dumps(messages, default=str)}, Has attachments: {has_attachments}")
response = client.models.generate_content_stream(
model=model,
contents=messages,
config=config,
)
for chunk in response:
if hasattr(chunk, "candidates") and chunk.candidates:
for candidate in chunk.candidates:

View File

@@ -1,4 +1,8 @@
import json
import base64
import os
import mimetypes
import logging
from application.core.settings import settings
from application.llm.base import BaseLLM
@@ -65,6 +69,15 @@ class OpenAILLM(BaseLLM):
),
}
)
elif isinstance(item, dict):
content_parts = []
if "text" in item:
content_parts.append({"type": "text", "text": item["text"]})
elif "type" in item and item["type"] == "text" and "text" in item:
content_parts.append(item)
elif "type" in item and item["type"] == "file" and "file" in item:
content_parts.append(item)
cleaned_messages.append({"role": role, "content": content_parts})
else:
raise ValueError(
f"Unexpected content dictionary format: {item}"
@@ -133,6 +146,183 @@ class OpenAILLM(BaseLLM):
def _supports_tools(self):
return True
def get_supported_attachment_types(self):
"""
Return a list of MIME types supported by OpenAI for file uploads.
Returns:
list: List of supported MIME types
"""
return [
'application/pdf',
'image/png',
'image/jpeg',
'image/jpg',
'image/webp',
'image/gif'
]
def prepare_messages_with_attachments(self, messages, attachments=None):
"""
Process attachments using OpenAI's file API for more efficient handling.
Args:
messages (list): List of message dictionaries.
attachments (list): List of attachment dictionaries with content and metadata.
Returns:
list: Messages formatted with file references for OpenAI API.
"""
if not attachments:
return messages
prepared_messages = messages.copy()
# Find the user message to attach file_id to the last one
user_message_index = None
for i in range(len(prepared_messages) - 1, -1, -1):
if prepared_messages[i].get("role") == "user":
user_message_index = i
break
if user_message_index is None:
user_message = {"role": "user", "content": []}
prepared_messages.append(user_message)
user_message_index = len(prepared_messages) - 1
if isinstance(prepared_messages[user_message_index].get("content"), str):
text_content = prepared_messages[user_message_index]["content"]
prepared_messages[user_message_index]["content"] = [
{"type": "text", "text": text_content}
]
elif not isinstance(prepared_messages[user_message_index].get("content"), list):
prepared_messages[user_message_index]["content"] = []
for attachment in attachments:
mime_type = attachment.get('mime_type')
if not mime_type:
file_path = attachment.get('path')
if file_path:
mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
if mime_type and mime_type.startswith('image/'):
try:
base64_image = self._get_base64_image(attachment)
prepared_messages[user_message_index]["content"].append({
"type": "image_url",
"image_url": {
"url": f"data:{mime_type};base64,{base64_image}"
}
})
except Exception as e:
logging.error(f"Error processing image attachment: {e}")
if 'content' in attachment:
prepared_messages[user_message_index]["content"].append({
"type": "text",
"text": f"[Image could not be processed: {attachment.get('path', 'unknown')}]"
})
# Handle PDFs using the file API
elif mime_type == 'application/pdf':
try:
file_id = self._upload_file_to_openai(attachment)
prepared_messages[user_message_index]["content"].append({
"type": "file",
"file": {"file_id": file_id}
})
except Exception as e:
logging.error(f"Error uploading PDF to OpenAI: {e}")
if 'content' in attachment:
prepared_messages[user_message_index]["content"].append({
"type": "text",
"text": f"File content:\n\n{attachment['content']}"
})
return prepared_messages
def _get_base64_image(self, attachment):
"""
Convert an image file to base64 encoding.
Args:
attachment (dict): Attachment dictionary with path and metadata.
Returns:
str: Base64-encoded image data.
"""
file_path = attachment.get('path')
if not file_path:
raise ValueError("No file path provided in attachment")
if not os.path.isabs(file_path):
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
file_path = os.path.join(current_dir, "application", file_path)
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
with open(file_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode('utf-8')
def _upload_file_to_openai(self, attachment): ##pdfs
"""
Upload a file to OpenAI and return the file_id.
Args:
attachment (dict): Attachment dictionary with path and metadata.
Expected keys:
- path: Path to the file
- id: Optional MongoDB ID for caching
Returns:
str: OpenAI file_id for the uploaded file.
"""
import os
import logging
if 'openai_file_id' in attachment:
return attachment['openai_file_id']
file_path = attachment.get('path')
if not file_path:
raise ValueError("No file path provided in attachment")
if not os.path.isabs(file_path):
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
file_path = os.path.join(current_dir,"application", file_path)
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
try:
with open(file_path, 'rb') as file:
response = self.client.files.create(
file=file,
purpose="assistants"
)
file_id = response.id
from application.core.mongo_db import MongoDB
mongo = MongoDB.get_client()
db = mongo["docsgpt"]
attachments_collection = db["attachments"]
if '_id' in attachment:
attachments_collection.update_one(
{"_id": attachment['_id']},
{"$set": {"openai_file_id": file_id}}
)
return file_id
except Exception as e:
logging.error(f"Error uploading file to OpenAI: {e}")
raise
class AzureOpenAILLM(OpenAILLM):

View File

@@ -73,7 +73,13 @@ class PandasCSVParser(BaseParser):
for more information.
Set to empty dict by default, this means pandas will try to figure
out the separators, table head, etc. on its own.
header_period (int): Controls how headers are included in output:
- 0: Headers only at the beginning
- 1: Headers in every row
- N > 1: Headers every N rows
header_prefix (str): Prefix for header rows. Default is "HEADERS: ".
"""
def __init__(
@@ -83,6 +89,8 @@ class PandasCSVParser(BaseParser):
col_joiner: str = ", ",
row_joiner: str = "\n",
pandas_config: dict = {},
header_period: int = 20,
header_prefix: str = "HEADERS: ",
**kwargs: Any
) -> None:
"""Init params."""
@@ -91,6 +99,8 @@ class PandasCSVParser(BaseParser):
self._col_joiner = col_joiner
self._row_joiner = row_joiner
self._pandas_config = pandas_config
self._header_period = header_period
self._header_prefix = header_prefix
def _init_parser(self) -> Dict:
"""Init parser."""
@@ -104,15 +114,26 @@ class PandasCSVParser(BaseParser):
raise ValueError("pandas module is required to read CSV files.")
df = pd.read_csv(file, **self._pandas_config)
headers = df.columns.tolist()
header_row = f"{self._header_prefix}{self._col_joiner.join(headers)}"
text_list = df.apply(
lambda row: (self._col_joiner).join(row.astype(str).tolist()), axis=1
).tolist()
if not self._concat_rows:
return df.apply(
lambda row: (self._col_joiner).join(row.astype(str).tolist()), axis=1
).tolist()
text_list = []
if self._header_period != 1:
text_list.append(header_row)
for i, row in df.iterrows():
if (self._header_period > 1 and i > 0 and i % self._header_period == 0):
text_list.append(header_row)
text_list.append(self._col_joiner.join(row.astype(str).tolist()))
if self._header_period == 1 and i < len(df) - 1:
text_list.append(header_row)
if self._concat_rows:
return (self._row_joiner).join(text_list)
else:
return text_list
return self._row_joiner.join(text_list)
class ExcelParser(BaseParser):
@@ -138,7 +159,13 @@ class ExcelParser(BaseParser):
for more information.
Set to empty dict by default, this means pandas will try to figure
out the table structure on its own.
header_period (int): Controls how headers are included in output:
- 0: Headers only at the beginning (default)
- 1: Headers in every row
- N > 1: Headers every N rows
header_prefix (str): Prefix for header rows. Default is "HEADERS: ".
"""
def __init__(
@@ -148,6 +175,8 @@ class ExcelParser(BaseParser):
col_joiner: str = ", ",
row_joiner: str = "\n",
pandas_config: dict = {},
header_period: int = 20,
header_prefix: str = "HEADERS: ",
**kwargs: Any
) -> None:
"""Init params."""
@@ -156,6 +185,8 @@ class ExcelParser(BaseParser):
self._col_joiner = col_joiner
self._row_joiner = row_joiner
self._pandas_config = pandas_config
self._header_period = header_period
self._header_prefix = header_prefix
def _init_parser(self) -> Dict:
"""Init parser."""
@@ -169,12 +200,22 @@ class ExcelParser(BaseParser):
raise ValueError("pandas module is required to read Excel files.")
df = pd.read_excel(file, **self._pandas_config)
headers = df.columns.tolist()
header_row = f"{self._header_prefix}{self._col_joiner.join(headers)}"
if not self._concat_rows:
return df.apply(
lambda row: (self._col_joiner).join(row.astype(str).tolist()), axis=1
).tolist()
text_list = []
if self._header_period != 1:
text_list.append(header_row)
text_list = df.apply(
lambda row: (self._col_joiner).join(row.astype(str).tolist()), axis=1
).tolist()
if self._concat_rows:
return (self._row_joiner).join(text_list)
else:
return text_list
for i, row in df.iterrows():
if (self._header_period > 1 and i > 0 and i % self._header_period == 0):
text_list.append(header_row)
text_list.append(self._col_joiner.join(row.astype(str).tolist()))
if self._header_period == 1 and i < len(df) - 1:
text_list.append(header_row)
return self._row_joiner.join(text_list)

View File

@@ -41,7 +41,7 @@ lxml==5.3.1
markupsafe==3.0.2
marshmallow==3.26.1
mpmath==1.3.0
multidict==6.1.0
multidict==6.3.2
mypy-extensions==1.0.0
networkx==3.4.2
numpy==2.2.1

View File

@@ -328,34 +328,30 @@ def attachment_worker(self, directory, file_info, user):
"""
import datetime
import os
import mimetypes
from application.utils import num_tokens_from_string
mongo = MongoDB.get_client()
db = mongo["docsgpt"]
attachments_collection = db["attachments"]
job_name = file_info["folder"]
logging.info(f"Processing attachment: {job_name}", extra={"user": user, "job": job_name})
filename = file_info["filename"]
attachment_id = file_info["attachment_id"]
logging.info(f"Processing attachment: {attachment_id}/{filename}", extra={"user": user})
self.update_state(state="PROGRESS", meta={"current": 10})
folder_name = file_info["folder"]
filename = file_info["filename"]
file_path = os.path.join(directory, filename)
logging.info(f"Processing file: {file_path}", extra={"user": user, "job": job_name})
if not os.path.exists(file_path):
logging.warning(f"File not found: {file_path}", extra={"user": user, "job": job_name})
return {"error": "File not found"}
logging.warning(f"File not found: {file_path}", extra={"user": user})
raise FileNotFoundError(f"File not found: {file_path}")
try:
reader = SimpleDirectoryReader(
input_files=[file_path]
)
documents = reader.load_data()
self.update_state(state="PROGRESS", meta={"current": 50})
@@ -364,33 +360,37 @@ def attachment_worker(self, directory, file_info, user):
content = documents[0].text
token_count = num_tokens_from_string(content)
file_path_relative = f"{user}/attachments/{folder_name}/{filename}"
file_path_relative = f"{settings.UPLOAD_FOLDER}/{user}/attachments/{attachment_id}/{filename}"
attachment_id = attachments_collection.insert_one({
mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
doc_id = ObjectId(attachment_id)
attachments_collection.insert_one({
"_id": doc_id,
"user": user,
"path": file_path_relative,
"content": content,
"token_count": token_count,
"mime_type": mime_type,
"date": datetime.datetime.now(),
}).inserted_id
})
logging.info(f"Stored attachment with ID: {attachment_id}",
extra={"user": user, "job": job_name})
extra={"user": user})
self.update_state(state="PROGRESS", meta={"current": 100})
return {
"attachment_id": str(attachment_id),
"filename": filename,
"folder": folder_name,
"path": file_path_relative,
"token_count": token_count
"token_count": token_count,
"attachment_id": attachment_id,
"mime_type": mime_type
}
else:
logging.warning("No content was extracted from the file",
extra={"user": user, "job": job_name})
return {"error": "No content was extracted from the file"}
extra={"user": user})
raise ValueError("No content was extracted from the file")
except Exception as e:
logging.error(f"Error processing file {filename}: {e}",
extra={"user": user, "job": job_name}, exc_info=True)
return {"error": f"Error processing file: {str(e)}"}
logging.error(f"Error processing file {filename}: {e}", extra={"user": user}, exc_info=True)
raise