From 481df4d6047194ecadac267fa5965c9174d4691a Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 5 May 2025 13:12:39 +0100 Subject: [PATCH] fix: enhance error logging with exception info across multiple modules --- application/agents/llm_handler.py | 2 ++ application/api/answer/routes.py | 7 +++---- application/api/user/routes.py | 8 ++++---- application/cache.py | 8 ++++---- application/llm/google_ai.py | 4 ++-- application/llm/openai.py | 6 +++--- application/logging.py | 2 +- application/parser/embedding_pipeline.py | 4 ++-- application/parser/remote/crawler_loader.py | 5 +++-- application/parser/remote/sitemap_loader.py | 3 ++- application/parser/remote/web_loader.py | 5 +++-- application/retriever/classic_rag.py | 3 ++- application/vectorstore/mongodb.py | 5 +++-- application/worker.py | 4 ++-- 14 files changed, 36 insertions(+), 30 deletions(-) diff --git a/application/agents/llm_handler.py b/application/agents/llm_handler.py index bf39f625..9e37408c 100644 --- a/application/agents/llm_handler.py +++ b/application/agents/llm_handler.py @@ -137,6 +137,7 @@ class OpenAILLMHandler(LLMHandler): messages = self.prepare_messages_with_attachments(agent, messages, attachments) except Exception as e: + logging.error(f"Error executing tool: {str(e)}", exc_info=True) messages.append( { "role": "tool", @@ -229,6 +230,7 @@ class OpenAILLMHandler(LLMHandler): ) except Exception as e: + logging.error(f"Error executing tool: {str(e)}", exc_info=True) messages.append( { "role": "assistant", diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py index 2a8476d8..abc1f9ba 100644 --- a/application/api/answer/routes.py +++ b/application/api/answer/routes.py @@ -105,7 +105,7 @@ def get_agent_key(agent_id, user_id): raise Exception("Unauthorized access to the agent", 403) except Exception as e: - logger.error(f"Error 
in get_agent_key: {str(e)}") + logger.error(f"Error in get_agent_key: {str(e)}", exc_info=True) raise @@ -351,8 +351,7 @@ def complete_stream( data = json.dumps({"type": "end"}) yield f"data: {data}\n\n" except Exception as e: - logger.error(f"Error in stream: {str(e)}") - logger.error(traceback.format_exc()) + logger.error(f"Error in stream: {str(e)}", exc_info=True) data = json.dumps( { "type": "error", @@ -882,6 +881,6 @@ def get_attachments_content(attachment_ids, user): if attachment_doc: attachments.append(attachment_doc) except Exception as e: - logger.error(f"Error retrieving attachment {attachment_id}: {e}") + logger.error(f"Error retrieving attachment {attachment_id}: {e}", exc_info=True) return attachments diff --git a/application/api/user/routes.py b/application/api/user/routes.py index d96d6202..43e54177 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -2755,7 +2755,7 @@ class GetChunks(Resource): ) except Exception as e: - current_app.logger.error(f"Error getting chunks: {e}") + current_app.logger.error(f"Error getting chunks: {e}", exc_info=True) return make_response(jsonify({"success": False}), 500) @@ -2809,7 +2809,7 @@ class AddChunk(Resource): 201, ) except Exception as e: - current_app.logger.error(f"Error adding chunk: {e}") + current_app.logger.error(f"Error adding chunk: {e}", exc_info=True) return make_response(jsonify({"success": False}), 500) @@ -2849,7 +2849,7 @@ class DeleteChunk(Resource): 404, ) except Exception as e: - current_app.logger.error(f"Error deleting chunk: {e}") + current_app.logger.error(f"Error deleting chunk: {e}", exc_info=True) return make_response(jsonify({"success": False}), 500) @@ -2931,7 +2931,7 @@ class UpdateChunk(Resource): 200, ) except Exception as e: - current_app.logger.error(f"Error updating chunk: {e}") + current_app.logger.error(f"Error updating chunk: {e}", exc_info=True) return make_response(jsonify({"success": False}), 500) diff --git a/application/cache.py 
b/application/cache.py index 117b444a..3fdb6b8d 100644 --- a/application/cache.py +++ b/application/cache.py @@ -61,14 +61,14 @@ def gen_cache(func): if cached_response: return cached_response.decode("utf-8") except Exception as e: - logger.error(f"Error getting cached response: {e}") + logger.error(f"Error getting cached response: {e}", exc_info=True) result = func(self, model, messages, stream, tools, *args, **kwargs) if redis_client and isinstance(result, str): try: redis_client.set(cache_key, result, ex=1800) except Exception as e: - logger.error(f"Error setting cache: {e}") + logger.error(f"Error setting cache: {e}", exc_info=True) return result @@ -100,7 +100,7 @@ def stream_cache(func): time.sleep(0.03) # Simulate streaming delay return except Exception as e: - logger.error(f"Error getting cached stream: {e}") + logger.error(f"Error getting cached stream: {e}", exc_info=True) stream_cache_data = [] for chunk in func(self, model, messages, stream, tools, *args, **kwargs): @@ -112,6 +112,6 @@ def stream_cache(func): redis_client.set(cache_key, json.dumps(stream_cache_data), ex=1800) logger.info(f"Stream cache saved for key: {cache_key}") except Exception as e: - logger.error(f"Error setting stream cache: {e}") + logger.error(f"Error setting stream cache: {e}", exc_info=True) return wrapper diff --git a/application/llm/google_ai.py b/application/llm/google_ai.py index a56616d2..b749431b 100644 --- a/application/llm/google_ai.py +++ b/application/llm/google_ai.py @@ -78,7 +78,7 @@ class GoogleLLM(BaseLLM): logging.info(f"GoogleLLM: Successfully uploaded file, got URI: {file_uri}") files.append({"file_uri": file_uri, "mime_type": mime_type}) except Exception as e: - logging.error(f"GoogleLLM: Error uploading file: {e}") + logging.error(f"GoogleLLM: Error uploading file: {e}", exc_info=True) if 'content' in attachment: prepared_messages[user_message_index]["content"].append({ "type": "text", @@ -131,7 +131,7 @@ class GoogleLLM(BaseLLM): return file_uri except 
Exception as e: - logging.error(f"Error uploading file to Google AI: {e}") + logging.error(f"Error uploading file to Google AI: {e}", exc_info=True) raise def _clean_messages_google(self, messages): diff --git a/application/llm/openai.py b/application/llm/openai.py index 248fd7e2..c918768d 100644 --- a/application/llm/openai.py +++ b/application/llm/openai.py @@ -213,7 +213,7 @@ class OpenAILLM(BaseLLM): } }) except Exception as e: - logging.error(f"Error processing image attachment: {e}") + logging.error(f"Error processing image attachment: {e}", exc_info=True) if 'content' in attachment: prepared_messages[user_message_index]["content"].append({ "type": "text", @@ -228,7 +228,7 @@ class OpenAILLM(BaseLLM): "file": {"file_id": file_id} }) except Exception as e: - logging.error(f"Error uploading PDF to OpenAI: {e}") + logging.error(f"Error uploading PDF to OpenAI: {e}", exc_info=True) if 'content' in attachment: prepared_messages[user_message_index]["content"].append({ "type": "text", @@ -301,7 +301,7 @@ class OpenAILLM(BaseLLM): return file_id except Exception as e: - logging.error(f"Error uploading file to OpenAI: {e}") + logging.error(f"Error uploading file to OpenAI: {e}", exc_info=True) raise diff --git a/application/logging.py b/application/logging.py index b447ffa8..d48fb17e 100644 --- a/application/logging.py +++ b/application/logging.py @@ -151,4 +151,4 @@ def _log_to_mongodb( logging.debug(f"Logged activity to MongoDB: {activity_id}") except Exception as e: - logging.error(f"Failed to log to MongoDB: {e}") + logging.error(f"Failed to log to MongoDB: {e}", exc_info=True) diff --git a/application/parser/embedding_pipeline.py b/application/parser/embedding_pipeline.py index 0435cd14..87d9a8d5 100755 --- a/application/parser/embedding_pipeline.py +++ b/application/parser/embedding_pipeline.py @@ -19,7 +19,7 @@ def add_text_to_store_with_retry(store, doc, source_id): doc.metadata["source_id"] = str(source_id) store.add_texts([doc.page_content], 
metadatas=[doc.metadata]) except Exception as e: - logging.error(f"Failed to add document with retry: {e}") + logging.error(f"Failed to add document with retry: {e}", exc_info=True) raise @@ -75,7 +75,7 @@ def embed_and_store_documents(docs, folder_name, source_id, task_status): # Add document to vector store add_text_to_store_with_retry(store, doc, source_id) except Exception as e: - logging.error(f"Error embedding document {idx}: {e}") + logging.error(f"Error embedding document {idx}: {e}", exc_info=True) logging.info(f"Saving progress at document {idx} out of {total_docs}") store.save_local(folder_name) break diff --git a/application/parser/remote/crawler_loader.py b/application/parser/remote/crawler_loader.py index c2da230b..2ff6cf6f 100644 --- a/application/parser/remote/crawler_loader.py +++ b/application/parser/remote/crawler_loader.py @@ -1,3 +1,4 @@ +import logging import requests from urllib.parse import urlparse, urljoin from bs4 import BeautifulSoup @@ -42,7 +43,7 @@ class CrawlerLoader(BaseRemote): ) ) except Exception as e: - print(f"Error processing URL {current_url}: {e}") + logging.error(f"Error processing URL {current_url}: {e}", exc_info=True) continue # Parse the HTML content to extract all links @@ -61,4 +62,4 @@ class CrawlerLoader(BaseRemote): if self.limit is not None and len(visited_urls) >= self.limit: break - return loaded_content \ No newline at end of file + return loaded_content diff --git a/application/parser/remote/sitemap_loader.py b/application/parser/remote/sitemap_loader.py index 8066f4f6..6d54ea9b 100644 --- a/application/parser/remote/sitemap_loader.py +++ b/application/parser/remote/sitemap_loader.py @@ -1,3 +1,4 @@ +import logging import requests import re # Import regular expression library import xml.etree.ElementTree as ET @@ -32,7 +33,7 @@ class SitemapLoader(BaseRemote): documents.extend(loader.load()) processed_urls += 1 # Increment the counter after processing each URL except Exception as e: - print(f"Error processing 
URL {url}: {e}") + logging.error(f"Error processing URL {url}: {e}", exc_info=True) continue return documents diff --git a/application/parser/remote/web_loader.py b/application/parser/remote/web_loader.py index cc1cdcb8..77cb145b 100644 --- a/application/parser/remote/web_loader.py +++ b/application/parser/remote/web_loader.py @@ -1,3 +1,4 @@ +import logging from application.parser.remote.base import BaseRemote from application.parser.schema.base import Document from langchain_community.document_loaders import WebBaseLoader @@ -39,6 +40,6 @@ class WebLoader(BaseRemote): ) ) except Exception as e: - print(f"Error processing URL {url}: {e}") + logging.error(f"Error processing URL {url}: {e}", exc_info=True) continue - return documents \ No newline at end of file + return documents diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py index 08771337..b8ac69e4 100644 --- a/application/retriever/classic_rag.py +++ b/application/retriever/classic_rag.py @@ -1,3 +1,4 @@ +import logging from application.core.settings import settings from application.llm.llm_creator import LLMCreator from application.retriever.base import BaseRetriever @@ -72,7 +73,7 @@ class ClassicRAG(BaseRetriever): print(f"Rephrased query: {rephrased_query}") return rephrased_query if rephrased_query else self.original_question except Exception as e: - print(f"Error rephrasing query: {e}") + logging.error(f"Error rephrasing query: {e}", exc_info=True) return self.original_question def _get_data(self): diff --git a/application/vectorstore/mongodb.py b/application/vectorstore/mongodb.py index 94b757e0..aadd4652 100644 --- a/application/vectorstore/mongodb.py +++ b/application/vectorstore/mongodb.py @@ -1,3 +1,4 @@ +import logging from application.core.settings import settings from application.vectorstore.base import BaseVectorStore from application.vectorstore.document_class import Document @@ -146,7 +147,7 @@ class MongoDBVectorStore(BaseVectorStore): return chunks 
except Exception as e: - print(f"Error getting chunks: {e}") + logging.error(f"Error getting chunks: {e}", exc_info=True) return [] def add_chunk(self, text, metadata=None): @@ -172,5 +173,5 @@ class MongoDBVectorStore(BaseVectorStore): result = self._collection.delete_one({"_id": object_id}) return result.deleted_count > 0 except Exception as e: - print(f"Error deleting chunk: {e}") + logging.error(f"Error deleting chunk: {e}", exc_info=True) return False diff --git a/application/worker.py b/application/worker.py index bebd88a6..619993c9 100755 --- a/application/worker.py +++ b/application/worker.py @@ -75,7 +75,7 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5): zip_ref.extractall(extract_to) os.remove(zip_path) # Remove the zip file after extracting except Exception as e: - logging.error(f"Error extracting zip file {zip_path}: {e}") + logging.error(f"Error extracting zip file {zip_path}: {e}", exc_info=True) return # Check for nested zip files and extract them @@ -403,7 +403,7 @@ def sync( doc_id, ) except Exception as e: - logging.error(f"Error during sync: {e}") + logging.error(f"Error during sync: {e}", exc_info=True) return {"status": "error", "error": str(e)} return {"status": "success"}