fix: enhance error logging with exception info across multiple modules

Alex
2025-05-05 13:12:39 +01:00
parent cf333873fd
commit 481df4d604
14 changed files with 37 additions and 30 deletions

View File

@@ -19,7 +19,7 @@ def add_text_to_store_with_retry(store, doc, source_id):
         doc.metadata["source_id"] = str(source_id)
         store.add_texts([doc.page_content], metadatas=[doc.metadata])
     except Exception as e:
-        logging.error(f"Failed to add document with retry: {e}")
+        logging.error(f"Failed to add document with retry: {e}", exc_info=True)
         raise
@@ -75,7 +75,7 @@ def embed_and_store_documents(docs, folder_name, source_id, task_status):
             # Add document to vector store
             add_text_to_store_with_retry(store, doc, source_id)
         except Exception as e:
-            logging.error(f"Error embedding document {idx}: {e}")
+            logging.error(f"Error embedding document {idx}: {e}", exc_info=True)
             logging.info(f"Saving progress at document {idx} out of {total_docs}")
             store.save_local(folder_name)
             break
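
Note: the only behavioural change in these hunks is the added exc_info=True, which makes the logging module attach the active exception's traceback to the record instead of just the message. A minimal, self-contained sketch of the difference (the divide() helper is hypothetical, not part of this repository):

import logging

logging.basicConfig(level=logging.INFO)

def divide(a, b):
    try:
        return a / b
    except Exception as e:
        # Message only: "Failed to divide 1 by 0: division by zero"
        logging.error(f"Failed to divide {a} by {b}: {e}")
        # Same message, plus the full ZeroDivisionError traceback
        logging.error(f"Failed to divide {a} by {b}: {e}", exc_info=True)
        raise

# divide(1, 0)  # uncomment to see both log lines on stderr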

View File

@@ -1,3 +1,4 @@
+import logging
 import requests
 from urllib.parse import urlparse, urljoin
 from bs4 import BeautifulSoup
@@ -42,7 +43,7 @@ class CrawlerLoader(BaseRemote):
                     )
                 )
             except Exception as e:
-                print(f"Error processing URL {current_url}: {e}")
+                logging.error(f"Error processing URL {current_url}: {e}", exc_info=True)
                 continue
             # Parse the HTML content to extract all links
@@ -61,4 +62,4 @@ class CrawlerLoader(BaseRemote):
             if self.limit is not None and len(visited_urls) >= self.limit:
                 break
-        return loaded_content
+        return loaded_content
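
Aside (not part of this change): inside an except block, logging.exception(msg) is shorthand for logging.error(msg, exc_info=True), so the loaders could equivalently use it. A small self-contained sketch with a placeholder URL and a forced error:

import logging

logging.basicConfig(level=logging.ERROR)

current_url = "https://example.invalid/page"  # placeholder, for illustration only
try:
    raise ConnectionError("connection refused")
except Exception as e:
    # These two calls produce equivalent records (ERROR level plus traceback):
    logging.error(f"Error processing URL {current_url}: {e}", exc_info=True)
    logging.exception(f"Error processing URL {current_url}: {e}")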

View File

@@ -1,3 +1,4 @@
+import logging
 import requests
 import re # Import regular expression library
 import xml.etree.ElementTree as ET
@@ -32,7 +33,7 @@ class SitemapLoader(BaseRemote):
                 documents.extend(loader.load())
                 processed_urls += 1 # Increment the counter after processing each URL
             except Exception as e:
-                print(f"Error processing URL {url}: {e}")
+                logging.error(f"Error processing URL {url}: {e}", exc_info=True)
                 continue
         return documents

View File

@@ -1,3 +1,4 @@
+import logging
 from application.parser.remote.base import BaseRemote
 from application.parser.schema.base import Document
 from langchain_community.document_loaders import WebBaseLoader
@@ -39,6 +40,6 @@ class WebLoader(BaseRemote):
                     )
                 )
             except Exception as e:
-                print(f"Error processing URL {url}: {e}")
+                logging.error(f"Error processing URL {url}: {e}", exc_info=True)
                 continue
-        return documents
+        return documents
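
One follow-up consideration, not covered by this diff: print() wrote unconditionally to stdout, while the new logging.error() calls go through the logging system. Python's unconfigured root logger still emits WARNING and above to stderr, but an explicit configuration in the application's entry point gives timestamped, parseable output for the tracebacks that exc_info=True now attaches. The format string below is only illustrative:

import logging

# Illustrative configuration; the real entry point and format are up to the app.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
)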