fix: enhance error logging with exception info across multiple modules

Alex
2025-05-05 13:12:39 +01:00
parent cf333873fd
commit 481df4d604
14 changed files with 37 additions and 30 deletions

View File

@@ -19,7 +19,7 @@ def add_text_to_store_with_retry(store, doc, source_id):
         doc.metadata["source_id"] = str(source_id)
         store.add_texts([doc.page_content], metadatas=[doc.metadata])
     except Exception as e:
-        logging.error(f"Failed to add document with retry: {e}")
+        logging.error(f"Failed to add document with retry: {e}", exc_info=True)
         raise
@@ -75,7 +75,7 @@ def embed_and_store_documents(docs, folder_name, source_id, task_status):
             # Add document to vector store
             add_text_to_store_with_retry(store, doc, source_id)
         except Exception as e:
-            logging.error(f"Error embedding document {idx}: {e}")
+            logging.error(f"Error embedding document {idx}: {e}", exc_info=True)
             logging.info(f"Saving progress at document {idx} out of {total_docs}")
             store.save_local(folder_name)
             break
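
Note: the only behavioural change in these hunks is the added exc_info=True, which makes the logging module attach the active exception's traceback to the record instead of just the message. A minimal, self-contained sketch of the difference (the divide() helper is hypothetical, not part of this repository):

import logging

logging.basicConfig(level=logging.INFO)

def divide(a, b):
    try:
        return a / b
    except Exception as e:
        # Message only: "Failed to divide 1 by 0: division by zero"
        logging.error(f"Failed to divide {a} by {b}: {e}")
        # Same message, plus the full ZeroDivisionError traceback
        logging.error(f"Failed to divide {a} by {b}: {e}", exc_info=True)
        raise

# divide(1, 0)  # uncomment to see both log lines on stderr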

View File

@@ -1,3 +1,4 @@
+import logging
 import requests
 from urllib.parse import urlparse, urljoin
 from bs4 import BeautifulSoup
@@ -42,7 +43,7 @@ class CrawlerLoader(BaseRemote):
                     )
                 )
             except Exception as e:
-                print(f"Error processing URL {current_url}: {e}")
+                logging.error(f"Error processing URL {current_url}: {e}", exc_info=True)
                 continue
             # Parse the HTML content to extract all links
@@ -61,4 +62,4 @@ class CrawlerLoader(BaseRemote):
             if self.limit is not None and len(visited_urls) >= self.limit:
                 break
-        return loaded_content
+        return loaded_content
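
Aside (not part of this change): inside an except block, logging.exception(msg) is shorthand for logging.error(msg, exc_info=True), so the loaders could equivalently use it. A small self-contained sketch with a placeholder URL and a forced error:

import logging

logging.basicConfig(level=logging.ERROR)

current_url = "https://example.invalid/page"  # placeholder, for illustration only
try:
    raise ConnectionError("connection refused")
except Exception as e:
    # These two calls produce equivalent records (ERROR level plus traceback):
    logging.error(f"Error processing URL {current_url}: {e}", exc_info=True)
    logging.exception(f"Error processing URL {current_url}: {e}")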

View File

@@ -1,3 +1,4 @@
+import logging
 import requests
 import re # Import regular expression library
 import xml.etree.ElementTree as ET
@@ -32,7 +33,7 @@ class SitemapLoader(BaseRemote):
                 documents.extend(loader.load())
                 processed_urls += 1 # Increment the counter after processing each URL
             except Exception as e:
-                print(f"Error processing URL {url}: {e}")
+                logging.error(f"Error processing URL {url}: {e}", exc_info=True)
                 continue
         return documents

View File

@@ -1,3 +1,4 @@
+import logging
 from application.parser.remote.base import BaseRemote
 from application.parser.schema.base import Document
 from langchain_community.document_loaders import WebBaseLoader
@@ -39,6 +40,6 @@ class WebLoader(BaseRemote):
                     )
                 )
             except Exception as e:
-                print(f"Error processing URL {url}: {e}")
+                logging.error(f"Error processing URL {url}: {e}", exc_info=True)
                 continue
-        return documents
+        return documents
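
One follow-up consideration, not covered by this diff: print() wrote unconditionally to stdout, while the new logging.error() calls go through the logging system. Python's unconfigured root logger still emits WARNING and above to stderr, but an explicit configuration in the application's entry point gives timestamped, parseable output for the tracebacks that exc_info=True now attaches. The format string below is only illustrative:

import logging

# Illustrative configuration; the real entry point and format are up to the app.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
)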