fix: improve error logging for index creation and add PARSE_IMAGE_REMOTE setting

This commit is contained in:
Alex
2025-08-06 10:40:20 +01:00
parent 1282e7687f
commit 9281fac898
3 changed files with 12 additions and 7 deletions

View File

@@ -70,7 +70,7 @@ try:
)
users_collection.create_index("user_id", unique=True)
except Exception as e:
current_app.logger.warning(f"Can't create indexes: {e}", )
print("Error creating indexes:", e)
user = Blueprint("user", __name__)
user_ns = Namespace("user", description="User related operations", path="/")

View File

@@ -30,6 +30,7 @@ class Settings(BaseSettings):
}
UPLOAD_FOLDER: str = "inputs"
PARSE_PDF_AS_IMAGE: bool = False
PARSE_IMAGE_REMOTE: bool = False
VECTOR_STORE: str = (
"faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb"
)

View File

@@ -8,6 +8,7 @@ import requests
from typing import Dict, Union
from application.parser.file.base_parser import BaseParser
from application.core.settings import settings
class ImageParser(BaseParser):
@@ -18,10 +19,13 @@ class ImageParser(BaseParser):
return {}
def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, list[str]]:
    """Convert an image file to markdown using the remote doc2md service.

    The image is uploaded only when ``settings.PARSE_IMAGE_REMOTE`` is
    truthy. When it is falsy no network request is made and an empty
    string is returned.

    Args:
        file: Path of the image to parse; it is opened in binary mode.
        errors: Not used by this method.

    Returns:
        The value stored under the ``"markdown"`` key of the service's JSON
        response, or ``""`` when remote parsing is disabled.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            service answers with a 4xx/5xx status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON response has no ``"markdown"`` key.
    """
    # Guard clause: never send the file to a third party unless the
    # operator explicitly opted in through the setting.
    if not settings.PARSE_IMAGE_REMOTE:
        return ""

    doc2md_service = "https://llm.arc53.com/doc2md"
    # Alternatively, a local vision-capable LLM could be used here.
    with open(file, "rb") as file_loaded:
        files = {'file': file_loaded}
        # Without a timeout requests waits forever on a stalled server.
        # The value is deliberately generous; tune it to the service.
        response = requests.post(doc2md_service, files=files, timeout=120)

    # Surface HTTP errors directly instead of as a confusing JSON/KeyError.
    response.raise_for_status()
    return response.json()["markdown"]