diff --git a/application/api/user/routes.py b/application/api/user/routes.py index 9bc8cd80..309d8ebe 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -70,7 +70,7 @@ try: ) users_collection.create_index("user_id", unique=True) except Exception as e: - current_app.logger.warning(f"Can't create indexes: {e}", ) + print("Error creating indexes:", e) user = Blueprint("user", __name__) user_ns = Namespace("user", description="User related operations", path="/") diff --git a/application/core/settings.py b/application/core/settings.py index 04266fed..9303b996 100644 --- a/application/core/settings.py +++ b/application/core/settings.py @@ -30,6 +30,7 @@ class Settings(BaseSettings): } UPLOAD_FOLDER: str = "inputs" PARSE_PDF_AS_IMAGE: bool = False + PARSE_IMAGE_REMOTE: bool = False VECTOR_STORE: str = ( "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb" ) diff --git a/application/parser/file/image_parser.py b/application/parser/file/image_parser.py index fd800d91..c7aa66a3 100644 --- a/application/parser/file/image_parser.py +++ b/application/parser/file/image_parser.py @@ -8,6 +8,7 @@ import requests from typing import Dict, Union from application.parser.file.base_parser import BaseParser +from application.core.settings import settings class ImageParser(BaseParser): @@ -18,10 +19,13 @@ class ImageParser(BaseParser): return {} def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, list[str]]: - doc2md_service = "https://llm.arc53.com/doc2md" - # alternatively you can use local vision capable LLM - with open(file, "rb") as file_loaded: - files = {'file': file_loaded} - response = requests.post(doc2md_service, files=files) - data = response.json()["markdown"] + if settings.PARSE_IMAGE_REMOTE: + doc2md_service = "https://llm.arc53.com/doc2md" + # alternatively you can use local vision capable LLM + with open(file, "rb") as file_loaded: + files = {'file': file_loaded} + response = requests.post(doc2md_service, files=files) + data = response.json()["markdown"] + else: + data = "" return data