Merge branch 'main' into feat/enhance-agents

2026-02-23 21:01:20 +00:00 · 2025-05-12 13:45:18 +05:30
parent 6520be5b85 d84c416421
commit ec7f14b82d
49 changed files with 2518 additions and 642 deletions
--- a/application/agents/classic_agent.py
+++ b/application/agents/classic_agent.py
@@ -57,4 +57,8 @@ class ClassicAgent(BaseAgent):
        )

        yield {"sources": retrieved_data}
+        # Clean tool call data: send only the first 50 characters of tool_call['result']
+        for tool_call in self.tool_calls:
+            if len(str(tool_call["result"])) > 50:
+                tool_call["result"] = str(tool_call["result"])[:50] + "..."
        yield {"tool_calls": self.tool_calls.copy()}
--- a/application/agents/react_agent.py
+++ b/application/agents/react_agent.py
@@ -87,6 +87,10 @@ class ReActAgent(BaseAgent):
        )

        yield {"sources": retrieved_data}
+        # Clean tool call data: send only the first 50 characters of tool_call['result']
+        for tool_call in self.tool_calls:
+            if len(str(tool_call["result"])) > 50:
+                tool_call["result"] = str(tool_call["result"])[:50] + "..."
        yield {"tool_calls": self.tool_calls.copy()}

        final_answer = self._create_final_answer(query, self.observations, log_context)
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -28,6 +28,9 @@ from application.extensions import api
 from application.tts.google_tts import GoogleTTS
 from application.utils import check_required_fields, validate_function_name
 from application.vectorstore.vector_creator import VectorCreator
+from application.storage.storage_creator import StorageCreator
+
+storage = StorageCreator.get_storage()

 mongo = MongoDB.get_client()
 db = mongo[settings.MONGO_DB_NAME]
@@ -461,59 +464,93 @@ class UploadFile(Resource):

                zip_filename = f"{job_name}.zip"
                zip_path = f"{base_path}/{zip_filename}"
+                zip_temp_path = None

-                def create_zip_archive(temp_paths, **kwargs):
+                def create_zip_archive(temp_paths, job_name, storage):
                    import tempfile

-                    with tempfile.TemporaryDirectory() as temp_dir:
+                    with tempfile.NamedTemporaryFile(
+                        delete=False, suffix=".zip"
+                    ) as temp_zip_file:
+                        zip_output_path = temp_zip_file.name
+
+                    with tempfile.TemporaryDirectory() as stage_dir:
                        for path in temp_paths:
-                            file_data = storage.get_file(path)
-                            with open(
-                                os.path.join(temp_dir, os.path.basename(path)), "wb"
-                            ) as f:
-                                f.write(file_data.read())
+                            try:
+                                file_data = storage.get_file(path)
+                                with open(
+                                    os.path.join(stage_dir, os.path.basename(path)),
+                                    "wb",
+                                ) as f:
+                                    f.write(file_data.read())
+                            except Exception as e:
+                                current_app.logger.error(
+                                    f"Error processing file {path} for zipping: {e}",
+                                    exc_info=True,
+                                )
+                                if os.path.exists(zip_output_path):
+                                    os.remove(zip_output_path)
+                                raise
+                        try:
+                            shutil.make_archive(
+                                base_name=zip_output_path.replace(".zip", ""),
+                                format="zip",
+                                root_dir=stage_dir,
+                            )
+                        except Exception as e:
+                            current_app.logger.error(
+                                f"Error creating zip archive: {e}", exc_info=True
+                            )
+                            if os.path.exists(zip_output_path):
+                                os.remove(zip_output_path)
+                            raise

-                        # Create zip archive
-                        zip_temp = shutil.make_archive(
-                            base_name=os.path.join(temp_dir, job_name),
-                            format="zip",
-                            root_dir=temp_dir,
-                        )
+                    return zip_output_path

-                        return zip_temp
+                try:
+                    zip_temp_path = create_zip_archive(temp_files, job_name, storage)
+                    with open(zip_temp_path, "rb") as zip_file:
+                        storage.save_file(zip_file, zip_path)

-                zip_temp_path = create_zip_archive(temp_files)
-                with open(zip_temp_path, "rb") as zip_file:
-                    storage.save_file(zip_file, zip_path)
+                    task = ingest.delay(
+                        settings.UPLOAD_FOLDER,
+                        [
+                            ".rst",
+                            ".md",
+                            ".pdf",
+                            ".txt",
+                            ".docx",
+                            ".csv",
+                            ".epub",
+                            ".html",
+                            ".mdx",
+                            ".json",
+                            ".xlsx",
+                            ".pptx",
+                            ".png",
+                            ".jpg",
+                            ".jpeg",
+                        ],
+                        job_name,
+                        zip_filename,
+                        user,
+                    )
+                finally:
+                    # Clean up temporary files
+                    for temp_path in temp_files:
+                        try:
+                            storage.delete_file(temp_path)
+                        except Exception as e:
+                            current_app.logger.error(
+                                f"Error deleting temporary file {temp_path}: {e}",
+                                exc_info=True,
+                            )

-                # Clean up temp files
-                for temp_path in temp_files:
-                    storage.delete_file(temp_path)
+                    # Clean up the zip file if it was created
+                    if zip_temp_path and os.path.exists(zip_temp_path):
+                        os.remove(zip_temp_path)

-                task = ingest.delay(
-                    settings.UPLOAD_FOLDER,
-                    [
-                        ".rst",
-                        ".md",
-                        ".pdf",
-                        ".txt",
-                        ".docx",
-                        ".csv",
-                        ".epub",
-                        ".html",
-                        ".mdx",
-                        ".json",
-                        ".xlsx",
-                        ".pptx",
-                        ".png",
-                        ".jpg",
-                        ".jpeg",
-                    ],
-                    job_name,
-                    zip_filename,
-                    user,
-                )
-            else:
+            else:  # Keep this else block for single file upload
                # For single file
                file = files[0]
                filename = secure_filename(file.filename)
@@ -541,7 +578,7 @@ class UploadFile(Resource):
                        ".jpeg",
                    ],
                    job_name,
-                    filename,
+                    filename,  # Corrected variable for single-file case
                    user,
                )

@@ -1022,14 +1059,15 @@ class GetAgent(Resource):
                return make_response(jsonify({"status": "Not found"}), 404)
            data = {
                "id": str(agent["_id"]),
-                "name": agent.get("name", ""),
+                "name": agent["name"],
                "description": agent.get("description", ""),
                "source": (
-                    str(db.dereference(agent["source"])["_id"])
-                    if "source" in agent and isinstance(agent["source"], DBRef)
+                    str(source_doc["_id"])
+                    if isinstance(agent.get("source"), DBRef)
+                    and (source_doc := db.dereference(agent.get("source")))
                    else ""
                ),
-                "chunks": agent.get("chunks", ""),
+                "chunks": agent["chunks"],
                "retriever": agent.get("retriever", ""),
                "prompt_id": agent.get("prompt_id", ""),
                "tools": agent.get("tools", []),
@@ -1068,14 +1106,15 @@ class GetAgents(Resource):
            list_agents = [
                {
                    "id": str(agent["_id"]),
-                    "name": agent.get("name", ""),
+                    "name": agent["name"],
                    "description": agent.get("description", ""),
                    "source": (
-                        str(db.dereference(agent["source"])["_id"])
-                        if "source" in agent and isinstance(agent["source"], DBRef)
+                        str(source_doc["_id"])
+                        if isinstance(agent.get("source"), DBRef)
+                        and (source_doc := db.dereference(agent.get("source")))
                        else ""
                    ),
-                    "chunks": agent.get("chunks", ""),
+                    "chunks": agent["chunks"],
                    "retriever": agent.get("retriever", ""),
                    "prompt_id": agent.get("prompt_id", ""),
                    "tools": agent.get("tools", []),
@@ -3300,16 +3339,16 @@ class StoreAttachment(Resource):

        try:
            attachment_id = ObjectId()
-            original_filename = secure_filename(file.filename)
+            original_filename = secure_filename(os.path.basename(file.filename))
            relative_path = f"{settings.UPLOAD_FOLDER}/{user}/attachments/{str(attachment_id)}/{original_filename}"

-            file_content = file.read()
+            metadata = storage.save_file(file, relative_path)

            file_info = {
                "filename": original_filename,
                "attachment_id": str(attachment_id),
                "path": relative_path,
-                "file_content": file_content,
+                "metadata": metadata,
            }

            task = store_attachment.delay(file_info, user)
--- a/application/parser/file/bulk.py
+++ b/application/parser/file/bulk.py
@@ -158,7 +158,7 @@ class SimpleDirectoryReader(BaseReader):
                    data = f.read()
            # Prepare metadata for this file
            if self.file_metadata is not None:
-                file_metadata = self.file_metadata(str(input_file))
+                file_metadata = self.file_metadata(input_file.name)
            else:
                # Provide a default empty metadata
                file_metadata = {'title': '', 'store': ''}
--- a/application/worker.py
+++ b/application/worker.py
@@ -1,5 +1,4 @@
 import datetime
-import io
 import json
 import logging
 import mimetypes
@@ -445,76 +444,61 @@ def attachment_worker(self, file_info, user):
    filename = file_info["filename"]
    attachment_id = file_info["attachment_id"]
    relative_path = file_info["path"]
-    file_content = file_info["file_content"]
+    metadata = file_info.get("metadata", {})

    try:
        self.update_state(state="PROGRESS", meta={"current": 10})
-        storage_type = getattr(settings, "STORAGE_TYPE", "local")
-        storage = StorageCreator.create_storage(storage_type)
+        storage = StorageCreator.get_storage()
+        
        self.update_state(
            state="PROGRESS", meta={"current": 30, "status": "Processing content"}
        )

-        with tempfile.NamedTemporaryFile(
-            suffix=os.path.splitext(filename)[1]
-        ) as temp_file:
-            temp_file.write(file_content)
-            temp_file.flush()
-            reader = SimpleDirectoryReader(
-                input_files=[temp_file.name], exclude_hidden=True, errors="ignore"
-            )
-            documents = reader.load_data()
+        content = storage.process_file(
+            relative_path,
+            lambda local_path, **kwargs: SimpleDirectoryReader(
+                input_files=[local_path], exclude_hidden=True, errors="ignore"
+            ).load_data()[0].text
+        )
+            
+        token_count = num_tokens_from_string(content)

-            if not documents:
-                logging.warning(f"No content extracted from file: {filename}")
-                raise ValueError(f"Failed to extract content from file: {filename}")
+        self.update_state(
+            state="PROGRESS", meta={"current": 80, "status": "Storing in database"}
+        )

-            content = documents[0].text
-            token_count = num_tokens_from_string(content)
+        mime_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"

-            self.update_state(
-                state="PROGRESS", meta={"current": 60, "status": "Saving file"}
-            )
-            file_obj = io.BytesIO(file_content)
-
-            metadata = storage.save_file(file_obj, relative_path)
-
-            mime_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"
-
-            self.update_state(
-                state="PROGRESS", meta={"current": 80, "status": "Storing in database"}
-            )
-
-            doc_id = ObjectId(attachment_id)
-            attachments_collection.insert_one(
-                {
-                    "_id": doc_id,
-                    "user": user,
-                    "path": relative_path,
-                    "content": content,
-                    "token_count": token_count,
-                    "mime_type": mime_type,
-                    "date": datetime.datetime.now(),
-                    "metadata": metadata,
-                }
-            )
-
-            logging.info(
-                f"Stored attachment with ID: {attachment_id}", extra={"user": user}
-            )
-
-            self.update_state(
-                state="PROGRESS", meta={"current": 100, "status": "Complete"}
-            )
-
-            return {
-                "filename": filename,
+        doc_id = ObjectId(attachment_id)
+        attachments_collection.insert_one(
+            {
+                "_id": doc_id,
+                "user": user,
                "path": relative_path,
+                "content": content,
                "token_count": token_count,
-                "attachment_id": attachment_id,
                "mime_type": mime_type,
+                "date": datetime.datetime.now(),
                "metadata": metadata,
            }
+        )
+
+        logging.info(
+            f"Stored attachment with ID: {attachment_id}", extra={"user": user}
+        )
+
+        self.update_state(
+            state="PROGRESS", meta={"current": 100, "status": "Complete"}
+        )
+
+        return {
+            "filename": filename,
+            "path": relative_path,
+            "token_count": token_count,
+            "attachment_id": attachment_id,
+            "mime_type": mime_type,
+            "metadata": metadata,
+        }

    except Exception as e:
        logging.error(