From 142ed75468cf4d251f2fc8c4a9f20c4617d15406 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Wed, 16 Apr 2025 03:31:06 +0530
Subject: [PATCH 01/39] (feat:fs_abstract) base

---
 application/storage/base.py | 73 +++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 application/storage/base.py

diff --git a/application/storage/base.py b/application/storage/base.py
new file mode 100644
index 00000000..c16eb600
--- /dev/null
+++ b/application/storage/base.py
@@ -0,0 +1,73 @@
+"""Base storage class for file system abstraction."""
+from abc import ABC, abstractmethod
+from typing import BinaryIO, List
+
+
+class BaseStorage(ABC):
+    """Abstract base class for storage implementations."""
+
+    @abstractmethod
+    def save_file(self, file_data: BinaryIO, path: str) -> str:
+        """
+        Save a file to storage.
+        
+        Args:
+            file_data: File-like object containing the data
+            path: Path where the file should be stored
+            
+        Returns:
+            str: The complete path where the file was saved
+        """
+        pass
+    
+    @abstractmethod
+    def get_file(self, path: str) -> BinaryIO:
+        """
+        Retrieve a file from storage.
+        
+        Args:
+            path: Path to the file
+            
+        Returns:
+            BinaryIO: File-like object containing the file data
+        """
+        pass
+    
+    @abstractmethod
+    def delete_file(self, path: str) -> bool:
+        """
+        Delete a file from storage.
+        
+        Args:
+            path: Path to the file
+            
+        Returns:
+            bool: True if deletion was successful
+        """
+        pass
+    
+    @abstractmethod
+    def file_exists(self, path: str) -> bool:
+        """
+        Check if a file exists.
+        
+        Args:
+            path: Path to the file
+            
+        Returns:
+            bool: True if the file exists
+        """
+        pass
+    
+    @abstractmethod
+    def list_files(self, directory: str) -> List[str]:
+        """
+        List all files in a directory.
+        
+        Args:
+            directory: Directory path to list
+            
+        Returns:
+            List[str]: List of file paths
+        """
+        pass

From 89b2937b110509d7b472676a738179b009287821 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Wed, 16 Apr 2025 03:31:28 +0530
Subject: [PATCH 02/39] (feat:fs_abstract) local

---
 application/storage/local.py | 85 ++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 application/storage/local.py

diff --git a/application/storage/local.py b/application/storage/local.py
new file mode 100644
index 00000000..82707007
--- /dev/null
+++ b/application/storage/local.py
@@ -0,0 +1,85 @@
+"""Local file system implementation."""
+import os
+import shutil
+from typing import BinaryIO, List
+
+from application.core.settings import settings
+from application.storage.base import BaseStorage
+
+
+class LocalStorage(BaseStorage):
+    """Local file system storage implementation."""
+    
+    def __init__(self, base_dir: str = None):
+        """
+        Initialize local storage.
+        
+        Args:
+            base_dir: Base directory for all operations. If None, uses current directory.
+        """
+        self.base_dir = base_dir or os.path.dirname(
+            os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        )
+    
+    def _get_full_path(self, path: str) -> str:
+        """Get absolute path by combining base_dir and path."""
+        if os.path.isabs(path):
+            return path
+        return os.path.join(self.base_dir, path)
+    
+    def save_file(self, file_data: BinaryIO, path: str) -> str:
+        """Save a file to local storage."""
+        full_path = self._get_full_path(path)
+        
+        # Ensure directory exists
+        os.makedirs(os.path.dirname(full_path), exist_ok=True)
+        
+        # Write file
+        if hasattr(file_data, 'save'):
+            # Handle Flask's FileStorage objects
+            file_data.save(full_path)
+        else:
+            # Handle regular file-like objects
+            with open(full_path, 'wb') as f:
+                shutil.copyfileobj(file_data, f)
+        
+        return path
+    
+    def get_file(self, path: str) -> BinaryIO:
+        """Get a file from local storage."""
+        full_path = self._get_full_path(path)
+        
+        if not os.path.exists(full_path):
+            raise FileNotFoundError(f"File not found: {full_path}")
+        
+        return open(full_path, 'rb')
+    
+    def delete_file(self, path: str) -> bool:
+        """Delete a file from local storage."""
+        full_path = self._get_full_path(path)
+        
+        if not os.path.exists(full_path):
+            return False
+        
+        os.remove(full_path)
+        return True
+    
+    def file_exists(self, path: str) -> bool:
+        """Check if a file exists in local storage."""
+        full_path = self._get_full_path(path)
+        return os.path.exists(full_path)
+    
+    def list_files(self, directory: str) -> List[str]:
+        """List all files in a directory in local storage."""
+        full_path = self._get_full_path(directory)
+        
+        if not os.path.exists(full_path):
+            return []
+        
+        result = []
+        for root, _, files in os.walk(full_path):
+            for file in files:
+                rel_path = os.path.relpath(os.path.join(root, file), self.base_dir)
+                result.append(rel_path)
+        
+        return result

From e567d8895128e46eba3c3df0fddd9e050985006b Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Wed, 16 Apr 2025 03:31:42 +0530
Subject: [PATCH 03/39] (feat:fs_abstract) s3

---
 application/storage/s3.py | 81 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 application/storage/s3.py

diff --git a/application/storage/s3.py b/application/storage/s3.py
new file mode 100644
index 00000000..f9d38d09
--- /dev/null
+++ b/application/storage/s3.py
@@ -0,0 +1,81 @@
+"""S3 storage implementation."""
+import io
+from typing import BinaryIO, List
+
+import boto3
+from botocore.exceptions import ClientError
+
+from application.storage.base import BaseStorage
+
+
+class S3Storage(BaseStorage):
+    """AWS S3 storage implementation."""
+    
+    def __init__(self, bucket_name: str, aws_access_key_id=None, 
+                 aws_secret_access_key=None, region_name=None):
+        """
+        Initialize S3 storage.
+        
+        Args:
+            bucket_name: S3 bucket name
+            aws_access_key_id: AWS access key ID (optional if using IAM roles)
+            aws_secret_access_key: AWS secret access key (optional if using IAM roles)
+            region_name: AWS region name (optional)
+        """
+        self.bucket_name = bucket_name
+        
+        # Initialize S3 client
+        self.s3 = boto3.client(
+            's3',
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            region_name=region_name
+        )
+    
+    def save_file(self, file_data: BinaryIO, path: str) -> str:
+        """Save a file to S3 storage."""
+        self.s3.upload_fileobj(file_data, self.bucket_name, path)
+        return path
+    
+    def get_file(self, path: str) -> BinaryIO:
+        """Get a file from S3 storage."""
+        if not self.file_exists(path):
+            raise FileNotFoundError(f"File not found: {path}")
+        
+        file_obj = io.BytesIO()
+        self.s3.download_fileobj(self.bucket_name, path, file_obj)
+        file_obj.seek(0)
+        return file_obj
+    
+    def delete_file(self, path: str) -> bool:
+        """Delete a file from S3 storage."""
+        try:
+            self.s3.delete_object(Bucket=self.bucket_name, Key=path)
+            return True
+        except ClientError:
+            return False
+    
+    def file_exists(self, path: str) -> bool:
+        """Check if a file exists in S3 storage."""
+        try:
+            self.s3.head_object(Bucket=self.bucket_name, Key=path)
+            return True
+        except ClientError:
+            return False
+    
+    def list_files(self, directory: str) -> List[str]:
+        """List all files in a directory in S3 storage."""
+        # Ensure directory ends with a slash if it's not empty
+        if directory and not directory.endswith('/'):
+            directory += '/'
+            
+        result = []
+        paginator = self.s3.get_paginator('list_objects_v2')
+        pages = paginator.paginate(Bucket=self.bucket_name, Prefix=directory)
+        
+        for page in pages:
+            if 'Contents' in page:
+                for obj in page['Contents']:
+                    result.append(obj['Key'])
+                    
+        return result
\ No newline at end of file

From 377e33c148c664d21e9b3c580cce1909352aca5e Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Wed, 16 Apr 2025 03:36:45 +0530
Subject: [PATCH 04/39] (feat:file_abstract) process files method

---
 application/storage/base.py  | 20 +++++++++++++++++-
 application/storage/local.py | 24 ++++++++++++++++++++--
 application/storage/s3.py    | 39 +++++++++++++++++++++++++++++++++---
 3 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/application/storage/base.py b/application/storage/base.py
index c16eb600..cb205091 100644
--- a/application/storage/base.py
+++ b/application/storage/base.py
@@ -1,6 +1,6 @@
 """Base storage class for file system abstraction."""
 from abc import ABC, abstractmethod
-from typing import BinaryIO, List
+from typing import BinaryIO, List, Optional, Callable
 
 
 class BaseStorage(ABC):
@@ -33,6 +33,24 @@ class BaseStorage(ABC):
         """
         pass
     
+    @abstractmethod
+    def process_file(self, path: str, processor_func: Callable, **kwargs):
+        """
+        Process a file using the provided processor function.
+        
+        This method handles the details of retrieving the file and providing
+        it to the processor function in an appropriate way based on the storage type.
+        
+        Args:
+            path: Path to the file
+            processor_func: Function that processes the file
+            **kwargs: Additional arguments to pass to the processor function
+            
+        Returns:
+            The result of the processor function
+        """
+        pass
+    
     @abstractmethod
     def delete_file(self, path: str) -> bool:
         """
diff --git a/application/storage/local.py b/application/storage/local.py
index 82707007..91c5c264 100644
--- a/application/storage/local.py
+++ b/application/storage/local.py
@@ -1,9 +1,8 @@
 """Local file system implementation."""
 import os
 import shutil
-from typing import BinaryIO, List
+from typing import BinaryIO, List, Callable
 
-from application.core.settings import settings
 from application.storage.base import BaseStorage
 
 
@@ -83,3 +82,24 @@ class LocalStorage(BaseStorage):
                 result.append(rel_path)
         
         return result
+
+    def process_file(self, path: str, processor_func: Callable, **kwargs):
+        """
+        Process a file using the provided processor function.
+        
+        For local storage, we can directly pass the full path to the processor.
+        
+        Args:
+            path: Path to the file
+            processor_func: Function that processes the file
+            **kwargs: Additional arguments to pass to the processor function
+            
+        Returns:
+            The result of the processor function
+        """
+        full_path = self._get_full_path(path)
+        
+        if not os.path.exists(full_path):
+            raise FileNotFoundError(f"File not found: {full_path}")
+        
+        return processor_func(file_path=full_path, **kwargs)
diff --git a/application/storage/s3.py b/application/storage/s3.py
index f9d38d09..cdec6887 100644
--- a/application/storage/s3.py
+++ b/application/storage/s3.py
@@ -1,6 +1,6 @@
 """S3 storage implementation."""
 import io
-from typing import BinaryIO, List
+from typing import BinaryIO, List, Callable
 
 import boto3
 from botocore.exceptions import ClientError
@@ -24,7 +24,6 @@ class S3Storage(BaseStorage):
         """
         self.bucket_name = bucket_name
         
-        # Initialize S3 client
         self.s3 = boto3.client(
             's3',
             aws_access_key_id=aws_access_key_id,
@@ -78,4 +77,38 @@ class S3Storage(BaseStorage):
                 for obj in page['Contents']:
                     result.append(obj['Key'])
                     
-        return result
\ No newline at end of file
+        return result
+
+    def process_file(self, path: str, processor_func: Callable, **kwargs):
+        """
+        Process a file using the provided processor function.
+        
+        For S3 storage, we need to download the file to a temporary location first.
+        
+        Args:
+            path: Path to the file
+            processor_func: Function that processes the file
+            **kwargs: Additional arguments to pass to the processor function
+            
+        Returns:
+            The result of the processor function
+        """
+        import tempfile
+        import os
+        
+        if not self.file_exists(path):
+            raise FileNotFoundError(f"File not found: {path}")
+        
+        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+            self.s3.download_fileobj(self.bucket_name, path, temp_file)
+            temp_path = temp_file.name
+        
+        try:
+            result = processor_func(file_path=temp_path, **kwargs)
+            return result
+        finally:
+            try:
+                os.unlink(temp_path)
+            except Exception as e:
+                import logging
+                logging.warning(f"Failed to delete temporary file: {e}")

From 0a0e16547e95a084cfbac4f0bcaed1f4a85fc613 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Thu, 17 Apr 2025 02:35:45 +0530
Subject: [PATCH 05/39] (feat:fs_abstract) attachment uploads

---
 application/api/user/routes.py | 23 ++------
 application/api/user/tasks.py  |  4 +-
 application/worker.py          | 99 +++++++++++++++++++---------------
 3 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 91b028d5..98af5343 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -2494,7 +2494,6 @@ class StoreAttachment(Resource):
         if not decoded_token:
             return make_response(jsonify({"success": False}), 401)
         
-        # Get single file instead of list
         file = request.files.get("file")
         
         if not file or file.filename == "":
@@ -2508,29 +2507,18 @@ class StoreAttachment(Resource):
         try:
             attachment_id = ObjectId()
             original_filename = secure_filename(file.filename)
+            relative_path = f"{settings.UPLOAD_FOLDER}/{user}/attachments/{str(attachment_id)}/{original_filename}"
             
-            save_dir = os.path.join(
-                current_dir, 
-                settings.UPLOAD_FOLDER,
-                user,
-                "attachments", 
-                str(attachment_id)
-            )
-            os.makedirs(save_dir, exist_ok=True)
+            file_content = file.read()
             
-            file_path = os.path.join(save_dir, original_filename)
-            
-            
-            file.save(file_path)
             file_info = {
                 "filename": original_filename,
-                "attachment_id": str(attachment_id)
+                "attachment_id": str(attachment_id),
+                "path": relative_path,
+                "file_content": file_content
             }
-            current_app.logger.info(f"Saved file: {file_path}")
             
-            # Start async task to process single file
             task = store_attachment.delay(
-                save_dir,
                 file_info,
                 user
             )
@@ -2543,7 +2531,6 @@ class StoreAttachment(Resource):
                 }),
                 200
             )
-            
         except Exception as err:
             current_app.logger.error(f"Error storing attachment: {err}")
             return make_response(jsonify({"success": False, "error": str(err)}), 400)
diff --git a/application/api/user/tasks.py b/application/api/user/tasks.py
index 24cff3c6..c9d4d39d 100644
--- a/application/api/user/tasks.py
+++ b/application/api/user/tasks.py
@@ -23,8 +23,8 @@ def schedule_syncs(self, frequency):
 
 
 @celery.task(bind=True)
-def store_attachment(self, directory, saved_files, user):
-    resp = attachment_worker(self, directory, saved_files, user)
+def store_attachment(self, file_info, user):
+    resp = attachment_worker(self, file_info, user)
     return resp
 
 
diff --git a/application/worker.py b/application/worker.py
index bbd422ac..d561a53f 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -3,15 +3,21 @@ import os
 import shutil
 import string
 import zipfile
+import io
+import datetime
+import mimetypes
+import requests
+
 from collections import Counter
 from urllib.parse import urljoin
 
-import requests
+from application.storage.storage_creator import StorageCreator 
+from application.utils import num_tokens_from_string
+from application.core.settings import settings
+from application.parser.file.bulk import SimpleDirectoryReader
 from bson.objectid import ObjectId
 
 from application.core.mongo_db import MongoDB
-from application.core.settings import settings
-from application.parser.file.bulk import SimpleDirectoryReader
 from application.parser.embedding_pipeline import embed_and_store_documents
 from application.parser.remote.remote_creator import RemoteCreator
 from application.parser.schema.base import Document
@@ -313,23 +319,11 @@ def sync_worker(self, frequency):
         for key in ["total_sync_count", "sync_success", "sync_failure"]
     }
 
-def attachment_worker(self, directory, file_info, user):
+
+def attachment_worker(self, file_info, user):
     """
     Process and store a single attachment without vectorization.
-    
-    Args:
-        self: Reference to the instance of the task.
-        directory (str): Base directory for storing files.
-        file_info (dict): Dictionary with folder and filename info.
-        user (str): User identifier.
-        
-    Returns:
-        dict: Information about processed attachment.
     """
-    import datetime
-    import os
-    import mimetypes
-    from application.utils import num_tokens_from_string
     
     mongo = MongoDB.get_client()
     db = mongo["docsgpt"]
@@ -337,60 +331,79 @@ def attachment_worker(self, directory, file_info, user):
     
     filename = file_info["filename"]
     attachment_id = file_info["attachment_id"]
-    
-    logging.info(f"Processing attachment: {attachment_id}/{filename}", extra={"user": user})
-    
-    self.update_state(state="PROGRESS", meta={"current": 10})
-    
-    file_path = os.path.join(directory, filename)
-    
-    if not os.path.exists(file_path):
-        logging.warning(f"File not found: {file_path}", extra={"user": user})
-        raise FileNotFoundError(f"File not found: {file_path}")
+    relative_path = file_info["path"]
+    file_content = file_info["file_content"]
     
     try:
-        reader = SimpleDirectoryReader(
-            input_files=[file_path]
-        )
-        documents = reader.load_data()
+        self.update_state(state="PROGRESS", meta={"current": 10})
         
-        self.update_state(state="PROGRESS", meta={"current": 50})
+        storage_type = getattr(settings, "STORAGE_TYPE", "local")
+        storage = StorageCreator.create_storage(storage_type)
         
-        if documents:
+        self.update_state(state="PROGRESS", meta={"current": 30, "status": "Saving file"})
+        file_obj = io.BytesIO(file_content)
+        storage.save_file(file_obj, relative_path)
+        
+        def process_document(file_path, **kwargs):
+            self.update_state(state="PROGRESS", meta={"current": 50, "status": "Processing content"})
+            
+            reader = SimpleDirectoryReader(
+                input_files=[file_path],
+                exclude_hidden=True,
+                errors="ignore"
+            )
+            documents = reader.load_data()
+            
+            if not documents:
+                logging.warning(f"No content extracted from file: {filename}")
+                raise ValueError(f"Failed to extract content from file: {filename}")
+            
             content = documents[0].text
             token_count = num_tokens_from_string(content)
             
-            file_path_relative = f"{settings.UPLOAD_FOLDER}/{user}/attachments/{attachment_id}/{filename}"
+            mime_type = mimetypes.guess_type(filename)[0] or 'application/octet-stream'
             
-            mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
+            metadata = {
+                "storage_type": storage_type,
+            }
+            
+            if storage_type == "s3":
+                metadata.update({
+                    "bucket_name": getattr(storage, "bucket_name", "docsgpt-test-bucket"),
+                    "uri": f"s3://{storage.bucket_name}/{relative_path}",
+                    "region": getattr(settings, "SAGEMAKER_REGION", "us-east-1")
+                })
+            
+            self.update_state(state="PROGRESS", meta={"current": 80, "status": "Storing in database"})
             
             doc_id = ObjectId(attachment_id)
             attachments_collection.insert_one({
                 "_id": doc_id,
                 "user": user,
-                "path": file_path_relative,
+                "path": relative_path,
                 "content": content,
                 "token_count": token_count,
                 "mime_type": mime_type,
                 "date": datetime.datetime.now(),
+                "metadata": metadata
             })
             
             logging.info(f"Stored attachment with ID: {attachment_id}", 
                         extra={"user": user})
             
-            self.update_state(state="PROGRESS", meta={"current": 100})
+            self.update_state(state="PROGRESS", meta={"current": 100, "status": "Complete"})
             
             return {
                 "filename": filename,
-                "path": file_path_relative,
+                "path": relative_path,
                 "token_count": token_count,
                 "attachment_id": attachment_id,
-                "mime_type": mime_type
+                "mime_type": mime_type,
+                "metadata": metadata
             }
-        else:
-            logging.warning("No content was extracted from the file", 
-                           extra={"user": user})
-            raise ValueError("No content was extracted from the file")
+            
+        return storage.process_file(relative_path, process_document)
+        
     except Exception as e:
         logging.error(f"Error processing file {filename}: {e}", extra={"user": user}, exc_info=True)
         raise

From 9454150f7d125d179bcfb97629f0cb16ebdf6932 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Thu, 17 Apr 2025 02:36:55 +0530
Subject: [PATCH 06/39] (fix:s3) processor func

---
 application/storage/base.py |  2 +-
 application/storage/s3.py   | 42 ++++++++++++++++++-------------------
 2 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/application/storage/base.py b/application/storage/base.py
index cb205091..88fed0c6 100644
--- a/application/storage/base.py
+++ b/application/storage/base.py
@@ -1,6 +1,6 @@
 """Base storage class for file system abstraction."""
 from abc import ABC, abstractmethod
-from typing import BinaryIO, List, Optional, Callable
+from typing import BinaryIO, List, Callable
 
 
 class BaseStorage(ABC):
diff --git a/application/storage/s3.py b/application/storage/s3.py
index cdec6887..e02a2a5a 100644
--- a/application/storage/s3.py
+++ b/application/storage/s3.py
@@ -1,28 +1,31 @@
 """S3 storage implementation."""
 import io
 from typing import BinaryIO, List, Callable
+import os
 
 import boto3
 from botocore.exceptions import ClientError
 
 from application.storage.base import BaseStorage
+from application.core.settings import settings
 
 
 class S3Storage(BaseStorage):
     """AWS S3 storage implementation."""
     
-    def __init__(self, bucket_name: str, aws_access_key_id=None, 
-                 aws_secret_access_key=None, region_name=None):
+    def __init__(self, bucket_name=None):
         """
         Initialize S3 storage.
         
         Args:
-            bucket_name: S3 bucket name
-            aws_access_key_id: AWS access key ID (optional if using IAM roles)
-            aws_secret_access_key: AWS secret access key (optional if using IAM roles)
-            region_name: AWS region name (optional)
+            bucket_name: S3 bucket name (optional, defaults to settings)
         """
-        self.bucket_name = bucket_name
+        self.bucket_name = bucket_name or getattr(settings, "S3_BUCKET_NAME", "docsgpt-test-bucket")
+        
+        # Get credentials from settings
+        aws_access_key_id = getattr(settings, "SAGEMAKER_ACCESS_KEY", None)
+        aws_secret_access_key = getattr(settings, "SAGEMAKER_SECRET_KEY", None)
+        region_name = getattr(settings, "SAGEMAKER_REGION", None)
         
         self.s3 = boto3.client(
             's3',
@@ -83,8 +86,6 @@ class S3Storage(BaseStorage):
         """
         Process a file using the provided processor function.
         
-        For S3 storage, we need to download the file to a temporary location first.
-        
         Args:
             path: Path to the file
             processor_func: Function that processes the file
@@ -94,21 +95,18 @@ class S3Storage(BaseStorage):
             The result of the processor function
         """
         import tempfile
-        import os
+        import logging
         
         if not self.file_exists(path):
-            raise FileNotFoundError(f"File not found: {path}")
+            raise FileNotFoundError(f"File not found in S3: {path}")
         
-        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
-            self.s3.download_fileobj(self.bucket_name, path, temp_file)
-            temp_path = temp_file.name
-        
-        try:
-            result = processor_func(file_path=temp_path, **kwargs)
-            return result
-        finally:
+        with tempfile.NamedTemporaryFile(suffix=os.path.splitext(path)[1], delete=True) as temp_file:
             try:
-                os.unlink(temp_path)
+                # Download the file from S3 to the temporary file
+                self.s3.download_fileobj(self.bucket_name, path, temp_file)
+                temp_file.flush()
+                result = processor_func(file_path=temp_file.name, **kwargs)
+                return result
             except Exception as e:
-                import logging
-                logging.warning(f"Failed to delete temporary file: {e}")
+                logging.error(f"Error processing S3 file {path}: {e}", exc_info=True)
+                raise

From 68e4cf4d1415fdb66d9dbaf93c30451115ea18df Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Thu, 17 Apr 2025 02:40:53 +0530
Subject: [PATCH 07/39] (feat:fsabstract) add factory class

---
 application/storage/storage_creator.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 application/storage/storage_creator.py

diff --git a/application/storage/storage_creator.py b/application/storage/storage_creator.py
new file mode 100644
index 00000000..dcf64983
--- /dev/null
+++ b/application/storage/storage_creator.py
@@ -0,0 +1,21 @@
+"""Storage factory for creating different storage implementations."""
+from typing import Dict, Type
+
+from application.storage.base import BaseStorage
+from application.storage.local import LocalStorage
+from application.storage.s3 import S3Storage
+
+
+class StorageCreator:
+    storages: Dict[str, Type[BaseStorage]] = {
+        "local": LocalStorage,
+        "s3": S3Storage,
+    }
+    
+    @classmethod
+    def create_storage(cls, type_name: str, *args, **kwargs) -> BaseStorage:
+        storage_class = cls.storages.get(type_name.lower())
+        if not storage_class:
+            raise ValueError(f"No storage implementation found for type {type_name}")
+        
+        return storage_class(*args, **kwargs)

From 0d3e6157cd487fcba1e59a63d496d40bf404a457 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Thu, 17 Apr 2025 16:23:01 +0530
Subject: [PATCH 08/39] (feat:attachmentUpload) parse content before upload

---
 application/worker.py | 65 ++++++++++++++++++-------------------------
 1 file changed, 27 insertions(+), 38 deletions(-)

diff --git a/application/worker.py b/application/worker.py
index d561a53f..b5caa23e 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -7,11 +7,12 @@ import io
 import datetime
 import mimetypes
 import requests
+import tempfile
 
 from collections import Counter
 from urllib.parse import urljoin
 
-from application.storage.storage_creator import StorageCreator 
+from application.storage.storage_creator import StorageCreator
 from application.utils import num_tokens_from_string
 from application.core.settings import settings
 from application.parser.file.bulk import SimpleDirectoryReader
@@ -209,7 +210,7 @@ def remote_worker(
     sync_frequency="never",
     operation_mode="upload",
     doc_id=None,
-):  
+):
     full_path = os.path.join(directory, user, name_job)
     if not os.path.exists(full_path):
         os.makedirs(full_path)
@@ -324,58 +325,48 @@ def attachment_worker(self, file_info, user):
     """
     Process and store a single attachment without vectorization.
     """
-    
+
     mongo = MongoDB.get_client()
     db = mongo["docsgpt"]
     attachments_collection = db["attachments"]
-    
+
     filename = file_info["filename"]
     attachment_id = file_info["attachment_id"]
     relative_path = file_info["path"]
     file_content = file_info["file_content"]
-    
+
     try:
         self.update_state(state="PROGRESS", meta={"current": 10})
-        
         storage_type = getattr(settings, "STORAGE_TYPE", "local")
         storage = StorageCreator.create_storage(storage_type)
-        
-        self.update_state(state="PROGRESS", meta={"current": 30, "status": "Saving file"})
-        file_obj = io.BytesIO(file_content)
-        storage.save_file(file_obj, relative_path)
-        
-        def process_document(file_path, **kwargs):
-            self.update_state(state="PROGRESS", meta={"current": 50, "status": "Processing content"})
-            
+        self.update_state(state="PROGRESS", meta={"current": 30, "status": "Processing content"})
+
+        with tempfile.NamedTemporaryFile(suffix=os.path.splitext(filename)[1]) as temp_file:
+            temp_file.write(file_content)
+            temp_file.flush()
             reader = SimpleDirectoryReader(
-                input_files=[file_path],
+                input_files=[temp_file.name],
                 exclude_hidden=True,
                 errors="ignore"
             )
             documents = reader.load_data()
-            
+
             if not documents:
                 logging.warning(f"No content extracted from file: {filename}")
                 raise ValueError(f"Failed to extract content from file: {filename}")
-            
+
             content = documents[0].text
             token_count = num_tokens_from_string(content)
-            
+
+            self.update_state(state="PROGRESS", meta={"current": 60, "status": "Saving file"})
+            file_obj = io.BytesIO(file_content)
+
+            metadata = storage.save_file(file_obj, relative_path)
+
             mime_type = mimetypes.guess_type(filename)[0] or 'application/octet-stream'
-            
-            metadata = {
-                "storage_type": storage_type,
-            }
-            
-            if storage_type == "s3":
-                metadata.update({
-                    "bucket_name": getattr(storage, "bucket_name", "docsgpt-test-bucket"),
-                    "uri": f"s3://{storage.bucket_name}/{relative_path}",
-                    "region": getattr(settings, "SAGEMAKER_REGION", "us-east-1")
-                })
-            
+
             self.update_state(state="PROGRESS", meta={"current": 80, "status": "Storing in database"})
-            
+
             doc_id = ObjectId(attachment_id)
             attachments_collection.insert_one({
                 "_id": doc_id,
@@ -387,12 +378,12 @@ def attachment_worker(self, file_info, user):
                 "date": datetime.datetime.now(),
                 "metadata": metadata
             })
-            
-            logging.info(f"Stored attachment with ID: {attachment_id}", 
+
+            logging.info(f"Stored attachment with ID: {attachment_id}",
                         extra={"user": user})
-            
+
             self.update_state(state="PROGRESS", meta={"current": 100, "status": "Complete"})
-            
+
             return {
                 "filename": filename,
                 "path": relative_path,
@@ -401,9 +392,7 @@ def attachment_worker(self, file_info, user):
                 "mime_type": mime_type,
                 "metadata": metadata
             }
-            
-        return storage.process_file(relative_path, process_document)
-        
+
     except Exception as e:
         logging.error(f"Error processing file {filename}: {e}", extra={"user": user}, exc_info=True)
         raise

From c35d1cecfe41660fa6b95051c650c6f56d233d25 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Thu, 17 Apr 2025 16:29:34 +0530
Subject: [PATCH 09/39] (feat:file_abstract) return storage metadata after
 upload

---
 application/storage/base.py  | 43 ++++++++++++++-------------
 application/storage/local.py | 56 +++++++++++++++++-------------------
 application/storage/s3.py    | 46 +++++++++++++++++------------
 3 files changed, 77 insertions(+), 68 deletions(-)

diff --git a/application/storage/base.py b/application/storage/base.py
index 88fed0c6..273e7761 100644
--- a/application/storage/base.py
+++ b/application/storage/base.py
@@ -7,84 +7,87 @@ class BaseStorage(ABC):
     """Abstract base class for storage implementations."""
 
     @abstractmethod
-    def save_file(self, file_data: BinaryIO, path: str) -> str:
+    def save_file(self, file_data: BinaryIO, path: str) -> dict:
         """
         Save a file to storage.
-        
+
         Args:
             file_data: File-like object containing the data
             path: Path where the file should be stored
-            
+
         Returns:
-            str: The complete path where the file was saved
+            dict: A dictionary containing metadata about the saved file, including:
+                - 'path': The path where the file was saved
+                - 'storage_type': The type of storage (e.g., 'local', 's3')
+                - Other storage-specific metadata (e.g., 'uri', 'bucket_name', etc.)
         """
         pass
-    
+
     @abstractmethod
     def get_file(self, path: str) -> BinaryIO:
         """
         Retrieve a file from storage.
-        
+
         Args:
             path: Path to the file
-            
+
         Returns:
             BinaryIO: File-like object containing the file data
         """
         pass
-    
+
     @abstractmethod
     def process_file(self, path: str, processor_func: Callable, **kwargs):
         """
         Process a file using the provided processor function.
-        
+
         This method handles the details of retrieving the file and providing
         it to the processor function in an appropriate way based on the storage type.
-        
+
         Args:
             path: Path to the file
             processor_func: Function that processes the file
             **kwargs: Additional arguments to pass to the processor function
-            
+
         Returns:
             The result of the processor function
         """
         pass
-    
+
     @abstractmethod
     def delete_file(self, path: str) -> bool:
         """
         Delete a file from storage.
-        
+
         Args:
             path: Path to the file
-            
+
         Returns:
             bool: True if deletion was successful
         """
         pass
-    
+
     @abstractmethod
     def file_exists(self, path: str) -> bool:
         """
         Check if a file exists.
-        
+
         Args:
             path: Path to the file
-            
+
         Returns:
             bool: True if the file exists
         """
         pass
-    
+
     @abstractmethod
     def list_files(self, directory: str) -> List[str]:
         """
         List all files in a directory.
-        
+
         Args:
             directory: Directory path to list
-            
+
         Returns:
             List[str]: List of file paths
         """
diff --git a/application/storage/local.py b/application/storage/local.py
index 91c5c264..db11b63c 100644
--- a/application/storage/local.py
+++ b/application/storage/local.py
@@ -8,98 +8,96 @@ from application.storage.base import BaseStorage
 
 class LocalStorage(BaseStorage):
     """Local file system storage implementation."""
-    
+
     def __init__(self, base_dir: str = None):
         """
         Initialize local storage.
-        
+
         Args:
             base_dir: Base directory for all operations. If None, uses current directory.
         """
         self.base_dir = base_dir or os.path.dirname(
             os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
         )
-    
+
     def _get_full_path(self, path: str) -> str:
         """Get absolute path by combining base_dir and path."""
         if os.path.isabs(path):
             return path
         return os.path.join(self.base_dir, path)
-    
-    def save_file(self, file_data: BinaryIO, path: str) -> str:
+
+    def save_file(self, file_data: BinaryIO, path: str) -> dict:
         """Save a file to local storage."""
         full_path = self._get_full_path(path)
-        
-        # Ensure directory exists
+
         os.makedirs(os.path.dirname(full_path), exist_ok=True)
-        
-        # Write file
+
         if hasattr(file_data, 'save'):
-            # Handle Flask's FileStorage objects
             file_data.save(full_path)
         else:
-            # Handle regular file-like objects
             with open(full_path, 'wb') as f:
                 shutil.copyfileobj(file_data, f)
-        
-        return path
-    
+
+        return {
+            'storage_type': 'local'
+        }
+
     def get_file(self, path: str) -> BinaryIO:
         """Get a file from local storage."""
         full_path = self._get_full_path(path)
-        
+
         if not os.path.exists(full_path):
             raise FileNotFoundError(f"File not found: {full_path}")
-        
+
         return open(full_path, 'rb')
-    
+
     def delete_file(self, path: str) -> bool:
         """Delete a file from local storage."""
         full_path = self._get_full_path(path)
-        
+
         if not os.path.exists(full_path):
             return False
-        
+
         os.remove(full_path)
         return True
-    
+
     def file_exists(self, path: str) -> bool:
         """Check if a file exists in local storage."""
         full_path = self._get_full_path(path)
         return os.path.exists(full_path)
-    
+
     def list_files(self, directory: str) -> List[str]:
         """List all files in a directory in local storage."""
         full_path = self._get_full_path(directory)
-        
+
         if not os.path.exists(full_path):
             return []
-        
+
         result = []
         for root, _, files in os.walk(full_path):
             for file in files:
                 rel_path = os.path.relpath(os.path.join(root, file), self.base_dir)
                 result.append(rel_path)
-        
+
         return result
 
     def process_file(self, path: str, processor_func: Callable, **kwargs):
         """
         Process a file using the provided processor function.
-        
+
         For local storage, we can directly pass the full path to the processor.
-        
+
         Args:
             path: Path to the file
             processor_func: Function that processes the file
             **kwargs: Additional arguments to pass to the processor function
-            
+
         Returns:
             The result of the processor function
         """
         full_path = self._get_full_path(path)
-        
+
         if not os.path.exists(full_path):
             raise FileNotFoundError(f"File not found: {full_path}")
-        
+
         return processor_func(file_path=full_path, **kwargs)
diff --git a/application/storage/s3.py b/application/storage/s3.py
index e02a2a5a..e8df210e 100644
--- a/application/storage/s3.py
+++ b/application/storage/s3.py
@@ -12,43 +12,51 @@ from application.core.settings import settings
 
 class S3Storage(BaseStorage):
     """AWS S3 storage implementation."""
-    
+
     def __init__(self, bucket_name=None):
         """
         Initialize S3 storage.
-        
+
         Args:
             bucket_name: S3 bucket name (optional, defaults to settings)
         """
         self.bucket_name = bucket_name or getattr(settings, "S3_BUCKET_NAME", "docsgpt-test-bucket")
-        
+
         # Get credentials from settings
         aws_access_key_id = getattr(settings, "SAGEMAKER_ACCESS_KEY", None)
         aws_secret_access_key = getattr(settings, "SAGEMAKER_SECRET_KEY", None)
         region_name = getattr(settings, "SAGEMAKER_REGION", None)
-        
+
         self.s3 = boto3.client(
             's3',
             aws_access_key_id=aws_access_key_id,
             aws_secret_access_key=aws_secret_access_key,
             region_name=region_name
         )
-    
-    def save_file(self, file_data: BinaryIO, path: str) -> str:
+
+    def save_file(self, file_data: BinaryIO, path: str) -> dict:
         """Save a file to S3 storage."""
         self.s3.upload_fileobj(file_data, self.bucket_name, path)
-        return path
-    
+
+        region = getattr(settings, "SAGEMAKER_REGION", None)
+
+        return {
+            'storage_type': 's3',
+            'bucket_name': self.bucket_name,
+            'uri': f's3://{self.bucket_name}/{path}',
+            'region': region
+        }
+
     def get_file(self, path: str) -> BinaryIO:
         """Get a file from S3 storage."""
         if not self.file_exists(path):
             raise FileNotFoundError(f"File not found: {path}")
-        
+
         file_obj = io.BytesIO()
         self.s3.download_fileobj(self.bucket_name, path, file_obj)
         file_obj.seek(0)
         return file_obj
-    
+
     def delete_file(self, path: str) -> bool:
         """Delete a file from S3 storage."""
         try:
@@ -56,7 +64,7 @@ class S3Storage(BaseStorage):
             return True
         except ClientError:
             return False
-    
+
     def file_exists(self, path: str) -> bool:
         """Check if a file exists in S3 storage."""
         try:
@@ -64,42 +72,42 @@ class S3Storage(BaseStorage):
             return True
         except ClientError:
             return False
-    
+
     def list_files(self, directory: str) -> List[str]:
         """List all files in a directory in S3 storage."""
         # Ensure directory ends with a slash if it's not empty
         if directory and not directory.endswith('/'):
             directory += '/'
-            
+
         result = []
         paginator = self.s3.get_paginator('list_objects_v2')
         pages = paginator.paginate(Bucket=self.bucket_name, Prefix=directory)
-        
+
         for page in pages:
             if 'Contents' in page:
                 for obj in page['Contents']:
                     result.append(obj['Key'])
-                    
+
         return result
 
     def process_file(self, path: str, processor_func: Callable, **kwargs):
         """
         Process a file using the provided processor function.
-        
+
         Args:
             path: Path to the file
             processor_func: Function that processes the file
             **kwargs: Additional arguments to pass to the processor function
-            
+
         Returns:
             The result of the processor function
         """
         import tempfile
         import logging
-        
+
         if not self.file_exists(path):
             raise FileNotFoundError(f"File not found in S3: {path}")
-        
+
         with tempfile.NamedTemporaryFile(suffix=os.path.splitext(path)[1], delete=True) as temp_file:
             try:
                 # Download the file from S3 to the temporary file

From 335c21c48ac6785a792ea27fa86d10e6a5d41196 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Thu, 17 Apr 2025 16:36:40 +0530
Subject: [PATCH 10/39] (fix:attachment) don't calculate MIME again

---
 application/agents/llm_handler.py | 55 +++++++++++---------------
 application/llm/google_ai.py      | 62 +++++++++++++-----------------
 application/llm/openai.py         | 64 +++++++++++++++----------------
 3 files changed, 80 insertions(+), 101 deletions(-)

diff --git a/application/agents/llm_handler.py b/application/agents/llm_handler.py
index 7fe794f8..bf39f625 100644
--- a/application/agents/llm_handler.py
+++ b/application/agents/llm_handler.py
@@ -15,95 +15,86 @@ class LLMHandler(ABC):
     @abstractmethod
     def handle_response(self, agent, resp, tools_dict, messages, attachments=None, **kwargs):
         pass
-    
+
     def prepare_messages_with_attachments(self, agent, messages, attachments=None):
         """
         Prepare messages with attachment content if available.
-        
+
         Args:
             agent: The current agent instance.
             messages (list): List of message dictionaries.
             attachments (list): List of attachment dictionaries with content.
-            
+
         Returns:
             list: Messages with attachment context added to the system prompt.
         """
         if not attachments:
             return messages
-        
+
         logger.info(f"Preparing messages with {len(attachments)} attachments")
-        
+
         supported_types = agent.llm.get_supported_attachment_types()
-        
+
         supported_attachments = []
         unsupported_attachments = []
-        
+
         for attachment in attachments:
             mime_type = attachment.get('mime_type')
-            if not mime_type:
-                import mimetypes
-                file_path = attachment.get('path')
-                if file_path:
-                    mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
-                else:
-                    unsupported_attachments.append(attachment)
-                    continue
-            
             if mime_type in supported_types:
                 supported_attachments.append(attachment)
             else:
                 unsupported_attachments.append(attachment)
-        
+
         # Process supported attachments with the LLM's custom method
         prepared_messages = messages
         if supported_attachments:
             logger.info(f"Processing {len(supported_attachments)} supported attachments with {agent.llm.__class__.__name__}'s method")
             prepared_messages = agent.llm.prepare_messages_with_attachments(messages, supported_attachments)
-        
+
         # Process unsupported attachments with the default method
         if unsupported_attachments:
             logger.info(f"Processing {len(unsupported_attachments)} unsupported attachments with default method")
             prepared_messages = self._append_attachment_content_to_system(prepared_messages, unsupported_attachments)
-            
+
         return prepared_messages
-    
+
     def _append_attachment_content_to_system(self, messages, attachments):
         """
         Default method to append attachment content to the system prompt.
-        
+
         Args:
             messages (list): List of message dictionaries.
             attachments (list): List of attachment dictionaries with content.
-            
+
         Returns:
             list: Messages with attachment context added to the system prompt.
         """
         prepared_messages = messages.copy()
-        
+
         attachment_texts = []
         for attachment in attachments:
             logger.info(f"Adding attachment {attachment.get('id')} to context")
             if 'content' in attachment:
                 attachment_texts.append(f"Attached file content:\n\n{attachment['content']}")
-        
+
         if attachment_texts:
             combined_attachment_text = "\n\n".join(attachment_texts)
-            
+
             system_found = False
             for i in range(len(prepared_messages)):
                 if prepared_messages[i].get("role") == "system":
                     prepared_messages[i]["content"] += f"\n\n{combined_attachment_text}"
                     system_found = True
                     break
-            
+
             if not system_found:
                 prepared_messages.insert(0, {"role": "system", "content": combined_attachment_text})
-        
+
         return prepared_messages
 
 class OpenAILLMHandler(LLMHandler):
     def handle_response(self, agent, resp, tools_dict, messages, attachments=None, stream: bool = True):
-        
+
         messages = self.prepare_messages_with_attachments(agent, messages, attachments)
         logger.info(f"Messages with attachments: {messages}")
         if not stream:
@@ -167,7 +158,7 @@ class OpenAILLMHandler(LLMHandler):
                     if isinstance(chunk, str) and len(chunk) > 0:
                         yield chunk
                         continue
-                    elif hasattr(chunk, "delta"): 
+                    elif hasattr(chunk, "delta"):
                         chunk_delta = chunk.delta
 
                         if (
@@ -258,7 +249,7 @@ class OpenAILLMHandler(LLMHandler):
                             return resp
                     elif isinstance(chunk, str) and len(chunk) == 0:
                             continue
-                
+
                 logger.info(f"Regenerating with messages: {messages}")
                 resp = agent.llm.gen_stream(
                     model=agent.gpt_model, messages=messages, tools=agent.tools
@@ -269,9 +260,9 @@ class OpenAILLMHandler(LLMHandler):
 class GoogleLLMHandler(LLMHandler):
     def handle_response(self, agent, resp, tools_dict, messages, attachments=None, stream: bool = True):
         from google.genai import types
-        
+
         messages = self.prepare_messages_with_attachments(agent, messages, attachments)
-        
+
         while True:
             if not stream:
                 response = agent.llm.gen(
diff --git a/application/llm/google_ai.py b/application/llm/google_ai.py
index c049eaa2..6d709ec2 100644
--- a/application/llm/google_ai.py
+++ b/application/llm/google_ai.py
@@ -18,7 +18,7 @@ class GoogleLLM(BaseLLM):
     def get_supported_attachment_types(self):
         """
         Return a list of MIME types supported by Google Gemini for file uploads.
-        
+
         Returns:
             list: List of supported MIME types
         """
@@ -30,35 +30,35 @@ class GoogleLLM(BaseLLM):
             'image/webp',
             'image/gif'
         ]
-    
+
     def prepare_messages_with_attachments(self, messages, attachments=None):
         """
         Process attachments using Google AI's file API for more efficient handling.
-        
+
         Args:
             messages (list): List of message dictionaries.
             attachments (list): List of attachment dictionaries with content and metadata.
-            
+
         Returns:
             list: Messages formatted with file references for Google AI API.
         """
         if not attachments:
             return messages
-        
+
         prepared_messages = messages.copy()
-        
+
         # Find the user message to attach files to the last one
         user_message_index = None
         for i in range(len(prepared_messages) - 1, -1, -1):
             if prepared_messages[i].get("role") == "user":
                 user_message_index = i
                 break
-        
+
         if user_message_index is None:
             user_message = {"role": "user", "content": []}
             prepared_messages.append(user_message)
             user_message_index = len(prepared_messages) - 1
-        
+
         if isinstance(prepared_messages[user_message_index].get("content"), str):
             text_content = prepared_messages[user_message_index]["content"]
             prepared_messages[user_message_index]["content"] = [
@@ -66,15 +66,11 @@ class GoogleLLM(BaseLLM):
             ]
         elif not isinstance(prepared_messages[user_message_index].get("content"), list):
             prepared_messages[user_message_index]["content"] = []
-        
+
         files = []
         for attachment in attachments:
             mime_type = attachment.get('mime_type')
-            if not mime_type:
-                file_path = attachment.get('path')
-                if file_path:
-                    mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
-            
+
             if mime_type in self.get_supported_attachment_types():
                 try:
                     file_uri = self._upload_file_to_google(attachment)
@@ -84,53 +80,49 @@ class GoogleLLM(BaseLLM):
                     logging.error(f"GoogleLLM: Error uploading file: {e}")
                     if 'content' in attachment:
                         prepared_messages[user_message_index]["content"].append({
-                            "type": "text", 
+                            "type": "text",
                             "text": f"[File could not be processed: {attachment.get('path', 'unknown')}]"
                         })
-        
+
         if files:
             logging.info(f"GoogleLLM: Adding {len(files)} files to message")
             prepared_messages[user_message_index]["content"].append({
                 "files": files
             })
-        
+
         return prepared_messages
 
     def _upload_file_to_google(self, attachment):
         """
         Upload a file to Google AI and return the file URI.
-        
+
         Args:
             attachment (dict): Attachment dictionary with path and metadata.
-                
+
         Returns:
             str: Google AI file URI for the uploaded file.
         """
         if 'google_file_uri' in attachment:
             return attachment['google_file_uri']
-        
+
         file_path = attachment.get('path')
         if not file_path:
             raise ValueError("No file path provided in attachment")
-        
+
         if not os.path.isabs(file_path):
             current_dir = os.path.dirname(
                 os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
             )
             file_path = os.path.join(current_dir, "application", file_path)
-        
+
         if not os.path.exists(file_path):
             raise FileNotFoundError(f"File not found: {file_path}")
-        
-        mime_type = attachment.get('mime_type')
-        if not mime_type:
-            mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
-        
+
         try:
             response = self.client.files.upload(file=file_path)
-            
+
             file_uri = response.uri
-            
+
             from application.core.mongo_db import MongoDB
             mongo = MongoDB.get_client()
             db = mongo["docsgpt"]
@@ -140,7 +132,7 @@ class GoogleLLM(BaseLLM):
                     {"_id": attachment['_id']},
                     {"$set": {"google_file_uri": file_uri}}
                 )
-            
+
             return file_uri
         except Exception as e:
             logging.error(f"Error uploading file to Google AI: {e}")
@@ -289,7 +281,7 @@ class GoogleLLM(BaseLLM):
         if tools:
             cleaned_tools = self._clean_tools_format(tools)
             config.tools = cleaned_tools
-        
+
         # Check if we have both tools and file attachments
         has_attachments = False
         for message in messages:
@@ -299,16 +291,16 @@ class GoogleLLM(BaseLLM):
                     break
             if has_attachments:
                 break
-        
+
         logging.info(f"GoogleLLM: Starting stream generation. Model: {model}, Messages: {json.dumps(messages, default=str)}, Has attachments: {has_attachments}")
-        
+
         response = client.models.generate_content_stream(
             model=model,
             contents=messages,
             config=config,
         )
-        
-        
+
+
         for chunk in response:
             if hasattr(chunk, "candidates") and chunk.candidates:
                 for candidate in chunk.candidates:
diff --git a/application/llm/openai.py b/application/llm/openai.py
index 75bd37e0..b3e179c1 100644
--- a/application/llm/openai.py
+++ b/application/llm/openai.py
@@ -149,7 +149,7 @@ class OpenAILLM(BaseLLM):
     def get_supported_attachment_types(self):
         """
         Return a list of MIME types supported by OpenAI for file uploads.
-        
+
         Returns:
             list: List of supported MIME types
         """
@@ -161,35 +161,35 @@ class OpenAILLM(BaseLLM):
             'image/webp',
             'image/gif'
         ]
-    
+
     def prepare_messages_with_attachments(self, messages, attachments=None):
         """
         Process attachments using OpenAI's file API for more efficient handling.
-        
+
         Args:
             messages (list): List of message dictionaries.
             attachments (list): List of attachment dictionaries with content and metadata.
-            
+
         Returns:
             list: Messages formatted with file references for OpenAI API.
         """
         if not attachments:
             return messages
-        
+
         prepared_messages = messages.copy()
-        
+
         # Find the user message to attach file_id to the last one
         user_message_index = None
         for i in range(len(prepared_messages) - 1, -1, -1):
             if prepared_messages[i].get("role") == "user":
                 user_message_index = i
                 break
-        
+
         if user_message_index is None:
             user_message = {"role": "user", "content": []}
             prepared_messages.append(user_message)
             user_message_index = len(prepared_messages) - 1
-        
+
         if isinstance(prepared_messages[user_message_index].get("content"), str):
             text_content = prepared_messages[user_message_index]["content"]
             prepared_messages[user_message_index]["content"] = [
@@ -197,14 +197,10 @@ class OpenAILLM(BaseLLM):
             ]
         elif not isinstance(prepared_messages[user_message_index].get("content"), list):
             prepared_messages[user_message_index]["content"] = []
-        
+
         for attachment in attachments:
             mime_type = attachment.get('mime_type')
-            if not mime_type:
-                file_path = attachment.get('path')
-                if file_path:
-                    mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
-            
+
             if mime_type and mime_type.startswith('image/'):
                 try:
                     base64_image = self._get_base64_image(attachment)
@@ -218,14 +214,14 @@ class OpenAILLM(BaseLLM):
                     logging.error(f"Error processing image attachment: {e}")
                     if 'content' in attachment:
                         prepared_messages[user_message_index]["content"].append({
-                            "type": "text", 
+                            "type": "text",
                             "text": f"[Image could not be processed: {attachment.get('path', 'unknown')}]"
                         })
             # Handle PDFs using the file API
             elif mime_type == 'application/pdf':
                 try:
                     file_id = self._upload_file_to_openai(attachment)
-                    
+
                     prepared_messages[user_message_index]["content"].append({
                         "type": "file",
                         "file": {"file_id": file_id}
@@ -234,80 +230,80 @@ class OpenAILLM(BaseLLM):
                     logging.error(f"Error uploading PDF to OpenAI: {e}")
                     if 'content' in attachment:
                         prepared_messages[user_message_index]["content"].append({
-                            "type": "text", 
+                            "type": "text",
                             "text": f"File content:\n\n{attachment['content']}"
                         })
-        
+
         return prepared_messages
 
     def _get_base64_image(self, attachment):
         """
         Convert an image file to base64 encoding.
-        
+
         Args:
             attachment (dict): Attachment dictionary with path and metadata.
-            
+
         Returns:
             str: Base64-encoded image data.
         """
         file_path = attachment.get('path')
         if not file_path:
             raise ValueError("No file path provided in attachment")
-        
+
         if not os.path.isabs(file_path):
             current_dir = os.path.dirname(
                 os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
             )
             file_path = os.path.join(current_dir, "application", file_path)
-        
+
         if not os.path.exists(file_path):
             raise FileNotFoundError(f"File not found: {file_path}")
-        
+
         with open(file_path, "rb") as image_file:
             return base64.b64encode(image_file.read()).decode('utf-8')
 
     def _upload_file_to_openai(self, attachment): ##pdfs
         """
         Upload a file to OpenAI and return the file_id.
-        
+
         Args:
             attachment (dict): Attachment dictionary with path and metadata.
                 Expected keys:
                 - path: Path to the file
                 - id: Optional MongoDB ID for caching
-                
+
         Returns:
             str: OpenAI file_id for the uploaded file.
         """
         import os
         import logging
-        
+
         if 'openai_file_id' in attachment:
             return attachment['openai_file_id']
-        
+
         file_path = attachment.get('path')
         if not file_path:
             raise ValueError("No file path provided in attachment")
-        
+
         if not os.path.isabs(file_path):
             current_dir = os.path.dirname(
                 os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
             )
             file_path = os.path.join(current_dir,"application", file_path)
-        
+
         if not os.path.exists(file_path):
             raise FileNotFoundError(f"File not found: {file_path}")
 
-        
+
         try:
             with open(file_path, 'rb') as file:
                 response = self.client.files.create(
                     file=file,
                     purpose="assistants"
                 )
-            
+
             file_id = response.id
-            
+
             from application.core.mongo_db import MongoDB
             mongo = MongoDB.get_client()
             db = mongo["docsgpt"]
@@ -317,7 +313,7 @@ class OpenAILLM(BaseLLM):
                     {"_id": attachment['_id']},
                     {"$set": {"openai_file_id": file_id}}
                 )
-            
+
             return file_id
         except Exception as e:
             logging.error(f"Error uploading file to OpenAI: {e}")
@@ -327,7 +323,7 @@ class OpenAILLM(BaseLLM):
 class AzureOpenAILLM(OpenAILLM):
 
     def __init__(
-        self, api_key, user_api_key, *args, **kwargs 
+        self, api_key, user_api_key, *args, **kwargs
     ):
 
         super().__init__(api_key)

From 5aa51f5f3620520002e3b20c03b0a3eddf1b3e39 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Fri, 18 Apr 2025 01:27:21 +0530
Subject: [PATCH 11/39] (feat:file_abstract) openai attachments comply

---
 application/llm/openai.py | 46 +++++++++++++++------------------------
 1 file changed, 18 insertions(+), 28 deletions(-)

diff --git a/application/llm/openai.py b/application/llm/openai.py
index b3e179c1..f36e87cb 100644
--- a/application/llm/openai.py
+++ b/application/llm/openai.py
@@ -6,6 +6,7 @@ import logging
 
 from application.core.settings import settings
 from application.llm.base import BaseLLM
+from application.storage.storage_creator import StorageCreator
 
 
 class OpenAILLM(BaseLLM):
@@ -20,6 +21,7 @@ class OpenAILLM(BaseLLM):
             self.client = OpenAI(api_key=api_key)
         self.api_key = api_key
         self.user_api_key = user_api_key
+        self.storage = StorageCreator.create_storage(getattr(settings, "STORAGE_TYPE", "local"))
 
     def _clean_messages_openai(self, messages):
         cleaned_messages = []
@@ -250,19 +252,13 @@ class OpenAILLM(BaseLLM):
         if not file_path:
             raise ValueError("No file path provided in attachment")
 
-        if not os.path.isabs(file_path):
-            current_dir = os.path.dirname(
-                os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-            )
-            file_path = os.path.join(current_dir, "application", file_path)
-
-        if not os.path.exists(file_path):
+        try:
+            with self.storage.get_file(file_path) as image_file:
+                return base64.b64encode(image_file.read()).decode('utf-8')
+        except FileNotFoundError:
             raise FileNotFoundError(f"File not found: {file_path}")
 
-        with open(file_path, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode('utf-8')
-
-    def _upload_file_to_openai(self, attachment): ##pdfs
+    def _upload_file_to_openai(self, attachment):
         """
         Upload a file to OpenAI and return the file_id.
 
@@ -275,34 +271,28 @@ class OpenAILLM(BaseLLM):
         Returns:
             str: OpenAI file_id for the uploaded file.
         """
-        import os
         import logging
 
         if 'openai_file_id' in attachment:
             return attachment['openai_file_id']
 
         file_path = attachment.get('path')
-        if not file_path:
-            raise ValueError("No file path provided in attachment")
 
-        if not os.path.isabs(file_path):
-            current_dir = os.path.dirname(
-                os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-            )
-            file_path = os.path.join(current_dir,"application", file_path)
-
-        if not os.path.exists(file_path):
+        if not self.storage.file_exists(file_path):
             raise FileNotFoundError(f"File not found: {file_path}")
 
-
         try:
-            with open(file_path, 'rb') as file:
-                response = self.client.files.create(
-                    file=file,
-                    purpose="assistants"
-                )
+            # Use storage's process_file method to handle the file appropriately
+            def upload_to_openai(file_path, **kwargs):
+                with open(file_path, 'rb') as file:
+                    logging.info(f"Uploading file to OpenAI: {file_path}")
+                    response = self.client.files.create(
+                        file=file,
+                        purpose="assistants"
+                    )
+                    return response.id
 
-            file_id = response.id
+            file_id = self.storage.process_file(file_path, upload_to_openai)
 
             from application.core.mongo_db import MongoDB
             mongo = MongoDB.get_client()

From c8efef8f04c08821657dc5427b7b21354d058ff7 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Fri, 18 Apr 2025 18:27:02 +0530
Subject: [PATCH 12/39] (fix:openai) image uploads, use lambda in process_files

---
 application/llm/openai.py    | 23 +++++++++--------------
 application/storage/local.py |  2 +-
 application/storage/s3.py    | 10 +++++-----
 3 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/application/llm/openai.py b/application/llm/openai.py
index f36e87cb..87eb295b 100644
--- a/application/llm/openai.py
+++ b/application/llm/openai.py
@@ -1,7 +1,5 @@
 import json
 import base64
-import os
-import mimetypes
 import logging
 
 from application.core.settings import settings
@@ -79,6 +77,8 @@ class OpenAILLM(BaseLLM):
                                 content_parts.append(item)
                             elif "type" in item and item["type"] == "file" and "file" in item:
                                 content_parts.append(item)
+                            elif "type" in item and item["type"] == "image_url" and "image_url" in item:
+                                content_parts.append(item)
                             cleaned_messages.append({"role": role, "content": content_parts})
                         else:
                             raise ValueError(
@@ -223,7 +223,6 @@ class OpenAILLM(BaseLLM):
             elif mime_type == 'application/pdf':
                 try:
                     file_id = self._upload_file_to_openai(attachment)
-
                     prepared_messages[user_message_index]["content"].append({
                         "type": "file",
                         "file": {"file_id": file_id}
@@ -282,17 +281,13 @@ class OpenAILLM(BaseLLM):
             raise FileNotFoundError(f"File not found: {file_path}")
 
         try:
-            # Use storage's process_file method to handle the file appropriately
-            def upload_to_openai(file_path, **kwargs):
-                with open(file_path, 'rb') as file:
-                    logging.info(f"Uploading file to OpenAI: {file_path}")
-                    response = self.client.files.create(
-                        file=file,
-                        purpose="assistants"
-                    )
-                    return response.id
-
-            file_id = self.storage.process_file(file_path, upload_to_openai)
+            file_id = self.storage.process_file(
+                file_path,
+                lambda local_path, **kwargs: self.client.files.create(
+                    file=open(local_path, 'rb'),
+                    purpose="assistants"
+                ).id
+            )
 
             from application.core.mongo_db import MongoDB
             mongo = MongoDB.get_client()
diff --git a/application/storage/local.py b/application/storage/local.py
index db11b63c..fb21f08d 100644
--- a/application/storage/local.py
+++ b/application/storage/local.py
@@ -100,4 +100,4 @@ class LocalStorage(BaseStorage):
         if not os.path.exists(full_path):
             raise FileNotFoundError(f"File not found: {full_path}")
 
-        return processor_func(file_path=full_path, **kwargs)
+        return processor_func(local_path=full_path, **kwargs)
diff --git a/application/storage/s3.py b/application/storage/s3.py
index e8df210e..abc57c6d 100644
--- a/application/storage/s3.py
+++ b/application/storage/s3.py
@@ -98,23 +98,23 @@ class S3Storage(BaseStorage):
             path: Path to the file
             processor_func: Function that processes the file
             **kwargs: Additional arguments to pass to the processor function
-
+        
         Returns:
             The result of the processor function
         """
         import tempfile
         import logging
-
+        
         if not self.file_exists(path):
             raise FileNotFoundError(f"File not found in S3: {path}")
-
+        
         with tempfile.NamedTemporaryFile(suffix=os.path.splitext(path)[1], delete=True) as temp_file:
             try:
                 # Download the file from S3 to the temporary file
                 self.s3.download_fileobj(self.bucket_name, path, temp_file)
                 temp_file.flush()
-                result = processor_func(file_path=temp_file.name, **kwargs)
-                return result
+                
+                return processor_func(local_path=temp_file.name, **kwargs)
             except Exception as e:
                 logging.error(f"Error processing S3 file {path}: {e}", exc_info=True)
                 raise

From c50ff6faa386c992b7e0875a18aa6226203997fe Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Fri, 18 Apr 2025 21:03:28 +0530
Subject: [PATCH 13/39] (feat:fs abstract) googleLLM class

---
 application/llm/google_ai.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/application/llm/google_ai.py b/application/llm/google_ai.py
index 6d709ec2..0cfda686 100644
--- a/application/llm/google_ai.py
+++ b/application/llm/google_ai.py
@@ -1,11 +1,11 @@
 from google import genai
 from google.genai import types
-import os
 import logging
-import mimetypes
 import json
 
 from application.llm.base import BaseLLM
+from application.storage.storage_creator import StorageCreator
+from application.core.settings import settings
 
 
 class GoogleLLM(BaseLLM):
@@ -14,6 +14,7 @@ class GoogleLLM(BaseLLM):
         self.api_key = api_key
         self.user_api_key = user_api_key
         self.client = genai.Client(api_key=self.api_key)
+        self.storage = StorageCreator.create_storage(getattr(settings, "STORAGE_TYPE", "local"))
 
     def get_supported_attachment_types(self):
         """
@@ -109,19 +110,14 @@ class GoogleLLM(BaseLLM):
         if not file_path:
             raise ValueError("No file path provided in attachment")
 
-        if not os.path.isabs(file_path):
-            current_dir = os.path.dirname(
-                os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-            )
-            file_path = os.path.join(current_dir, "application", file_path)
-
-        if not os.path.exists(file_path):
+        if not self.storage.file_exists(file_path):
             raise FileNotFoundError(f"File not found: {file_path}")
 
         try:
-            response = self.client.files.upload(file=file_path)
-
-            file_uri = response.uri
+            file_uri = self.storage.process_file(
+                file_path,
+                lambda local_path, **kwargs: self.client.files.upload(file=local_path).uri
+            )
 
             from application.core.mongo_db import MongoDB
             mongo = MongoDB.get_client()

From 38476cfeb8dcc6aff8e3be97654e411131820bf9 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Tue, 22 Apr 2025 00:57:57 +0530
Subject: [PATCH 14/39] (feat:storage) get storage instance based on settings

---
 application/storage/storage_creator.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/application/storage/storage_creator.py b/application/storage/storage_creator.py
index dcf64983..3eca2f47 100644
--- a/application/storage/storage_creator.py
+++ b/application/storage/storage_creator.py
@@ -4,6 +4,7 @@ from typing import Dict, Type
 from application.storage.base import BaseStorage
 from application.storage.local import LocalStorage
 from application.storage.s3 import S3Storage
+from application.core.settings import settings
 
 
 class StorageCreator:
@@ -12,6 +13,16 @@ class StorageCreator:
         "s3": S3Storage,
     }
     
+    _instance = None
+    
+    @classmethod
+    def get_storage(cls) -> BaseStorage:
+        if cls._instance is None:
+            storage_type = getattr(settings, "STORAGE_TYPE", "local")
+            cls._instance = cls.create_storage(storage_type)
+        
+        return cls._instance
+    
     @classmethod
     def create_storage(cls, type_name: str, *args, **kwargs) -> BaseStorage:
         storage_class = cls.storages.get(type_name.lower())

From 0a31ddaae6f449f3160589208bcfaffe3ee9913d Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Tue, 22 Apr 2025 01:41:53 +0530
Subject: [PATCH 15/39] (feat:storage) use get storage

---
 application/llm/google_ai.py | 3 +--
 application/llm/openai.py    | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/application/llm/google_ai.py b/application/llm/google_ai.py
index 0cfda686..06dbbdfd 100644
--- a/application/llm/google_ai.py
+++ b/application/llm/google_ai.py
@@ -5,7 +5,6 @@ import json
 
 from application.llm.base import BaseLLM
 from application.storage.storage_creator import StorageCreator
-from application.core.settings import settings
 
 
 class GoogleLLM(BaseLLM):
@@ -14,7 +13,7 @@ class GoogleLLM(BaseLLM):
         self.api_key = api_key
         self.user_api_key = user_api_key
         self.client = genai.Client(api_key=self.api_key)
-        self.storage = StorageCreator.create_storage(getattr(settings, "STORAGE_TYPE", "local"))
+        self.storage = StorageCreator.get_storage()
 
     def get_supported_attachment_types(self):
         """
diff --git a/application/llm/openai.py b/application/llm/openai.py
index 87eb295b..e8df92dd 100644
--- a/application/llm/openai.py
+++ b/application/llm/openai.py
@@ -19,7 +19,7 @@ class OpenAILLM(BaseLLM):
             self.client = OpenAI(api_key=api_key)
         self.api_key = api_key
         self.user_api_key = user_api_key
-        self.storage = StorageCreator.create_storage(getattr(settings, "STORAGE_TYPE", "local"))
+        self.storage = StorageCreator.get_storage()
 
     def _clean_messages_openai(self, messages):
         cleaned_messages = []

From 64c42f0ddf75c0e0b9cca77050278b02672927c5 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Tue, 22 Apr 2025 05:18:07 +0530
Subject: [PATCH 16/39] (feat:storage) file, indexes uploads

---
 application/api/internal/routes.py |  20 ++--
 application/api/user/routes.py     |  84 ++++++++---------
 application/worker.py              | 144 ++++++++++++++++++-----------
 3 files changed, 142 insertions(+), 106 deletions(-)

diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py
index c8e32d11..e95b6327 100755
--- a/application/api/internal/routes.py
+++ b/application/api/internal/routes.py
@@ -6,7 +6,7 @@ from bson.objectid import ObjectId
 
 from application.core.mongo_db import MongoDB
 from application.core.settings import settings
-
+from application.storage.storage_creator import StorageCreator
 mongo = MongoDB.get_client()
 db = mongo["docsgpt"]
 conversations_collection = db["conversations"]
@@ -45,7 +45,8 @@ def upload_index_files():
     remote_data = request.form["remote_data"] if "remote_data" in request.form else None
     sync_frequency = secure_filename(request.form["sync_frequency"]) if "sync_frequency" in request.form else None
 
-    save_dir = os.path.join(current_dir, "indexes", str(id))
+    storage = StorageCreator.create_storage(settings.STORAGE_TYPE)
+    
     if settings.VECTOR_STORE == "faiss":
         if "file_faiss" not in request.files:
             print("No file part")
@@ -59,12 +60,13 @@ def upload_index_files():
         file_pkl = request.files["file_pkl"]
         if file_pkl.filename == "":
             return {"status": "no file name"}
-        # saves index files
-
-        if not os.path.exists(save_dir):
-            os.makedirs(save_dir)
-        file_faiss.save(os.path.join(save_dir, "index.faiss"))
-        file_pkl.save(os.path.join(save_dir, "index.pkl"))
+        
+        # Save index files
+        storage_path_faiss = f"indexes/{str(id)}/index.faiss"
+        storage_path_pkl = f"indexes/{str(id)}/index.pkl"
+        
+        storage.save_file(file_faiss, storage_path_faiss)
+        storage.save_file(file_pkl, storage_path_pkl)
 
     existing_entry = sources_collection.find_one({"_id": ObjectId(id)})
     if existing_entry:
@@ -82,6 +84,7 @@ def upload_index_files():
                     "retriever": retriever,
                     "remote_data": remote_data,
                     "sync_frequency": sync_frequency,
+                    "storage_type": settings.STORAGE_TYPE,
                 }
             },
         )
@@ -99,6 +102,7 @@ def upload_index_files():
                 "retriever": retriever,
                 "remote_data": remote_data,
                 "sync_frequency": sync_frequency,
+                "storage_type": settings.STORAGE_TYPE,
             }
         )
     return {"status": "ok"}
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 9e97e2ab..b7d79128 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -4,6 +4,7 @@ import math
 import os
 import shutil
 import uuid
+import tempfile
 
 from bson.binary import Binary, UuidRepresentation
 from bson.dbref import DBRef
@@ -21,6 +22,7 @@ from application.extensions import api
 from application.tts.google_tts import GoogleTTS
 from application.utils import check_required_fields, validate_function_name
 from application.vectorstore.vector_creator import VectorCreator
+from application.storage.storage_creator import StorageCreator
 
 mongo = MongoDB.get_client()
 db = mongo["docsgpt"]
@@ -413,54 +415,50 @@ class UploadFile(Resource):
 
         user = secure_filename(decoded_token.get("sub"))
         job_name = secure_filename(request.form["name"])
+        storage = StorageCreator.get_storage()
+        
         try:
-            save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
-            os.makedirs(save_dir, exist_ok=True)
-
             if len(files) > 1:
-                temp_dir = os.path.join(save_dir, "temp")
-                os.makedirs(temp_dir, exist_ok=True)
-
-                for file in files:
-                    filename = secure_filename(file.filename)
-                    file.save(os.path.join(temp_dir, filename))
-                    print(f"Saved file: {filename}")
-                zip_path = shutil.make_archive(
-                    base_name=os.path.join(save_dir, job_name),
-                    format="zip",
-                    root_dir=temp_dir,
-                )
-                final_filename = os.path.basename(zip_path)
-                shutil.rmtree(temp_dir)
-                task = ingest.delay(
-                    settings.UPLOAD_FOLDER,
-                    [
-                        ".rst",
-                        ".md",
-                        ".pdf",
-                        ".txt",
-                        ".docx",
-                        ".csv",
-                        ".epub",
-                        ".html",
-                        ".mdx",
-                        ".json",
-                        ".xlsx",
-                        ".pptx",
-                        ".png",
-                        ".jpg",
-                        ".jpeg",
-                    ],
-                    job_name,
-                    final_filename,
-                    user,
-                )
+                temp_dir = tempfile.mkdtemp()
+                try:
+                    for file in files:
+                        filename = secure_filename(file.filename)
+                        file.save(os.path.join(temp_dir, filename))
+                    
+                    zip_path = os.path.join(temp_dir, f"{job_name}.zip")
+                    shutil.make_archive(
+                        base_name=os.path.join(temp_dir, job_name),
+                        format="zip",
+                        root_dir=temp_dir,
+                        base_dir="."
+                    )
+                    
+                    final_filename = f"{job_name}.zip"
+                    relative_path = f"{settings.UPLOAD_FOLDER}/{user}/{job_name}/{final_filename}"
+                    
+                    with open(zip_path, 'rb') as zip_file:
+                        storage.save_file(zip_file, relative_path)
+                    
+                    task = ingest.delay(
+                        relative_path,
+                        [
+                            ".rst", ".md", ".pdf", ".txt", ".docx", ".csv", 
+                            ".epub", ".html", ".mdx", ".json", ".xlsx", 
+                            ".pptx", ".png", ".jpg", ".jpeg",
+                        ],
+                        job_name,
+                        final_filename,
+                        user,
+                    )
+                finally:
+                    shutil.rmtree(temp_dir)
             else:
                 file = files[0]
                 final_filename = secure_filename(file.filename)
-                file_path = os.path.join(save_dir, final_filename)
-                file.save(file_path)
-
+                relative_path = f"{settings.UPLOAD_FOLDER}/{user}/{job_name}/{final_filename}"
+                
+                storage.save_file(file, relative_path)
+                
                 task = ingest.delay(
                     settings.UPLOAD_FOLDER,
                     [
diff --git a/application/worker.py b/application/worker.py
index b5caa23e..f8076260 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -133,71 +133,105 @@ def ingest_worker(
     limit = None
     exclude = True
     sample = False
-    full_path = os.path.join(directory, user, name_job)
-
-    logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job})
-    file_data = {"name": name_job, "file": filename, "user": user}
 
+    storage = StorageCreator.create_storage(settings.STORAGE_TYPE)
+    temp_dir = tempfile.mkdtemp()
+    full_path = os.path.join(temp_dir, name_job)
+    
     if not os.path.exists(full_path):
         os.makedirs(full_path)
-    download_file(urljoin(settings.API_URL, "/api/download"), file_data, os.path.join(full_path, filename))
 
-    # check if file is .zip and extract it
-    if filename.endswith(".zip"):
-        extract_zip_recursive(
-            os.path.join(full_path, filename), full_path, 0, RECURSION_DEPTH
+    logging.info(f"Ingest file: {directory}/{user}/{name_job}/{filename}", extra={"user": user, "job": name_job})
+    file_data = {"name": name_job, "file": filename, "user": user}
+    
+    try:
+        file_path = f"{directory}/{user}/{name_job}/{filename}"
+        
+        try:
+            file_obj = storage.get_file(file_path)
+            
+            local_file_path = os.path.join(full_path, filename)
+            with open(local_file_path, 'wb') as f:
+                shutil.copyfileobj(file_obj, f)
+    
+            # check if file is .zip and extract it
+            if filename.endswith(".zip"):
+                extract_zip_recursive(
+                    os.path.join(full_path, filename), full_path, 0, RECURSION_DEPTH
+                )
+        except FileNotFoundError as e:
+            logging.error(f"File not found in storage: {file_path}")
+            raise FileNotFoundError(f"File not found: {file_path}") from e
+
+        self.update_state(state="PROGRESS", meta={"current": 1})
+
+        raw_docs = SimpleDirectoryReader(
+            input_dir=full_path,
+            input_files=input_files,
+            recursive=recursive,
+            required_exts=formats,
+            num_files_limit=limit,
+            exclude_hidden=exclude,
+            file_metadata=metadata_from_filename,
+        ).load_data()
+
+        chunker = Chunker(
+            chunking_strategy="classic_chunk",
+            max_tokens=MAX_TOKENS,
+            min_tokens=MIN_TOKENS,
+            duplicate_headers=False
         )
+        raw_docs = chunker.chunk(documents=raw_docs)
 
-    self.update_state(state="PROGRESS", meta={"current": 1})
+        docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
+        id = ObjectId()
 
-    raw_docs = SimpleDirectoryReader(
-        input_dir=full_path,
-        input_files=input_files,
-        recursive=recursive,
-        required_exts=formats,
-        num_files_limit=limit,
-        exclude_hidden=exclude,
-        file_metadata=metadata_from_filename,
-    ).load_data()
+        vector_dir = os.path.join(temp_dir, "vector_store")
+        os.makedirs(vector_dir, exist_ok=True)
+        
+        embed_and_store_documents(docs, vector_dir, str(id), self)
+        tokens = count_tokens_docs(docs)
+        self.update_state(state="PROGRESS", meta={"current": 100})
 
-    chunker = Chunker(
-        chunking_strategy="classic_chunk",
-        max_tokens=MAX_TOKENS,
-        min_tokens=MIN_TOKENS,
-        duplicate_headers=False
-    )
-    raw_docs = chunker.chunk(documents=raw_docs)
+        if sample:
+            for i in range(min(5, len(raw_docs))):
+                logging.info(f"Sample document {i}: {raw_docs[i]}")
 
-    docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
-    id = ObjectId()
+        file_data.update({
+            "tokens": tokens,
+            "retriever": retriever,
+            "id": str(id),
+            "type": "local",
+        })
+        
+        mongo = MongoDB.get_client()
+        db = mongo["docsgpt"]
+        sources_collection = db["sources"]
+        
+        sources_collection.insert_one({
+            "_id": id,
+            "name": name_job,
+            "user": user,
+            "date": datetime.datetime.now(),
+            "tokens": tokens,
+            "retriever": retriever,
+            "type": "local",
+            "storage_type": settings.STORAGE_TYPE,
+            "original_file_path": file_path
+        })
 
-    embed_and_store_documents(docs, full_path, id, self)
-    tokens = count_tokens_docs(docs)
-    self.update_state(state="PROGRESS", meta={"current": 100})
-
-    if sample:
-        for i in range(min(5, len(raw_docs))):
-            logging.info(f"Sample document {i}: {raw_docs[i]}")
-
-    file_data.update({
-        "tokens": tokens,
-        "retriever": retriever,
-        "id": str(id),
-        "type": "local",
-    })
-    upload_index(full_path, file_data)
-
-    # delete local
-    shutil.rmtree(full_path)
-
-    return {
-        "directory": directory,
-        "formats": formats,
-        "name_job": name_job,
-        "filename": filename,
-        "user": user,
-        "limited": False,
-    }
+        return {
+            "directory": directory,
+            "formats": formats,
+            "name_job": name_job,
+            "filename": filename,
+            "user": user,
+            "limited": False,
+        }
+    
+    finally:
+        if os.path.exists(temp_dir):
+            shutil.rmtree(temp_dir)
 
 def remote_worker(
     self,

From 5ad34e2216e3052f075059afbf2026043047bf06 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Tue, 22 Apr 2025 17:34:25 +0530
Subject: [PATCH 17/39] (fix:indexes) look for the right path

---
 application/api/internal/routes.py       |  16 ++--
 application/parser/embedding_pipeline.py |   6 +-
 application/vectorstore/faiss.py         | 110 +++++++++++++++++++----
 application/worker.py                    |  30 ++++---
 4 files changed, 128 insertions(+), 34 deletions(-)

diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py
index e95b6327..6ba07431 100755
--- a/application/api/internal/routes.py
+++ b/application/api/internal/routes.py
@@ -1,5 +1,6 @@
 import os
 import datetime
+import logging
 from flask import Blueprint, request, send_from_directory
 from werkzeug.utils import secure_filename
 from bson.objectid import ObjectId
@@ -46,7 +47,7 @@ def upload_index_files():
     sync_frequency = secure_filename(request.form["sync_frequency"]) if "sync_frequency" in request.form else None
 
     storage = StorageCreator.create_storage(settings.STORAGE_TYPE)
-    
+
     if settings.VECTOR_STORE == "faiss":
         if "file_faiss" not in request.files:
             print("No file part")
@@ -60,13 +61,18 @@ def upload_index_files():
         file_pkl = request.files["file_pkl"]
         if file_pkl.filename == "":
             return {"status": "no file name"}
-        
+
         # Save index files
         storage_path_faiss = f"indexes/{str(id)}/index.faiss"
         storage_path_pkl = f"indexes/{str(id)}/index.pkl"
-        
-        storage.save_file(file_faiss, storage_path_faiss)
-        storage.save_file(file_pkl, storage_path_pkl)
+
+        try:
+            storage.save_file(file_faiss, storage_path_faiss)
+            storage.save_file(file_pkl, storage_path_pkl)
+            logging.info(f"Successfully saved FAISS index files for ID {id}")
+        except Exception as e:
+            logging.error(f"Error saving FAISS index files: {e}")
+            return {"status": "error", "message": str(e)}
 
     existing_entry = sources_collection.find_one({"_id": ObjectId(id)})
     if existing_entry:
diff --git a/application/parser/embedding_pipeline.py b/application/parser/embedding_pipeline.py
index 0435cd14..005d3756 100755
--- a/application/parser/embedding_pipeline.py
+++ b/application/parser/embedding_pipeline.py
@@ -42,17 +42,18 @@ def embed_and_store_documents(docs, folder_name, source_id, task_status):
 
     # Initialize vector store
     if settings.VECTOR_STORE == "faiss":
+
         docs_init = [docs.pop(0)]
         store = VectorCreator.create_vectorstore(
             settings.VECTOR_STORE,
             docs_init=docs_init,
-            source_id=folder_name,
+            source_id=str(source_id),
             embeddings_key=os.getenv("EMBEDDINGS_KEY"),
         )
     else:
         store = VectorCreator.create_vectorstore(
             settings.VECTOR_STORE,
-            source_id=source_id,
+            source_id=str(source_id),
             embeddings_key=os.getenv("EMBEDDINGS_KEY"),
         )
         store.delete_index()
@@ -82,5 +83,6 @@ def embed_and_store_documents(docs, folder_name, source_id, task_status):
 
     # Save the vector store
     if settings.VECTOR_STORE == "faiss":
+        # For FAISS, save to the temporary folder first
         store.save_local(folder_name)
     logging.info("Vector store saved successfully.")
diff --git a/application/vectorstore/faiss.py b/application/vectorstore/faiss.py
index 87ffcccb..5a38f966 100644
--- a/application/vectorstore/faiss.py
+++ b/application/vectorstore/faiss.py
@@ -1,35 +1,45 @@
 import os
+import tempfile
+import logging
 
 from langchain_community.vectorstores import FAISS
 
 from application.core.settings import settings
 from application.parser.schema.base import Document
 from application.vectorstore.base import BaseVectorStore
+from application.storage.storage_creator import StorageCreator
 
 
-def get_vectorstore(path: str) -> str:
-    if path:
-        vectorstore = os.path.join("application", "indexes", path)
+def get_vectorstore_path(source_id: str) -> str:
+    if source_id:
+        clean_id = source_id.replace("application/indexes/", "").rstrip("/")
+        return f"indexes/{clean_id}"
     else:
-        vectorstore = os.path.join("application")
-    return vectorstore
-
+        return "indexes"
 
 class FaissStore(BaseVectorStore):
     def __init__(self, source_id: str, embeddings_key: str, docs_init=None):
         super().__init__()
         self.source_id = source_id
-        self.path = get_vectorstore(source_id)
+        self.storage = StorageCreator.get_storage()
+        self.storage_path = get_vectorstore_path(source_id)
         self.embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
 
         try:
             if docs_init:
                 self.docsearch = FAISS.from_documents(docs_init, self.embeddings)
             else:
-                self.docsearch = FAISS.load_local(
-                    self.path, self.embeddings, allow_dangerous_deserialization=True
-                )
-        except Exception:
+                if self.storage.__class__.__name__ == "LocalStorage":
+                    # For local storage, we can use the path directly
+                    local_path = self.storage._get_full_path(self.storage_path)
+                    self.docsearch = FAISS.load_local(
+                        local_path, self.embeddings, allow_dangerous_deserialization=True
+                    )
+                else:
+                    # For non-local storage (S3, etc.), download files to temp directory first
+                    self.docsearch = self._load_from_remote_storage()
+        except Exception as e:
+            logging.error(f"Error initializing FAISS store: {e}")
             raise
 
         self.assert_embedding_dimensions(self.embeddings)
@@ -40,8 +50,26 @@ class FaissStore(BaseVectorStore):
     def add_texts(self, *args, **kwargs):
         return self.docsearch.add_texts(*args, **kwargs)
 
-    def save_local(self, *args, **kwargs):
-        return self.docsearch.save_local(*args, **kwargs)
+    def save_local(self, folder_path=None):
+        path_to_use = folder_path or self.storage_path
+
+        if folder_path or self.storage.__class__.__name__ == "LocalStorage":
+            # If it's a local path or temp dir, save directly
+            local_path = path_to_use
+            if self.storage.__class__.__name__ == "LocalStorage" and not folder_path:
+                local_path = self.storage._get_full_path(path_to_use)
+
+            os.makedirs(os.path.dirname(local_path) if os.path.dirname(local_path) else local_path, exist_ok=True)
+
+            self.docsearch.save_local(local_path)
+
+            if folder_path and self.storage.__class__.__name__ != "LocalStorage":
+                self._upload_index_to_remote(folder_path)
+        else:
+            # For remote storage, save to temp dir first, then upload
+            with tempfile.TemporaryDirectory() as temp_dir:
+                self.docsearch.save_local(temp_dir)
+                self._upload_index_to_remote(temp_dir)
 
     def delete_index(self, *args, **kwargs):
         return self.docsearch.delete(*args, **kwargs)
@@ -80,10 +108,62 @@ class FaissStore(BaseVectorStore):
         metadata = metadata or {}
         doc = Document(text=text, extra_info=metadata).to_langchain_format()
         doc_id = self.docsearch.add_documents([doc])
-        self.save_local(self.path)
+        self.save_local()
         return doc_id
 
     def delete_chunk(self, chunk_id):
         self.delete_index([chunk_id])
-        self.save_local(self.path)
+        self.save_local()
         return True
+
+    def _load_from_remote_storage(self):
+        with tempfile.TemporaryDirectory() as temp_dir:
+            try:
+                # Check if both index files exist in remote storage
+                faiss_path = f"{self.storage_path}/index.faiss"
+                pkl_path = f"{self.storage_path}/index.pkl"
+
+                if not self.storage.file_exists(faiss_path) or not self.storage.file_exists(pkl_path):
+                    raise FileNotFoundError(f"FAISS index files not found at {self.storage_path}")
+
+                # Download both files to temp directory
+                faiss_file = self.storage.get_file(faiss_path)
+                pkl_file = self.storage.get_file(pkl_path)
+
+                local_faiss_path = os.path.join(temp_dir, "index.faiss")
+                local_pkl_path = os.path.join(temp_dir, "index.pkl")
+
+                with open(local_faiss_path, 'wb') as f:
+                    f.write(faiss_file.read())
+
+                with open(local_pkl_path, 'wb') as f:
+                    f.write(pkl_file.read())
+
+                # Load the index from the temp directory
+                return FAISS.load_local(
+                    temp_dir, self.embeddings, allow_dangerous_deserialization=True
+                )
+            except Exception as e:
+                logging.error(f"Error loading FAISS index from remote storage: {e}")
+                raise
+
+    def _upload_index_to_remote(self, local_folder):
+        try:
+            # Get paths to the index files
+            local_faiss_path = os.path.join(local_folder, "index.faiss")
+            local_pkl_path = os.path.join(local_folder, "index.pkl")
+
+            remote_faiss_path = f"{self.storage_path}/index.faiss"
+            remote_pkl_path = f"{self.storage_path}/index.pkl"
+
+            # Upload both files to remote storage
+            with open(local_faiss_path, 'rb') as f:
+                self.storage.save_file(f, remote_faiss_path)
+
+            with open(local_pkl_path, 'rb') as f:
+                self.storage.save_file(f, remote_pkl_path)
+
+            logging.info(f"Successfully uploaded FAISS index to {self.storage_path}")
+        except Exception as e:
+            logging.error(f"Error uploading FAISS index to remote storage: {e}")
+            raise
diff --git a/application/worker.py b/application/worker.py
index f8076260..5d32cf66 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -89,9 +89,12 @@ def download_file(url, params, dest_path):
 def upload_index(full_path, file_data):
     try:
         if settings.VECTOR_STORE == "faiss":
+            faiss_path = os.path.join(full_path, "index.faiss")
+            pkl_path = os.path.join(full_path, "index.pkl")
+
             files = {
-                "file_faiss": open(full_path + "/index.faiss", "rb"),
-                "file_pkl": open(full_path + "/index.pkl", "rb"),
+                "file_faiss": open(faiss_path, "rb"),
+                "file_pkl": open(pkl_path, "rb"),
             }
             response = requests.post(
                 urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
@@ -104,8 +107,11 @@ def upload_index(full_path, file_data):
     except requests.RequestException as e:
         logging.error(f"Error uploading index: {e}")
         raise
+    except FileNotFoundError as e:
+        logging.error(f"File not found: {e}")
+        raise
     finally:
-        if settings.VECTOR_STORE == "faiss":
+        if settings.VECTOR_STORE == "faiss" and 'files' in locals():
             for file in files.values():
                 file.close()
 
@@ -137,23 +143,23 @@ def ingest_worker(
     storage = StorageCreator.create_storage(settings.STORAGE_TYPE)
     temp_dir = tempfile.mkdtemp()
     full_path = os.path.join(temp_dir, name_job)
-    
+
     if not os.path.exists(full_path):
         os.makedirs(full_path)
 
     logging.info(f"Ingest file: {directory}/{user}/{name_job}/{filename}", extra={"user": user, "job": name_job})
     file_data = {"name": name_job, "file": filename, "user": user}
-    
+
     try:
         file_path = f"{directory}/{user}/{name_job}/{filename}"
-        
+
         try:
             file_obj = storage.get_file(file_path)
-            
+
             local_file_path = os.path.join(full_path, filename)
             with open(local_file_path, 'wb') as f:
                 shutil.copyfileobj(file_obj, f)
-    
+
             # check if file is .zip and extract it
             if filename.endswith(".zip"):
                 extract_zip_recursive(
@@ -188,7 +194,7 @@ def ingest_worker(
 
         vector_dir = os.path.join(temp_dir, "vector_store")
         os.makedirs(vector_dir, exist_ok=True)
-        
+
         embed_and_store_documents(docs, vector_dir, str(id), self)
         tokens = count_tokens_docs(docs)
         self.update_state(state="PROGRESS", meta={"current": 100})
@@ -203,11 +209,11 @@ def ingest_worker(
             "id": str(id),
             "type": "local",
         })
-        
+
         mongo = MongoDB.get_client()
         db = mongo["docsgpt"]
         sources_collection = db["sources"]
-        
+
         sources_collection.insert_one({
             "_id": id,
             "name": name_job,
@@ -228,7 +234,7 @@ def ingest_worker(
             "user": user,
             "limited": False,
         }
-    
+
     finally:
         if os.path.exists(temp_dir):
             shutil.rmtree(temp_dir)

From 24c8b24b1f367669d208cf93e07895b3d9b896f2 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Wed, 23 Apr 2025 00:52:22 +0530
Subject: [PATCH 18/39] Revert "(fix:indexes) look for the right path"

This reverts commit 5ad34e2216e3052f075059afbf2026043047bf06.
---
 application/api/internal/routes.py       |  16 ++--
 application/parser/embedding_pipeline.py |   6 +-
 application/vectorstore/faiss.py         | 110 ++++-------------------
 application/worker.py                    |  30 +++----
 4 files changed, 34 insertions(+), 128 deletions(-)

diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py
index 6ba07431..e95b6327 100755
--- a/application/api/internal/routes.py
+++ b/application/api/internal/routes.py
@@ -1,6 +1,5 @@
 import os
 import datetime
-import logging
 from flask import Blueprint, request, send_from_directory
 from werkzeug.utils import secure_filename
 from bson.objectid import ObjectId
@@ -47,7 +46,7 @@ def upload_index_files():
     sync_frequency = secure_filename(request.form["sync_frequency"]) if "sync_frequency" in request.form else None
 
     storage = StorageCreator.create_storage(settings.STORAGE_TYPE)
-
+    
     if settings.VECTOR_STORE == "faiss":
         if "file_faiss" not in request.files:
             print("No file part")
@@ -61,18 +60,13 @@ def upload_index_files():
         file_pkl = request.files["file_pkl"]
         if file_pkl.filename == "":
             return {"status": "no file name"}
-
+        
         # Save index files
         storage_path_faiss = f"indexes/{str(id)}/index.faiss"
         storage_path_pkl = f"indexes/{str(id)}/index.pkl"
-
-        try:
-            storage.save_file(file_faiss, storage_path_faiss)
-            storage.save_file(file_pkl, storage_path_pkl)
-            logging.info(f"Successfully saved FAISS index files for ID {id}")
-        except Exception as e:
-            logging.error(f"Error saving FAISS index files: {e}")
-            return {"status": "error", "message": str(e)}
+        
+        storage.save_file(file_faiss, storage_path_faiss)
+        storage.save_file(file_pkl, storage_path_pkl)
 
     existing_entry = sources_collection.find_one({"_id": ObjectId(id)})
     if existing_entry:
diff --git a/application/parser/embedding_pipeline.py b/application/parser/embedding_pipeline.py
index 005d3756..0435cd14 100755
--- a/application/parser/embedding_pipeline.py
+++ b/application/parser/embedding_pipeline.py
@@ -42,18 +42,17 @@ def embed_and_store_documents(docs, folder_name, source_id, task_status):
 
     # Initialize vector store
     if settings.VECTOR_STORE == "faiss":
-
         docs_init = [docs.pop(0)]
         store = VectorCreator.create_vectorstore(
             settings.VECTOR_STORE,
             docs_init=docs_init,
-            source_id=str(source_id),
+            source_id=folder_name,
             embeddings_key=os.getenv("EMBEDDINGS_KEY"),
         )
     else:
         store = VectorCreator.create_vectorstore(
             settings.VECTOR_STORE,
-            source_id=str(source_id),
+            source_id=source_id,
             embeddings_key=os.getenv("EMBEDDINGS_KEY"),
         )
         store.delete_index()
@@ -83,6 +82,5 @@ def embed_and_store_documents(docs, folder_name, source_id, task_status):
 
     # Save the vector store
     if settings.VECTOR_STORE == "faiss":
-        # For FAISS, save to the temporary folder first
         store.save_local(folder_name)
     logging.info("Vector store saved successfully.")
diff --git a/application/vectorstore/faiss.py b/application/vectorstore/faiss.py
index 5a38f966..87ffcccb 100644
--- a/application/vectorstore/faiss.py
+++ b/application/vectorstore/faiss.py
@@ -1,45 +1,35 @@
 import os
-import tempfile
-import logging
 
 from langchain_community.vectorstores import FAISS
 
 from application.core.settings import settings
 from application.parser.schema.base import Document
 from application.vectorstore.base import BaseVectorStore
-from application.storage.storage_creator import StorageCreator
 
 
-def get_vectorstore_path(source_id: str) -> str:
-    if source_id:
-        clean_id = source_id.replace("application/indexes/", "").rstrip("/")
-        return f"indexes/{clean_id}"
+def get_vectorstore(path: str) -> str:
+    if path:
+        vectorstore = os.path.join("application", "indexes", path)
     else:
-        return "indexes"
+        vectorstore = os.path.join("application")
+    return vectorstore
+
 
 class FaissStore(BaseVectorStore):
     def __init__(self, source_id: str, embeddings_key: str, docs_init=None):
         super().__init__()
         self.source_id = source_id
-        self.storage = StorageCreator.get_storage()
-        self.storage_path = get_vectorstore_path(source_id)
+        self.path = get_vectorstore(source_id)
         self.embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
 
         try:
             if docs_init:
                 self.docsearch = FAISS.from_documents(docs_init, self.embeddings)
             else:
-                if self.storage.__class__.__name__ == "LocalStorage":
-                    # For local storage, we can use the path directly
-                    local_path = self.storage._get_full_path(self.storage_path)
-                    self.docsearch = FAISS.load_local(
-                        local_path, self.embeddings, allow_dangerous_deserialization=True
-                    )
-                else:
-                    # For non-local storage (S3, etc.), download files to temp directory first
-                    self.docsearch = self._load_from_remote_storage()
-        except Exception as e:
-            logging.error(f"Error initializing FAISS store: {e}")
+                self.docsearch = FAISS.load_local(
+                    self.path, self.embeddings, allow_dangerous_deserialization=True
+                )
+        except Exception:
             raise
 
         self.assert_embedding_dimensions(self.embeddings)
@@ -50,26 +40,8 @@ class FaissStore(BaseVectorStore):
     def add_texts(self, *args, **kwargs):
         return self.docsearch.add_texts(*args, **kwargs)
 
-    def save_local(self, folder_path=None):
-        path_to_use = folder_path or self.storage_path
-
-        if folder_path or self.storage.__class__.__name__ == "LocalStorage":
-            # If it's a local path or temp dir, save directly
-            local_path = path_to_use
-            if self.storage.__class__.__name__ == "LocalStorage" and not folder_path:
-                local_path = self.storage._get_full_path(path_to_use)
-
-            os.makedirs(os.path.dirname(local_path) if os.path.dirname(local_path) else local_path, exist_ok=True)
-
-            self.docsearch.save_local(local_path)
-
-            if folder_path and self.storage.__class__.__name__ != "LocalStorage":
-                self._upload_index_to_remote(folder_path)
-        else:
-            # For remote storage, save to temp dir first, then upload
-            with tempfile.TemporaryDirectory() as temp_dir:
-                self.docsearch.save_local(temp_dir)
-                self._upload_index_to_remote(temp_dir)
+    def save_local(self, *args, **kwargs):
+        return self.docsearch.save_local(*args, **kwargs)
 
     def delete_index(self, *args, **kwargs):
         return self.docsearch.delete(*args, **kwargs)
@@ -108,62 +80,10 @@ class FaissStore(BaseVectorStore):
         metadata = metadata or {}
         doc = Document(text=text, extra_info=metadata).to_langchain_format()
         doc_id = self.docsearch.add_documents([doc])
-        self.save_local()
+        self.save_local(self.path)
         return doc_id
 
     def delete_chunk(self, chunk_id):
         self.delete_index([chunk_id])
-        self.save_local()
+        self.save_local(self.path)
         return True
-
-    def _load_from_remote_storage(self):
-        with tempfile.TemporaryDirectory() as temp_dir:
-            try:
-                # Check if both index files exist in remote storage
-                faiss_path = f"{self.storage_path}/index.faiss"
-                pkl_path = f"{self.storage_path}/index.pkl"
-
-                if not self.storage.file_exists(faiss_path) or not self.storage.file_exists(pkl_path):
-                    raise FileNotFoundError(f"FAISS index files not found at {self.storage_path}")
-
-                # Download both files to temp directory
-                faiss_file = self.storage.get_file(faiss_path)
-                pkl_file = self.storage.get_file(pkl_path)
-
-                local_faiss_path = os.path.join(temp_dir, "index.faiss")
-                local_pkl_path = os.path.join(temp_dir, "index.pkl")
-
-                with open(local_faiss_path, 'wb') as f:
-                    f.write(faiss_file.read())
-
-                with open(local_pkl_path, 'wb') as f:
-                    f.write(pkl_file.read())
-
-                # Load the index from the temp directory
-                return FAISS.load_local(
-                    temp_dir, self.embeddings, allow_dangerous_deserialization=True
-                )
-            except Exception as e:
-                logging.error(f"Error loading FAISS index from remote storage: {e}")
-                raise
-
-    def _upload_index_to_remote(self, local_folder):
-        try:
-            # Get paths to the index files
-            local_faiss_path = os.path.join(local_folder, "index.faiss")
-            local_pkl_path = os.path.join(local_folder, "index.pkl")
-
-            remote_faiss_path = f"{self.storage_path}/index.faiss"
-            remote_pkl_path = f"{self.storage_path}/index.pkl"
-
-            # Upload both files to remote storage
-            with open(local_faiss_path, 'rb') as f:
-                self.storage.save_file(f, remote_faiss_path)
-
-            with open(local_pkl_path, 'rb') as f:
-                self.storage.save_file(f, remote_pkl_path)
-
-            logging.info(f"Successfully uploaded FAISS index to {self.storage_path}")
-        except Exception as e:
-            logging.error(f"Error uploading FAISS index to remote storage: {e}")
-            raise
diff --git a/application/worker.py b/application/worker.py
index 5d32cf66..f8076260 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -89,12 +89,9 @@ def download_file(url, params, dest_path):
 def upload_index(full_path, file_data):
     try:
         if settings.VECTOR_STORE == "faiss":
-            faiss_path = os.path.join(full_path, "index.faiss")
-            pkl_path = os.path.join(full_path, "index.pkl")
-
             files = {
-                "file_faiss": open(faiss_path, "rb"),
-                "file_pkl": open(pkl_path, "rb"),
+                "file_faiss": open(full_path + "/index.faiss", "rb"),
+                "file_pkl": open(full_path + "/index.pkl", "rb"),
             }
             response = requests.post(
                 urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
@@ -107,11 +104,8 @@ def upload_index(full_path, file_data):
     except requests.RequestException as e:
         logging.error(f"Error uploading index: {e}")
         raise
-    except FileNotFoundError as e:
-        logging.error(f"File not found: {e}")
-        raise
     finally:
-        if settings.VECTOR_STORE == "faiss" and 'files' in locals():
+        if settings.VECTOR_STORE == "faiss":
             for file in files.values():
                 file.close()
 
@@ -143,23 +137,23 @@ def ingest_worker(
     storage = StorageCreator.create_storage(settings.STORAGE_TYPE)
     temp_dir = tempfile.mkdtemp()
     full_path = os.path.join(temp_dir, name_job)
-
+    
     if not os.path.exists(full_path):
         os.makedirs(full_path)
 
     logging.info(f"Ingest file: {directory}/{user}/{name_job}/{filename}", extra={"user": user, "job": name_job})
     file_data = {"name": name_job, "file": filename, "user": user}
-
+    
     try:
         file_path = f"{directory}/{user}/{name_job}/{filename}"
-
+        
         try:
             file_obj = storage.get_file(file_path)
-
+            
             local_file_path = os.path.join(full_path, filename)
             with open(local_file_path, 'wb') as f:
                 shutil.copyfileobj(file_obj, f)
-
+    
             # check if file is .zip and extract it
             if filename.endswith(".zip"):
                 extract_zip_recursive(
@@ -194,7 +188,7 @@ def ingest_worker(
 
         vector_dir = os.path.join(temp_dir, "vector_store")
         os.makedirs(vector_dir, exist_ok=True)
-
+        
         embed_and_store_documents(docs, vector_dir, str(id), self)
         tokens = count_tokens_docs(docs)
         self.update_state(state="PROGRESS", meta={"current": 100})
@@ -209,11 +203,11 @@ def ingest_worker(
             "id": str(id),
             "type": "local",
         })
-
+        
         mongo = MongoDB.get_client()
         db = mongo["docsgpt"]
         sources_collection = db["sources"]
-
+        
         sources_collection.insert_one({
             "_id": id,
             "name": name_job,
@@ -234,7 +228,7 @@ def ingest_worker(
             "user": user,
             "limited": False,
         }
-
+    
     finally:
         if os.path.exists(temp_dir):
             shutil.rmtree(temp_dir)

From 637d3a24a1d7adb6ed78e6b3be22855d195fc4af Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Wed, 23 Apr 2025 00:52:55 +0530
Subject: [PATCH 19/39] Revert "(feat:storage) file, indexes uploads"

This reverts commit 64c42f0ddf75c0e0b9cca77050278b02672927c5.
---
 application/api/internal/routes.py |  20 ++--
 application/api/user/routes.py     |  84 +++++++++--------
 application/worker.py              | 144 +++++++++++------------------
 3 files changed, 106 insertions(+), 142 deletions(-)

diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py
index e95b6327..c8e32d11 100755
--- a/application/api/internal/routes.py
+++ b/application/api/internal/routes.py
@@ -6,7 +6,7 @@ from bson.objectid import ObjectId
 
 from application.core.mongo_db import MongoDB
 from application.core.settings import settings
-from application.storage.storage_creator import StorageCreator
+
 mongo = MongoDB.get_client()
 db = mongo["docsgpt"]
 conversations_collection = db["conversations"]
@@ -45,8 +45,7 @@ def upload_index_files():
     remote_data = request.form["remote_data"] if "remote_data" in request.form else None
     sync_frequency = secure_filename(request.form["sync_frequency"]) if "sync_frequency" in request.form else None
 
-    storage = StorageCreator.create_storage(settings.STORAGE_TYPE)
-    
+    save_dir = os.path.join(current_dir, "indexes", str(id))
     if settings.VECTOR_STORE == "faiss":
         if "file_faiss" not in request.files:
             print("No file part")
@@ -60,13 +59,12 @@ def upload_index_files():
         file_pkl = request.files["file_pkl"]
         if file_pkl.filename == "":
             return {"status": "no file name"}
-        
-        # Save index files
-        storage_path_faiss = f"indexes/{str(id)}/index.faiss"
-        storage_path_pkl = f"indexes/{str(id)}/index.pkl"
-        
-        storage.save_file(file_faiss, storage_path_faiss)
-        storage.save_file(file_pkl, storage_path_pkl)
+        # saves index files
+
+        if not os.path.exists(save_dir):
+            os.makedirs(save_dir)
+        file_faiss.save(os.path.join(save_dir, "index.faiss"))
+        file_pkl.save(os.path.join(save_dir, "index.pkl"))
 
     existing_entry = sources_collection.find_one({"_id": ObjectId(id)})
     if existing_entry:
@@ -84,7 +82,6 @@ def upload_index_files():
                     "retriever": retriever,
                     "remote_data": remote_data,
                     "sync_frequency": sync_frequency,
-                    "storage_type": settings.STORAGE_TYPE,
                 }
             },
         )
@@ -102,7 +99,6 @@ def upload_index_files():
                 "retriever": retriever,
                 "remote_data": remote_data,
                 "sync_frequency": sync_frequency,
-                "storage_type": settings.STORAGE_TYPE,
             }
         )
     return {"status": "ok"}
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index b7d79128..9e97e2ab 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -4,7 +4,6 @@ import math
 import os
 import shutil
 import uuid
-import tempfile
 
 from bson.binary import Binary, UuidRepresentation
 from bson.dbref import DBRef
@@ -22,7 +21,6 @@ from application.extensions import api
 from application.tts.google_tts import GoogleTTS
 from application.utils import check_required_fields, validate_function_name
 from application.vectorstore.vector_creator import VectorCreator
-from application.storage.storage_creator import StorageCreator
 
 mongo = MongoDB.get_client()
 db = mongo["docsgpt"]
@@ -415,50 +413,54 @@ class UploadFile(Resource):
 
         user = secure_filename(decoded_token.get("sub"))
         job_name = secure_filename(request.form["name"])
-        storage = StorageCreator.get_storage()
-        
         try:
+            save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
+            os.makedirs(save_dir, exist_ok=True)
+
             if len(files) > 1:
-                temp_dir = tempfile.mkdtemp()
-                try:
-                    for file in files:
-                        filename = secure_filename(file.filename)
-                        file.save(os.path.join(temp_dir, filename))
-                    
-                    zip_path = os.path.join(temp_dir, f"{job_name}.zip")
-                    shutil.make_archive(
-                        base_name=os.path.join(temp_dir, job_name),
-                        format="zip",
-                        root_dir=temp_dir,
-                        base_dir="."
-                    )
-                    
-                    final_filename = f"{job_name}.zip"
-                    relative_path = f"{settings.UPLOAD_FOLDER}/{user}/{job_name}/{final_filename}"
-                    
-                    with open(zip_path, 'rb') as zip_file:
-                        storage.save_file(zip_file, relative_path)
-                    
-                    task = ingest.delay(
-                        relative_path,
-                        [
-                            ".rst", ".md", ".pdf", ".txt", ".docx", ".csv", 
-                            ".epub", ".html", ".mdx", ".json", ".xlsx", 
-                            ".pptx", ".png", ".jpg", ".jpeg",
-                        ],
-                        job_name,
-                        final_filename,
-                        user,
-                    )
-                finally:
-                    shutil.rmtree(temp_dir)
+                temp_dir = os.path.join(save_dir, "temp")
+                os.makedirs(temp_dir, exist_ok=True)
+
+                for file in files:
+                    filename = secure_filename(file.filename)
+                    file.save(os.path.join(temp_dir, filename))
+                    print(f"Saved file: {filename}")
+                zip_path = shutil.make_archive(
+                    base_name=os.path.join(save_dir, job_name),
+                    format="zip",
+                    root_dir=temp_dir,
+                )
+                final_filename = os.path.basename(zip_path)
+                shutil.rmtree(temp_dir)
+                task = ingest.delay(
+                    settings.UPLOAD_FOLDER,
+                    [
+                        ".rst",
+                        ".md",
+                        ".pdf",
+                        ".txt",
+                        ".docx",
+                        ".csv",
+                        ".epub",
+                        ".html",
+                        ".mdx",
+                        ".json",
+                        ".xlsx",
+                        ".pptx",
+                        ".png",
+                        ".jpg",
+                        ".jpeg",
+                    ],
+                    job_name,
+                    final_filename,
+                    user,
+                )
             else:
                 file = files[0]
                 final_filename = secure_filename(file.filename)
-                relative_path = f"{settings.UPLOAD_FOLDER}/{user}/{job_name}/{final_filename}"
-                
-                storage.save_file(file, relative_path)
-                
+                file_path = os.path.join(save_dir, final_filename)
+                file.save(file_path)
+
                 task = ingest.delay(
                     settings.UPLOAD_FOLDER,
                     [
diff --git a/application/worker.py b/application/worker.py
index f8076260..b5caa23e 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -133,105 +133,71 @@ def ingest_worker(
     limit = None
     exclude = True
     sample = False
+    full_path = os.path.join(directory, user, name_job)
+
+    logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job})
+    file_data = {"name": name_job, "file": filename, "user": user}
 
-    storage = StorageCreator.create_storage(settings.STORAGE_TYPE)
-    temp_dir = tempfile.mkdtemp()
-    full_path = os.path.join(temp_dir, name_job)
-    
     if not os.path.exists(full_path):
         os.makedirs(full_path)
+    download_file(urljoin(settings.API_URL, "/api/download"), file_data, os.path.join(full_path, filename))
 
-    logging.info(f"Ingest file: {directory}/{user}/{name_job}/{filename}", extra={"user": user, "job": name_job})
-    file_data = {"name": name_job, "file": filename, "user": user}
-    
-    try:
-        file_path = f"{directory}/{user}/{name_job}/{filename}"
-        
-        try:
-            file_obj = storage.get_file(file_path)
-            
-            local_file_path = os.path.join(full_path, filename)
-            with open(local_file_path, 'wb') as f:
-                shutil.copyfileobj(file_obj, f)
-    
-            # check if file is .zip and extract it
-            if filename.endswith(".zip"):
-                extract_zip_recursive(
-                    os.path.join(full_path, filename), full_path, 0, RECURSION_DEPTH
-                )
-        except FileNotFoundError as e:
-            logging.error(f"File not found in storage: {file_path}")
-            raise FileNotFoundError(f"File not found: {file_path}") from e
-
-        self.update_state(state="PROGRESS", meta={"current": 1})
-
-        raw_docs = SimpleDirectoryReader(
-            input_dir=full_path,
-            input_files=input_files,
-            recursive=recursive,
-            required_exts=formats,
-            num_files_limit=limit,
-            exclude_hidden=exclude,
-            file_metadata=metadata_from_filename,
-        ).load_data()
-
-        chunker = Chunker(
-            chunking_strategy="classic_chunk",
-            max_tokens=MAX_TOKENS,
-            min_tokens=MIN_TOKENS,
-            duplicate_headers=False
+    # check if file is .zip and extract it
+    if filename.endswith(".zip"):
+        extract_zip_recursive(
+            os.path.join(full_path, filename), full_path, 0, RECURSION_DEPTH
         )
-        raw_docs = chunker.chunk(documents=raw_docs)
 
-        docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
-        id = ObjectId()
+    self.update_state(state="PROGRESS", meta={"current": 1})
 
-        vector_dir = os.path.join(temp_dir, "vector_store")
-        os.makedirs(vector_dir, exist_ok=True)
-        
-        embed_and_store_documents(docs, vector_dir, str(id), self)
-        tokens = count_tokens_docs(docs)
-        self.update_state(state="PROGRESS", meta={"current": 100})
+    raw_docs = SimpleDirectoryReader(
+        input_dir=full_path,
+        input_files=input_files,
+        recursive=recursive,
+        required_exts=formats,
+        num_files_limit=limit,
+        exclude_hidden=exclude,
+        file_metadata=metadata_from_filename,
+    ).load_data()
 
-        if sample:
-            for i in range(min(5, len(raw_docs))):
-                logging.info(f"Sample document {i}: {raw_docs[i]}")
+    chunker = Chunker(
+        chunking_strategy="classic_chunk",
+        max_tokens=MAX_TOKENS,
+        min_tokens=MIN_TOKENS,
+        duplicate_headers=False
+    )
+    raw_docs = chunker.chunk(documents=raw_docs)
 
-        file_data.update({
-            "tokens": tokens,
-            "retriever": retriever,
-            "id": str(id),
-            "type": "local",
-        })
-        
-        mongo = MongoDB.get_client()
-        db = mongo["docsgpt"]
-        sources_collection = db["sources"]
-        
-        sources_collection.insert_one({
-            "_id": id,
-            "name": name_job,
-            "user": user,
-            "date": datetime.datetime.now(),
-            "tokens": tokens,
-            "retriever": retriever,
-            "type": "local",
-            "storage_type": settings.STORAGE_TYPE,
-            "original_file_path": file_path
-        })
+    docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
+    id = ObjectId()
 
-        return {
-            "directory": directory,
-            "formats": formats,
-            "name_job": name_job,
-            "filename": filename,
-            "user": user,
-            "limited": False,
-        }
-    
-    finally:
-        if os.path.exists(temp_dir):
-            shutil.rmtree(temp_dir)
+    embed_and_store_documents(docs, full_path, id, self)
+    tokens = count_tokens_docs(docs)
+    self.update_state(state="PROGRESS", meta={"current": 100})
+
+    if sample:
+        for i in range(min(5, len(raw_docs))):
+            logging.info(f"Sample document {i}: {raw_docs[i]}")
+
+    file_data.update({
+        "tokens": tokens,
+        "retriever": retriever,
+        "id": str(id),
+        "type": "local",
+    })
+    upload_index(full_path, file_data)
+
+    # delete local
+    shutil.rmtree(full_path)
+
+    return {
+        "directory": directory,
+        "formats": formats,
+        "name_job": name_job,
+        "filename": filename,
+        "user": user,
+        "limited": False,
+    }
 
 def remote_worker(
     self,

From e60f78ac4afb8d10992fa5cd8e787649d3e00be6 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Wed, 23 Apr 2025 03:39:35 +0530
Subject: [PATCH 20/39] (feat:storage) file uploads

---
 application/api/user/routes.py | 104 ++++++++++++++--------------
 application/worker.py          | 121 ++++++++++++++++++++-------------
 2 files changed, 129 insertions(+), 96 deletions(-)

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 9e97e2ab..6b52a436 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -413,81 +413,85 @@ class UploadFile(Resource):
 
         user = secure_filename(decoded_token.get("sub"))
         job_name = secure_filename(request.form["name"])
+        
         try:
-            save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
-            os.makedirs(save_dir, exist_ok=True)
-
+            from application.storage.storage_creator import StorageCreator
+            storage = StorageCreator.get_storage()
+            
+            base_path = f"{settings.UPLOAD_FOLDER}/{user}/{job_name}"
+            
             if len(files) > 1:
-                temp_dir = os.path.join(save_dir, "temp")
-                os.makedirs(temp_dir, exist_ok=True)
-
+                temp_files = []
                 for file in files:
                     filename = secure_filename(file.filename)
-                    file.save(os.path.join(temp_dir, filename))
+                    temp_path = f"{base_path}/temp/{filename}"
+                    storage.save_file(file, temp_path)
+                    temp_files.append(temp_path)
                     print(f"Saved file: {filename}")
-                zip_path = shutil.make_archive(
-                    base_name=os.path.join(save_dir, job_name),
-                    format="zip",
-                    root_dir=temp_dir,
-                )
-                final_filename = os.path.basename(zip_path)
-                shutil.rmtree(temp_dir)
+                
+                zip_filename = f"{job_name}.zip"
+                zip_path = f"{base_path}/{zip_filename}"
+                
+                def create_zip_archive(temp_paths, **kwargs):
+                    # Build the zip and upload it while the temp dirs still exist;
+                    # a path returned from inside TemporaryDirectory would dangle.
+                    import tempfile
+                    with tempfile.TemporaryDirectory() as src_dir, \
+                            tempfile.TemporaryDirectory() as out_dir:
+                        for path in temp_paths:
+                            file_data = storage.get_file(path)
+                            with open(os.path.join(src_dir, os.path.basename(path)), 'wb') as f:
+                                f.write(file_data.read())
+                        # Write the archive outside src_dir so it cannot include itself.
+                        zip_temp = shutil.make_archive(
+                            base_name=os.path.join(out_dir, job_name),
+                            format="zip",
+                            root_dir=src_dir
+                        )
+                        with open(zip_temp, 'rb') as zip_file:
+                            storage.save_file(zip_file, zip_path)
+
+                create_zip_archive(temp_files)
+                
+                # Clean up temp files
+                for temp_path in temp_files:
+                    storage.delete_file(temp_path)
+                
                 task = ingest.delay(
                     settings.UPLOAD_FOLDER,
                     [
-                        ".rst",
-                        ".md",
-                        ".pdf",
-                        ".txt",
-                        ".docx",
-                        ".csv",
-                        ".epub",
-                        ".html",
-                        ".mdx",
-                        ".json",
-                        ".xlsx",
-                        ".pptx",
-                        ".png",
-                        ".jpg",
-                        ".jpeg",
+                        ".rst", ".md", ".pdf", ".txt", ".docx", ".csv", ".epub",
+                        ".html", ".mdx", ".json", ".xlsx", ".pptx", ".png",
+                        ".jpg", ".jpeg",
                     ],
                     job_name,
-                    final_filename,
+                    zip_filename,
                     user,
                 )
             else:
+                # For single file
                 file = files[0]
-                final_filename = secure_filename(file.filename)
-                file_path = os.path.join(save_dir, final_filename)
-                file.save(file_path)
-
+                filename = secure_filename(file.filename)
+                file_path = f"{base_path}/{filename}"
+                
+                storage.save_file(file, file_path)
+                
                 task = ingest.delay(
                     settings.UPLOAD_FOLDER,
                     [
-                        ".rst",
-                        ".md",
-                        ".pdf",
-                        ".txt",
-                        ".docx",
-                        ".csv",
-                        ".epub",
-                        ".html",
-                        ".mdx",
-                        ".json",
-                        ".xlsx",
-                        ".pptx",
-                        ".png",
-                        ".jpg",
-                        ".jpeg",
+                        ".rst", ".md", ".pdf", ".txt", ".docx", ".csv", ".epub",
+                        ".html", ".mdx", ".json", ".xlsx", ".pptx", ".png",
+                        ".jpg", ".jpeg",
                     ],
                     job_name,
-                    final_filename,
+                    filename,
                     user,
                 )
 
         except Exception as err:
             current_app.logger.error(f"Error uploading file: {err}")
             return make_response(jsonify({"success": False}), 400)
+            
         return make_response(jsonify({"success": True, "task_id": task.id}), 200)
 
 
diff --git a/application/worker.py b/application/worker.py
index b5caa23e..d83639d7 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -133,62 +133,91 @@ def ingest_worker(
     limit = None
     exclude = True
     sample = False
+    
+    storage = StorageCreator.get_storage()
+    
     full_path = os.path.join(directory, user, name_job)
-
+    source_file_path = os.path.join(full_path, filename)
+    
     logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job})
-    file_data = {"name": name_job, "file": filename, "user": user}
+    
+    # Create temporary working directory
+    with tempfile.TemporaryDirectory() as temp_dir:
+        try:
+            os.makedirs(temp_dir, exist_ok=True)
+            
+            # Download file from storage to temp directory
+            temp_file_path = os.path.join(temp_dir, filename)
+            file_data = storage.get_file(source_file_path)
+            
+            with open(temp_file_path, 'wb') as f:
+                f.write(file_data.read())
+            
+            self.update_state(state="PROGRESS", meta={"current": 1})
 
-    if not os.path.exists(full_path):
-        os.makedirs(full_path)
-    download_file(urljoin(settings.API_URL, "/api/download"), file_data, os.path.join(full_path, filename))
+            # Handle zip files
+            if filename.endswith('.zip'):
+                logging.info(f"Extracting zip file: {filename}")
+                extract_zip_recursive(
+                    temp_file_path,
+                    temp_dir,
+                    current_depth=0,
+                    max_depth=RECURSION_DEPTH
+                )
 
-    # check if file is .zip and extract it
-    if filename.endswith(".zip"):
-        extract_zip_recursive(
-            os.path.join(full_path, filename), full_path, 0, RECURSION_DEPTH
-        )
+            if sample:
+                logging.info(f"Sample mode enabled. Using {limit} documents.")
 
-    self.update_state(state="PROGRESS", meta={"current": 1})
+            reader = SimpleDirectoryReader(
+                input_dir=temp_dir,
+                input_files=input_files,
+                recursive=recursive,
+                required_exts=formats,
+                exclude_hidden=exclude,
+                file_metadata=metadata_from_filename,
+            )
+            raw_docs = reader.load_data()
 
-    raw_docs = SimpleDirectoryReader(
-        input_dir=full_path,
-        input_files=input_files,
-        recursive=recursive,
-        required_exts=formats,
-        num_files_limit=limit,
-        exclude_hidden=exclude,
-        file_metadata=metadata_from_filename,
-    ).load_data()
+            chunker = Chunker(
+                chunking_strategy="classic_chunk",
+                max_tokens=MAX_TOKENS,
+                min_tokens=MIN_TOKENS,
+                duplicate_headers=False
+            )
+            raw_docs = chunker.chunk(documents=raw_docs)
+            
+            docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
+            
+            id = ObjectId()
+            
+            vector_store_path = os.path.join(temp_dir, 'vector_store')
+            os.makedirs(vector_store_path, exist_ok=True)
+            
+            embed_and_store_documents(docs, vector_store_path, id, self)
+            
+            tokens = count_tokens_docs(docs)
+            
+            self.update_state(state="PROGRESS", meta={"current": 100})
 
-    chunker = Chunker(
-        chunking_strategy="classic_chunk",
-        max_tokens=MAX_TOKENS,
-        min_tokens=MIN_TOKENS,
-        duplicate_headers=False
-    )
-    raw_docs = chunker.chunk(documents=raw_docs)
+            if sample:
+               for i in range(min(5, len(raw_docs))):
+                    logging.info(f"Sample document {i}: {raw_docs[i]}")
+            file_data = {
+                "name": name_job,
+                "file": filename,
+                "user": user,
+                "tokens": tokens,
+                "retriever": retriever,
+                "id": str(id),
+                "type": "local",
+            }
 
-    docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
-    id = ObjectId()
 
-    embed_and_store_documents(docs, full_path, id, self)
-    tokens = count_tokens_docs(docs)
-    self.update_state(state="PROGRESS", meta={"current": 100})
+            upload_index(vector_store_path, file_data)
 
-    if sample:
-        for i in range(min(5, len(raw_docs))):
-            logging.info(f"Sample document {i}: {raw_docs[i]}")
-
-    file_data.update({
-        "tokens": tokens,
-        "retriever": retriever,
-        "id": str(id),
-        "type": "local",
-    })
-    upload_index(full_path, file_data)
-
-    # delete local
-    shutil.rmtree(full_path)
+        except Exception as e:
+            logging.error(f"Error in ingest_worker: {e}", exc_info=True)
+            raise
 
     return {
         "directory": directory,

From 0ce27f274ab806c457ee75a666f9e7a776c37854 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Wed, 23 Apr 2025 04:28:45 +0530
Subject: [PATCH 21/39] (feat:storage) file indexes/faiss

---
 application/api/internal/routes.py | 23 ++++++++++---------
 application/vectorstore/faiss.py   | 36 ++++++++++++++++++++++++------
 2 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py
index c8e32d11..f0ad042f 100755
--- a/application/api/internal/routes.py
+++ b/application/api/internal/routes.py
@@ -3,10 +3,13 @@ import datetime
 from flask import Blueprint, request, send_from_directory
 from werkzeug.utils import secure_filename
 from bson.objectid import ObjectId
-
+import logging
 from application.core.mongo_db import MongoDB
 from application.core.settings import settings
+from application.storage.storage_creator import StorageCreator
 
+
+logger = logging.getLogger(__name__)
 mongo = MongoDB.get_client()
 db = mongo["docsgpt"]
 conversations_collection = db["conversations"]
@@ -45,26 +48,26 @@ def upload_index_files():
     remote_data = request.form["remote_data"] if "remote_data" in request.form else None
     sync_frequency = secure_filename(request.form["sync_frequency"]) if "sync_frequency" in request.form else None
 
-    save_dir = os.path.join(current_dir, "indexes", str(id))
+    storage = StorageCreator.get_storage()
+    index_base_path = f"indexes/{id}"
+    
     if settings.VECTOR_STORE == "faiss":
         if "file_faiss" not in request.files:
-            print("No file part")
+            logger.error("No file_faiss part")
             return {"status": "no file"}
         file_faiss = request.files["file_faiss"]
         if file_faiss.filename == "":
             return {"status": "no file name"}
         if "file_pkl" not in request.files:
-            print("No file part")
+            logger.error("No file_pkl part")
             return {"status": "no file"}
         file_pkl = request.files["file_pkl"]
         if file_pkl.filename == "":
             return {"status": "no file name"}
-        # saves index files
-
-        if not os.path.exists(save_dir):
-            os.makedirs(save_dir)
-        file_faiss.save(os.path.join(save_dir, "index.faiss"))
-        file_pkl.save(os.path.join(save_dir, "index.pkl"))
+        
+        # Save index files to storage
+        storage.save_file(file_faiss, f"{index_base_path}/index.faiss")
+        storage.save_file(file_pkl, f"{index_base_path}/index.pkl")
 
     existing_entry = sources_collection.find_one({"_id": ObjectId(id)})
     if existing_entry:
diff --git a/application/vectorstore/faiss.py b/application/vectorstore/faiss.py
index 87ffcccb..ce455bd8 100644
--- a/application/vectorstore/faiss.py
+++ b/application/vectorstore/faiss.py
@@ -1,17 +1,19 @@
 import os
+import tempfile
 
 from langchain_community.vectorstores import FAISS
 
 from application.core.settings import settings
 from application.parser.schema.base import Document
 from application.vectorstore.base import BaseVectorStore
+from application.storage.storage_creator import StorageCreator
 
 
 def get_vectorstore(path: str) -> str:
     if path:
-        vectorstore = os.path.join("application", "indexes", path)
+        vectorstore = f"indexes/{path}"
     else:
-        vectorstore = os.path.join("application")
+        vectorstore = "indexes"
     return vectorstore
 
 
@@ -21,16 +23,36 @@ class FaissStore(BaseVectorStore):
         self.source_id = source_id
         self.path = get_vectorstore(source_id)
         self.embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
+        self.storage = StorageCreator.get_storage()
 
         try:
             if docs_init:
                 self.docsearch = FAISS.from_documents(docs_init, self.embeddings)
             else:
-                self.docsearch = FAISS.load_local(
-                    self.path, self.embeddings, allow_dangerous_deserialization=True
-                )
-        except Exception:
-            raise
+                with tempfile.TemporaryDirectory() as temp_dir:
+                    faiss_path = f"{self.path}/index.faiss"
+                    pkl_path = f"{self.path}/index.pkl"
+                    
+                    if not self.storage.file_exists(faiss_path) or not self.storage.file_exists(pkl_path):
+                        raise FileNotFoundError(f"Index files not found in storage at {self.path}")
+                    
+                    faiss_file = self.storage.get_file(faiss_path)
+                    pkl_file = self.storage.get_file(pkl_path)
+                    
+                    local_faiss_path = os.path.join(temp_dir, "index.faiss")
+                    local_pkl_path = os.path.join(temp_dir, "index.pkl")
+                    
+                    with open(local_faiss_path, 'wb') as f:
+                        f.write(faiss_file.read())
+                    
+                    with open(local_pkl_path, 'wb') as f:
+                        f.write(pkl_file.read())
+                    
+                    self.docsearch = FAISS.load_local(
+                        temp_dir, self.embeddings, allow_dangerous_deserialization=True
+                    )
+        except Exception as e:
+            raise Exception(f"Error loading FAISS index: {str(e)}")
 
         self.assert_embedding_dimensions(self.embeddings)
 

From 3cd9a72495533cc738e400ba9dc74db94f304cc9 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Wed, 23 Apr 2025 23:13:39 +0100
Subject: [PATCH 22/39] add storage type to the settings config

---
 application/core/settings.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/application/core/settings.py b/application/core/settings.py
index 74bffe53..c3c5159e 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -98,6 +98,8 @@ class Settings(BaseSettings):
     BRAVE_SEARCH_API_KEY: Optional[str] = None
 
     FLASK_DEBUG_MODE: bool = False
+    STORAGE_TYPE: str = "local"  # local or s3 
+
 
     JWT_SECRET_KEY: str = ""
 

From 76fd6e15cc2e84071e68d893da1304cf38a3cef4 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Thu, 24 Apr 2025 18:54:58 +0300
Subject: [PATCH 23/39] Update Dockerfile

---
 application/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/Dockerfile b/application/Dockerfile
index 308b721b..e33721a2 100644
--- a/application/Dockerfile
+++ b/application/Dockerfile
@@ -84,4 +84,4 @@ EXPOSE 7091
 USER appuser
 
 # Start Gunicorn
-CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "application.wsgi:app"]
\ No newline at end of file
+CMD ["gunicorn", "-w", "1", "--timeout", "120", "--bind", "0.0.0.0:7091", "--preload", "application.wsgi:app"]

From df9d432d29c1bbdf28abb3d35d129060b1964dd3 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Thu, 24 Apr 2025 17:29:41 +0100
Subject: [PATCH 24/39] fix: mongo db database name in settings

---
 application/agents/base.py         | 5 +++--
 application/api/answer/routes.py   | 2 +-
 application/api/internal/routes.py | 2 +-
 application/api/user/routes.py     | 2 +-
 application/core/settings.py       | 1 +
 application/llm/google_ai.py       | 3 ++-
 application/llm/openai.py          | 2 +-
 application/logging.py             | 3 ++-
 application/usage.py               | 3 ++-
 application/worker.py              | 4 ++--
 10 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/application/agents/base.py b/application/agents/base.py
index 64fac17b..e4b76ca1 100644
--- a/application/agents/base.py
+++ b/application/agents/base.py
@@ -10,6 +10,7 @@ from application.core.mongo_db import MongoDB
 from application.llm.llm_creator import LLMCreator
 from application.logging import build_stack_data, log_activity, LogContext
 from application.retriever.base import BaseRetriever
+from application.core.settings import settings
 from bson.objectid import ObjectId
 
 
@@ -61,7 +62,7 @@ class BaseAgent(ABC):
 
     def _get_tools(self, api_key: str = None) -> Dict[str, Dict]:
         mongo = MongoDB.get_client()
-        db = mongo["docsgpt"]
+        db = mongo[settings.MONGO_DB_NAME]
         agents_collection = db["agents"]
         tools_collection = db["user_tools"]
 
@@ -82,7 +83,7 @@ class BaseAgent(ABC):
 
     def _get_user_tools(self, user="local"):
         mongo = MongoDB.get_client()
-        db = mongo["docsgpt"]
+        db = mongo[settings.MONGO_DB_NAME]
         user_tools_collection = db["user_tools"]
         user_tools = user_tools_collection.find({"user": user, "status": True})
         user_tools = list(user_tools)
diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index 8f44385b..2a8476d8 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -23,7 +23,7 @@ from application.utils import check_required_fields, limit_chat_history
 logger = logging.getLogger(__name__)
 
 mongo = MongoDB.get_client()
-db = mongo["docsgpt"]
+db = mongo[settings.MONGO_DB_NAME]
 conversations_collection = db["conversations"]
 sources_collection = db["sources"]
 prompts_collection = db["prompts"]
diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py
index f0ad042f..80759593 100755
--- a/application/api/internal/routes.py
+++ b/application/api/internal/routes.py
@@ -11,7 +11,7 @@ from application.storage.storage_creator import StorageCreator
 
 logger = logging.getLogger(__name__)
 mongo = MongoDB.get_client()
-db = mongo["docsgpt"]
+db = mongo[settings.MONGO_DB_NAME]
 conversations_collection = db["conversations"]
 sources_collection = db["sources"]
 
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 6b52a436..d9c41c8f 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -23,7 +23,7 @@ from application.utils import check_required_fields, validate_function_name
 from application.vectorstore.vector_creator import VectorCreator
 
 mongo = MongoDB.get_client()
-db = mongo["docsgpt"]
+db = mongo[settings.MONGO_DB_NAME]
 conversations_collection = db["conversations"]
 sources_collection = db["sources"]
 prompts_collection = db["prompts"]
diff --git a/application/core/settings.py b/application/core/settings.py
index c3c5159e..3be34242 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -19,6 +19,7 @@ class Settings(BaseSettings):
     CELERY_BROKER_URL: str = "redis://localhost:6379/0"
     CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
     MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
+    MONGO_DB_NAME: str = "docsgpt"
     MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
     DEFAULT_MAX_HISTORY: int = 150
     MODEL_TOKEN_LIMITS: dict = {
diff --git a/application/llm/google_ai.py b/application/llm/google_ai.py
index 06dbbdfd..a56616d2 100644
--- a/application/llm/google_ai.py
+++ b/application/llm/google_ai.py
@@ -5,6 +5,7 @@ import json
 
 from application.llm.base import BaseLLM
 from application.storage.storage_creator import StorageCreator
+from application.core.settings import settings
 
 
 class GoogleLLM(BaseLLM):
@@ -120,7 +121,7 @@ class GoogleLLM(BaseLLM):
 
             from application.core.mongo_db import MongoDB
             mongo = MongoDB.get_client()
-            db = mongo["docsgpt"]
+            db = mongo[settings.MONGO_DB_NAME]
             attachments_collection = db["attachments"]
             if '_id' in attachment:
                 attachments_collection.update_one(
diff --git a/application/llm/openai.py b/application/llm/openai.py
index e8df92dd..248fd7e2 100644
--- a/application/llm/openai.py
+++ b/application/llm/openai.py
@@ -291,7 +291,7 @@ class OpenAILLM(BaseLLM):
 
             from application.core.mongo_db import MongoDB
             mongo = MongoDB.get_client()
-            db = mongo["docsgpt"]
+            db = mongo[settings.MONGO_DB_NAME]
             attachments_collection = db["attachments"]
             if '_id' in attachment:
                 attachments_collection.update_one(
diff --git a/application/logging.py b/application/logging.py
index 1dd0d557..ed07f858 100644
--- a/application/logging.py
+++ b/application/logging.py
@@ -7,6 +7,7 @@ import uuid
 from typing import Any, Callable, Dict, Generator, List
 
 from application.core.mongo_db import MongoDB
+from application.core.settings import settings
 
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -131,7 +132,7 @@ def _log_to_mongodb(
 ) -> None:
     try:
         mongo = MongoDB.get_client()
-        db = mongo["docsgpt"]
+        db = mongo[settings.MONGO_DB_NAME]
         user_logs_collection = db["stack_logs"]
 
         log_entry = {
diff --git a/application/usage.py b/application/usage.py
index 85328c1f..46620fff 100644
--- a/application/usage.py
+++ b/application/usage.py
@@ -2,10 +2,11 @@ import sys
 from datetime import datetime
 
 from application.core.mongo_db import MongoDB
+from application.core.settings import settings
 from application.utils import num_tokens_from_object_or_list, num_tokens_from_string
 
 mongo = MongoDB.get_client()
-db = mongo["docsgpt"]
+db = mongo[settings.MONGO_DB_NAME]
 usage_collection = db["token_usage"]
 
 
diff --git a/application/worker.py b/application/worker.py
index d83639d7..3f542b6a 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -26,7 +26,7 @@ from application.parser.chunking import Chunker
 from application.utils import count_tokens_docs
 
 mongo = MongoDB.get_client()
-db = mongo["docsgpt"]
+db = mongo[settings.MONGO_DB_NAME]
 sources_collection = db["sources"]
 
 # Constants
@@ -356,7 +356,7 @@ def attachment_worker(self, file_info, user):
     """
 
     mongo = MongoDB.get_client()
-    db = mongo["docsgpt"]
+    db = mongo[settings.MONGO_DB_NAME]
     attachments_collection = db["attachments"]
 
     filename = file_info["filename"]

From 8289b02ab0d533d45bf9dcf6a1af2dea4b003984 Mon Sep 17 00:00:00 2001
From: Siddhant Rai <siddhant.rai.5686@gmail.com>
Date: Sat, 26 Apr 2025 12:00:29 +0530
Subject: [PATCH 25/39] feat: add agent webhook endpoint and implement related
 functionality

---
 application/api/user/routes.py            |  90 +++++++-
 application/api/user/tasks.py             |  14 +-
 application/worker.py                     | 247 +++++++++++++++++-----
 frontend/src/Navigation.tsx               |  47 ++--
 frontend/src/agents/AgentPreview.tsx      |   1 +
 frontend/src/agents/NewAgent.tsx          |  11 +-
 frontend/src/agents/index.tsx             |  29 ++-
 frontend/src/api/endpoints.ts             |   1 +
 frontend/src/api/services/userService.ts  |   2 +
 frontend/src/components/MessageInput.tsx  |  13 +-
 frontend/src/modals/AgentDetailsModal.tsx |  59 +++++-
 frontend/src/modals/ConfirmationModal.tsx |  10 +-
 12 files changed, 424 insertions(+), 100 deletions(-)

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 8876be6b..391444fc 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -4,6 +4,7 @@ import math
 import os
 import shutil
 import uuid
+import secrets
 
 from bson.binary import Binary, UuidRepresentation
 from bson.dbref import DBRef
@@ -14,7 +15,12 @@ from werkzeug.utils import secure_filename
 
 from application.agents.tools.tool_manager import ToolManager
 
-from application.api.user.tasks import ingest, ingest_remote, store_attachment
+from application.api.user.tasks import (
+    ingest,
+    ingest_remote,
+    store_attachment,
+    process_agent_webhook,
+)
 from application.core.mongo_db import MongoDB
 from application.core.settings import settings
 from application.extensions import api
@@ -1329,6 +1335,88 @@ class DeleteAgent(Resource):
         return make_response(jsonify({"id": deleted_id}), 200)
 
 
+@user_ns.route("/api/agent_webhook")
+class AgentWebhook(Resource):
+    @api.doc(
+        params={"id": "ID of the agent"},
+        description="Generate webhook URL for the agent",
+    )
+    def get(self):
+        decoded_token = request.decoded_token
+        if not decoded_token:
+            return make_response(jsonify({"success": False}), 401)
+        user = decoded_token.get("sub")
+        agent_id = request.args.get("id")
+        if not agent_id:
+            return make_response(
+                jsonify({"success": False, "message": "ID is required"}), 400
+            )
+
+        try:
+            agent = agents_collection.find_one(
+                {"_id": ObjectId(agent_id), "user": user}
+            )
+            if not agent:
+                return make_response(
+                    jsonify({"success": False, "message": "Agent not found"}), 404
+                )
+
+            webhook_token = agent.get("incoming_webhook_token")
+            if not webhook_token:
+                webhook_token = secrets.token_urlsafe(32)
+                agents_collection.update_one(
+                    {"_id": ObjectId(agent_id), "user": user},
+                    {"$set": {"incoming_webhook_token": webhook_token}},
+                )
+            base_url = settings.API_URL.rstrip("/")
+            full_webhook_url = f"{base_url}/api/webhooks/agents/{webhook_token}"
+
+        except Exception as err:
+            current_app.logger.error(f"Error generating webhook URL: {err}")
+            return make_response(
+                jsonify({"success": False, "message": "Error generating webhook URL"}),
+                400,
+            )
+        return make_response(
+            jsonify({"success": True, "webhook_url": full_webhook_url}), 200
+        )
+
+
+@user_ns.route(f"/api/webhooks/agents/<string:webhook_token>")
+class AgentWebhookListener(Resource):
+    @api.doc(description="Webhook listener for agent events")
+    def post(self, webhook_token):
+        agent = agents_collection.find_one(
+            {"incoming_webhook_token": webhook_token}, {"_id": 1}
+        )
+        if not agent:
+            return make_response(
+                jsonify({"success": False, "message": "Agent not found"}), 404
+            )
+        data = request.get_json()
+        if not data:
+            return make_response(
+                jsonify({"success": False, "message": "No data provided"}), 400
+            )
+
+        agent_id_str = str(agent["_id"])
+        current_app.logger.info(
+            f"Incoming webhook received for agent {agent_id_str}. Enqueuing task."
+        )
+
+        try:
+            task = process_agent_webhook.delay(
+                agent_id=agent_id_str,
+                payload=data,
+            )
+        except Exception as err:
+            current_app.logger.error(f"Error processing webhook: {err}")
+            return make_response(
+                jsonify({"success": False, "message": "Error processing webhook"}), 400
+            )
+        return make_response(jsonify({"success": True, "task_id": task.id}), 200)
+
+
 @user_ns.route("/api/share")
 class ShareConversation(Resource):
     share_conversation_model = api.model(
diff --git a/application/api/user/tasks.py b/application/api/user/tasks.py
index 24cff3c6..f53d856b 100644
--- a/application/api/user/tasks.py
+++ b/application/api/user/tasks.py
@@ -1,7 +1,13 @@
 from datetime import timedelta
 
 from application.celery_init import celery
-from application.worker import ingest_worker, remote_worker, sync_worker, attachment_worker
+from application.worker import (
+    agent_webhook_worker,
+    attachment_worker,
+    ingest_worker,
+    remote_worker,
+    sync_worker,
+)
 
 
 @celery.task(bind=True)
@@ -28,6 +34,12 @@ def store_attachment(self, directory, saved_files, user):
     return resp
 
 
+@celery.task(bind=True)
+def process_agent_webhook(self, agent_id, payload):
+    resp = agent_webhook_worker(self, agent_id, payload)
+    return resp
+
+
 @celery.on_after_configure.connect
 def setup_periodic_tasks(sender, **kwargs):
     sender.add_periodic_task(
diff --git a/application/worker.py b/application/worker.py
index bbd422ac..4782a83b 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 import shutil
@@ -7,15 +8,20 @@ from collections import Counter
 from urllib.parse import urljoin
 
 import requests
+from bson.dbref import DBRef
 from bson.objectid import ObjectId
 
+from application.agents.agent_creator import AgentCreator
+from application.api.answer.routes import get_prompt
+
 from application.core.mongo_db import MongoDB
 from application.core.settings import settings
-from application.parser.file.bulk import SimpleDirectoryReader
+from application.parser.chunking import Chunker
 from application.parser.embedding_pipeline import embed_and_store_documents
+from application.parser.file.bulk import SimpleDirectoryReader
 from application.parser.remote.remote_creator import RemoteCreator
 from application.parser.schema.base import Document
-from application.parser.chunking import Chunker
+from application.retriever.retriever_creator import RetrieverCreator
 from application.utils import count_tokens_docs
 
 mongo = MongoDB.get_client()
@@ -27,18 +33,22 @@ MIN_TOKENS = 150
 MAX_TOKENS = 1250
 RECURSION_DEPTH = 2
 
+
 # Define a function to extract metadata from a given filename.
 def metadata_from_filename(title):
     return {"title": title}
 
+
 # Define a function to generate a random string of a given length.
 def generate_random_string(length):
     return "".join([string.ascii_letters[i % 52] for i in range(length)])
 
+
 current_dir = os.path.dirname(
     os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 )
 
+
 def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
     """
     Recursively extract zip files with a limit on recursion depth.
@@ -69,6 +79,7 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
                 file_path = os.path.join(root, file)
                 extract_zip_recursive(file_path, root, current_depth + 1, max_depth)
 
+
 def download_file(url, params, dest_path):
     try:
         response = requests.get(url, params=params)
@@ -79,6 +90,7 @@ def download_file(url, params, dest_path):
         logging.error(f"Error downloading file: {e}")
         raise
 
+
 def upload_index(full_path, file_data):
     try:
         if settings.VECTOR_STORE == "faiss":
@@ -87,7 +99,9 @@ def upload_index(full_path, file_data):
                 "file_pkl": open(full_path + "/index.pkl", "rb"),
             }
             response = requests.post(
-                urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
+                urljoin(settings.API_URL, "/api/upload_index"),
+                files=files,
+                data=file_data,
             )
         else:
             response = requests.post(
@@ -102,6 +116,75 @@ def upload_index(full_path, file_data):
             for file in files.values():
                 file.close()
 
+
+def run_agent_logic(agent_config, input_data):
+    """Run the configured agent on ``input_data`` and return its collected output."""
+    try:
+        source_ref = agent_config.get("source")
+        retriever = agent_config.get("retriever", "classic")
+        active_docs = {}  # used when the agent has no DBRef source
+        if isinstance(source_ref, DBRef):
+            source_doc = db.dereference(source_ref)
+            active_docs = str(source_doc["_id"])
+            retriever = source_doc.get("retriever", retriever)  # keep "classic" default
+        source = {"active_docs": active_docs}
+        chunks = int(agent_config.get("chunks", 2))
+        prompt_id = agent_config.get("prompt_id", "default")
+        user_api_key = agent_config["key"]
+        agent_type = agent_config.get("agent_type", "classic")
+        decoded_token = {"sub": agent_config.get("user")}
+        prompt = get_prompt(prompt_id)
+        agent = AgentCreator.create_agent(
+            agent_type,
+            endpoint="webhook",
+            llm_name=settings.LLM_NAME,
+            gpt_model=settings.MODEL_NAME,
+            api_key=settings.API_KEY,
+            user_api_key=user_api_key,
+            prompt=prompt,
+            chat_history=[],
+            decoded_token=decoded_token,
+            attachments=[],
+        )
+        retriever = RetrieverCreator.create_retriever(
+            retriever,
+            source=source,
+            chat_history=[],
+            prompt=prompt,
+            chunks=chunks,
+            token_limit=settings.DEFAULT_MAX_HISTORY,
+            gpt_model=settings.MODEL_NAME,
+            user_api_key=user_api_key,
+            decoded_token=decoded_token,
+        )
+        answer = agent.gen(query=input_data, retriever=retriever)
+        response_full = ""
+        thought = ""
+        source_log_docs = []
+        tool_calls = []
+
+        for line in answer:
+            if "answer" in line:
+                response_full += str(line["answer"])
+            elif "sources" in line:
+                source_log_docs.extend(line["sources"])
+            elif "tool_calls" in line:
+                tool_calls.extend(line["tool_calls"])
+            elif "thought" in line:
+                thought += line["thought"]
+
+        result = {
+            "answer": response_full,
+            "sources": source_log_docs,
+            "tool_calls": tool_calls,
+            "thought": thought,
+        }
+        return result
+    except Exception as e:
+        logging.error(f"Error in run_agent_logic: {e}", exc_info=True)
+        raise
+
+
 # Define the main function for ingesting and processing documents.
 def ingest_worker(
     self, directory, formats, name_job, filename, user, retriever="classic"
@@ -133,7 +216,11 @@ def ingest_worker(
 
     if not os.path.exists(full_path):
         os.makedirs(full_path)
-    download_file(urljoin(settings.API_URL, "/api/download"), file_data, os.path.join(full_path, filename))
+    download_file(
+        urljoin(settings.API_URL, "/api/download"),
+        file_data,
+        os.path.join(full_path, filename),
+    )
 
     # check if file is .zip and extract it
     if filename.endswith(".zip"):
@@ -157,7 +244,7 @@ def ingest_worker(
         chunking_strategy="classic_chunk",
         max_tokens=MAX_TOKENS,
         min_tokens=MIN_TOKENS,
-        duplicate_headers=False
+        duplicate_headers=False,
     )
     raw_docs = chunker.chunk(documents=raw_docs)
 
@@ -172,12 +259,14 @@ def ingest_worker(
         for i in range(min(5, len(raw_docs))):
             logging.info(f"Sample document {i}: {raw_docs[i]}")
 
-    file_data.update({
-        "tokens": tokens,
-        "retriever": retriever,
-        "id": str(id),
-        "type": "local",
-    })
+    file_data.update(
+        {
+            "tokens": tokens,
+            "retriever": retriever,
+            "id": str(id),
+            "type": "local",
+        }
+    )
     upload_index(full_path, file_data)
 
     # delete local
@@ -192,6 +281,7 @@ def ingest_worker(
         "limited": False,
     }
 
+
 def remote_worker(
     self,
     source_data,
@@ -203,7 +293,7 @@ def remote_worker(
     sync_frequency="never",
     operation_mode="upload",
     doc_id=None,
-):  
+):
     full_path = os.path.join(directory, user, name_job)
     if not os.path.exists(full_path):
         os.makedirs(full_path)
@@ -218,7 +308,7 @@ def remote_worker(
             chunking_strategy="classic_chunk",
             max_tokens=MAX_TOKENS,
             min_tokens=MIN_TOKENS,
-            duplicate_headers=False
+            duplicate_headers=False,
         )
         docs = chunker.chunk(documents=raw_docs)
         docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
@@ -260,6 +350,7 @@ def remote_worker(
     logging.info("remote_worker task completed successfully")
     return {"urls": source_data, "name_job": name_job, "user": user, "limited": False}
 
+
 def sync(
     self,
     source_data,
@@ -289,6 +380,7 @@ def sync(
         return {"status": "error", "error": str(e)}
     return {"status": "success"}
 
+
 def sync_worker(self, frequency):
     sync_counts = Counter()
     sources = sources_collection.find()
@@ -313,84 +405,137 @@ def sync_worker(self, frequency):
         for key in ["total_sync_count", "sync_success", "sync_failure"]
     }
 
+
 def attachment_worker(self, directory, file_info, user):
     """
     Process and store a single attachment without vectorization.
-    
+
     Args:
         self: Reference to the instance of the task.
         directory (str): Base directory for storing files.
         file_info (dict): Dictionary with folder and filename info.
         user (str): User identifier.
-        
+
     Returns:
         dict: Information about processed attachment.
     """
     import datetime
-    import os
     import mimetypes
+    import os
+
     from application.utils import num_tokens_from_string
-    
+
     mongo = MongoDB.get_client()
     db = mongo["docsgpt"]
     attachments_collection = db["attachments"]
-    
+
     filename = file_info["filename"]
     attachment_id = file_info["attachment_id"]
-    
-    logging.info(f"Processing attachment: {attachment_id}/{filename}", extra={"user": user})
-    
+
+    logging.info(
+        f"Processing attachment: {attachment_id}/{filename}", extra={"user": user}
+    )
+
     self.update_state(state="PROGRESS", meta={"current": 10})
-    
+
     file_path = os.path.join(directory, filename)
-    
+
     if not os.path.exists(file_path):
         logging.warning(f"File not found: {file_path}", extra={"user": user})
         raise FileNotFoundError(f"File not found: {file_path}")
-    
+
     try:
-        reader = SimpleDirectoryReader(
-            input_files=[file_path]
-        )
+        reader = SimpleDirectoryReader(input_files=[file_path])
         documents = reader.load_data()
-        
+
         self.update_state(state="PROGRESS", meta={"current": 50})
-        
+
         if documents:
             content = documents[0].text
             token_count = num_tokens_from_string(content)
-            
+
             file_path_relative = f"{settings.UPLOAD_FOLDER}/{user}/attachments/{attachment_id}/{filename}"
-            
-            mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
-            
+
+            mime_type = mimetypes.guess_type(file_path)[0] or "application/octet-stream"
+
             doc_id = ObjectId(attachment_id)
-            attachments_collection.insert_one({
-                "_id": doc_id,
-                "user": user,
-                "path": file_path_relative,
-                "content": content,
-                "token_count": token_count,
-                "mime_type": mime_type,
-                "date": datetime.datetime.now(),
-            })
-            
-            logging.info(f"Stored attachment with ID: {attachment_id}", 
-                        extra={"user": user})
-            
+            attachments_collection.insert_one(
+                {
+                    "_id": doc_id,
+                    "user": user,
+                    "path": file_path_relative,
+                    "content": content,
+                    "token_count": token_count,
+                    "mime_type": mime_type,
+                    "date": datetime.datetime.now(),
+                }
+            )
+
+            logging.info(
+                f"Stored attachment with ID: {attachment_id}", extra={"user": user}
+            )
+
             self.update_state(state="PROGRESS", meta={"current": 100})
-            
+
             return {
                 "filename": filename,
                 "path": file_path_relative,
                 "token_count": token_count,
                 "attachment_id": attachment_id,
-                "mime_type": mime_type
+                "mime_type": mime_type,
             }
         else:
-            logging.warning("No content was extracted from the file", 
-                           extra={"user": user})
+            logging.warning(
+                "No content was extracted from the file", extra={"user": user}
+            )
             raise ValueError("No content was extracted from the file")
     except Exception as e:
-        logging.error(f"Error processing file {filename}: {e}", extra={"user": user}, exc_info=True)
+        logging.error(
+            f"Error processing file {filename}: {e}",
+            extra={"user": user},
+            exc_info=True,
+        )
         raise
+
+
+def agent_webhook_worker(self, agent_id, payload):
+    """
+    Process the webhook payload for an agent.
+
+    Args:
+        self: Reference to the instance of the task.
+        agent_id (str): Unique identifier for the agent.
+        payload (dict): The payload data from the webhook.
+
+    Returns:
+        dict: ``{"status": "success", "result": ...}`` when the agent ran, or
+            ``{"status": "error", "error": <message>}`` when any step failed.
+    """
+    agents_collection = MongoDB.get_client()["docsgpt"]["agents"]
+
+    self.update_state(state="PROGRESS", meta={"current": 1})
+    try:
+        agent_oid = ObjectId(agent_id)
+        agent_config = agents_collection.find_one({"_id": agent_oid})
+        if not agent_config:
+            raise ValueError(f"Agent with ID {agent_id} not found.")
+        input_data = payload.get("query", "")
+        if input_data is None or not isinstance(input_data, str):
+            input_data = json.dumps(payload)
+    except Exception as e:
+        logging.error(f"Error processing agent webhook: {e}", exc_info=True)
+        return {"status": "error", "error": str(e)}
+
+    self.update_state(state="PROGRESS", meta={"current": 50})
+    try:
+        result = run_agent_logic(agent_config, input_data)
+    except Exception as e:
+        logging.error(f"Error running agent logic: {e}", exc_info=True)
+        return {"status": "error", "error": str(e)}
+
+    # Success path only; returning from ``finally`` would mask the error result.
+    self.update_state(state="PROGRESS", meta={"current": 100})
+    logging.info(
+        f"Webhook processed for agent {agent_id}", extra={"agent_id": agent_id}
+    )
+    return {"status": "success", "result": result}
diff --git a/frontend/src/Navigation.tsx b/frontend/src/Navigation.tsx
index 0e357a6d..53487dd6 100644
--- a/frontend/src/Navigation.tsx
+++ b/frontend/src/Navigation.tsx
@@ -44,6 +44,7 @@ import {
   setModalStateDeleteConv,
   setSelectedAgent,
   setAgents,
+  selectAgents,
 } from './preferences/preferenceSlice';
 import Upload from './upload/Upload';
 
@@ -63,6 +64,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
   const conversations = useSelector(selectConversations);
   const conversationId = useSelector(selectConversationId);
   const modalStateDeleteConv = useSelector(selectModalStateDeleteConv);
+  const agents = useSelector(selectAgents);
   const selectedAgent = useSelector(selectSelectedAgent);
 
   const { isMobile } = useMediaQuery();
@@ -76,6 +78,31 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
 
   const navRef = useRef(null);
 
+  async function fetchRecentAgents() {
+    try {
+      let recentAgents: Agent[] = [];
+      if (!agents) {
+        const response = await userService.getAgents(token);
+        if (!response.ok) throw new Error('Failed to fetch agents');
+        const data: Agent[] = await response.json();
+        dispatch(setAgents(data));
+        recentAgents = data;
+      } else recentAgents = agents;
+      setRecentAgents(
+        recentAgents
+          .filter((agent: Agent) => agent.status === 'published')
+          .sort(
+            (a: Agent, b: Agent) =>
+              new Date(b.last_used_at ?? 0).getTime() -
+              new Date(a.last_used_at ?? 0).getTime(),
+          )
+          .slice(0, 3),
+      );
+    } catch (error) {
+      console.error('Failed to fetch recent agents: ', error);
+    }
+  }
+
   async function fetchConversations() {
     dispatch(setConversations({ ...conversations, loading: true }));
     return await getConversations(token)
@@ -88,25 +115,11 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
       });
   }
 
-  async function getAgents() {
-    const response = await userService.getAgents(token);
-    if (!response.ok) throw new Error('Failed to fetch agents');
-    const data: Agent[] = await response.json();
-    dispatch(setAgents(data));
-    setRecentAgents(
-      data
-        .filter((agent: Agent) => agent.status === 'published')
-        .sort(
-          (a: Agent, b: Agent) =>
-            new Date(b.last_used_at ?? 0).getTime() -
-            new Date(a.last_used_at ?? 0).getTime(),
-        )
-        .slice(0, 3),
-    );
-  }
+  useEffect(() => {
+    if (token) fetchRecentAgents();
+  }, [agents, token, dispatch]);
 
   useEffect(() => {
-    if (recentAgents.length === 0) getAgents();
     if (!conversations?.data) fetchConversations();
     if (queries.length === 0) resetConversation();
   }, [conversations?.data, dispatch]);
diff --git a/frontend/src/agents/AgentPreview.tsx b/frontend/src/agents/AgentPreview.tsx
index 5eaf10a9..621ac477 100644
--- a/frontend/src/agents/AgentPreview.tsx
+++ b/frontend/src/agents/AgentPreview.tsx
@@ -141,6 +141,7 @@ export default function AgentPreview() {
             loading={status === 'loading'}
             showSourceButton={selectedAgent ? false : true}
             showToolButton={selectedAgent ? false : true}
+            autoFocus={false}
           />
           <p className="w-full self-center bg-transparent pt-2 text-center text-xs text-gray-4000 dark:text-sonic-silver md:inline">
             This is a preview of the agent. You can publish it to start using it
diff --git a/frontend/src/agents/NewAgent.tsx b/frontend/src/agents/NewAgent.tsx
index 37466a86..3aa1bf7d 100644
--- a/frontend/src/agents/NewAgent.tsx
+++ b/frontend/src/agents/NewAgent.tsx
@@ -155,9 +155,10 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
     const data = await response.json();
     if (data.id) setAgent((prev) => ({ ...prev, id: data.id }));
     if (data.key) setAgent((prev) => ({ ...prev, key: data.key }));
-    if (effectiveMode === 'new') {
-      setAgentDetails('ACTIVE');
+    if (effectiveMode === 'new' || effectiveMode === 'draft') {
       setEffectiveMode('edit');
+      setAgent((prev) => ({ ...prev, status: 'published' }));
+      setAgentDetails('ACTIVE');
     }
   };
 
@@ -408,7 +409,7 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
                     agent.prompt_id
                       ? prompts.filter(
                           (prompt) => prompt.id === agent.prompt_id,
-                        )[0].name || null
+                        )[0]?.name || null
                       : null
                   }
                   onSelect={(option: { label: string; value: string }) =>
@@ -532,7 +533,7 @@ function AgentPreviewArea() {
   const selectedAgent = useSelector(selectSelectedAgent);
   return (
     <div className="h-full w-full rounded-[30px] border border-[#F6F6F6] bg-white dark:border-[#7E7E7E] dark:bg-[#222327] max-[1180px]:h-[48rem]">
-      {selectedAgent?.id ? (
+      {selectedAgent?.status === 'published' ? (
         <div className="flex h-full w-full flex-col justify-end overflow-auto rounded-[30px]">
           <AgentPreview />
         </div>
@@ -540,7 +541,7 @@ function AgentPreviewArea() {
         <div className="flex h-full w-full flex-col items-center justify-center gap-2">
           <span className="block h-12 w-12 bg-[url('/src/assets/science-spark.svg')] bg-contain bg-center bg-no-repeat transition-all dark:bg-[url('/src/assets/science-spark-dark.svg')]" />{' '}
           <p className="text-xs text-[#18181B] dark:text-[#949494]">
-            Published agents can be previewd here
+            Published agents can be previewed here
           </p>
         </div>
       )}
diff --git a/frontend/src/agents/index.tsx b/frontend/src/agents/index.tsx
index 49123cd6..0ceef669 100644
--- a/frontend/src/agents/index.tsx
+++ b/frontend/src/agents/index.tsx
@@ -12,7 +12,13 @@ import ThreeDots from '../assets/three-dots.svg';
 import ContextMenu, { MenuOption } from '../components/ContextMenu';
 import ConfirmationModal from '../modals/ConfirmationModal';
 import { ActiveState } from '../models/misc';
-import { selectToken, setSelectedAgent } from '../preferences/preferenceSlice';
+import {
+  selectToken,
+  setSelectedAgent,
+  setAgents,
+  selectAgents,
+  selectSelectedAgent,
+} from '../preferences/preferenceSlice';
 import AgentLogs from './AgentLogs';
 import NewAgent from './NewAgent';
 import { Agent } from './types';
@@ -31,9 +37,12 @@ export default function Agents() {
 
 function AgentsList() {
   const navigate = useNavigate();
+  const dispatch = useDispatch();
   const token = useSelector(selectToken);
+  const agents = useSelector(selectAgents);
+  const selectedAgent = useSelector(selectSelectedAgent);
 
-  const [userAgents, setUserAgents] = useState<Agent[]>([]);
+  const [userAgents, setUserAgents] = useState<Agent[]>(agents || []);
   const [loading, setLoading] = useState<boolean>(true);
 
   const getAgents = async () => {
@@ -43,6 +52,7 @@ function AgentsList() {
       if (!response.ok) throw new Error('Failed to fetch agents');
       const data = await response.json();
       setUserAgents(data);
+      dispatch(setAgents(data));
       setLoading(false);
     } catch (error) {
       console.error('Error:', error);
@@ -52,6 +62,7 @@ function AgentsList() {
 
   useEffect(() => {
     getAgents();
+    if (selectedAgent) dispatch(setSelectedAgent(null));
   }, [token]);
   return (
     <div className="p-4 md:p-12">
@@ -62,6 +73,7 @@ function AgentsList() {
         Discover and create custom versions of DocsGPT that combine
         instructions, extra knowledge, and any combination of skills.
       </p>
+      {/* Premade agents section */}
       {/* <div className="mt-6">
         <h2 className="text-[18px] font-semibold text-[#18181B] dark:text-[#E0E0E0]">
           Premade by DocsGPT
@@ -200,8 +212,10 @@ function AgentCard({
   ];
 
   const handleClick = () => {
-    dispatch(setSelectedAgent(agent));
-    navigate(`/`);
+    if (agent.status === 'published') {
+      dispatch(setSelectedAgent(agent));
+      navigate(`/`);
+    }
   };
 
   const handleDelete = async (agentId: string) => {
@@ -214,8 +228,11 @@ function AgentCard({
   };
   return (
     <div
-      className="relative flex h-44 w-48 cursor-pointer flex-col justify-between rounded-[1.2rem] bg-[#F6F6F6] px-6 py-5 dark:bg-[#383838]"
-      onClick={(e) => handleClick()}
+      className={`relative flex h-44 w-48 flex-col justify-between rounded-[1.2rem] bg-[#F6F6F6] px-6 py-5 hover:bg-[#ECECEC] dark:bg-[#383838] hover:dark:bg-[#383838]/80 ${agent.status === 'published' && 'cursor-pointer'}`}
+      onClick={(e) => {
+        e.stopPropagation();
+        handleClick();
+      }}
     >
       <div
         ref={menuRef}
diff --git a/frontend/src/api/endpoints.ts b/frontend/src/api/endpoints.ts
index 80f03056..222bdb15 100644
--- a/frontend/src/api/endpoints.ts
+++ b/frontend/src/api/endpoints.ts
@@ -13,6 +13,7 @@ const endpoints = {
     CREATE_AGENT: '/api/create_agent',
     UPDATE_AGENT: (agent_id: string) => `/api/update_agent/${agent_id}`,
     DELETE_AGENT: (id: string) => `/api/delete_agent?id=${id}`,
+    AGENT_WEBHOOK: (id: string) => `/api/agent_webhook?id=${id}`,
     PROMPTS: '/api/get_prompts',
     CREATE_PROMPT: '/api/create_prompt',
     DELETE_PROMPT: '/api/delete_prompt',
diff --git a/frontend/src/api/services/userService.ts b/frontend/src/api/services/userService.ts
index bbe20b10..4a0f45d8 100644
--- a/frontend/src/api/services/userService.ts
+++ b/frontend/src/api/services/userService.ts
@@ -31,6 +31,8 @@ const userService = {
     apiClient.put(endpoints.USER.UPDATE_AGENT(agent_id), data, token),
   deleteAgent: (id: string, token: string | null): Promise<any> =>
     apiClient.delete(endpoints.USER.DELETE_AGENT(id), token),
+  getAgentWebhook: (id: string, token: string | null): Promise<any> =>
+    apiClient.get(endpoints.USER.AGENT_WEBHOOK(id), token),
   getPrompts: (token: string | null): Promise<any> =>
     apiClient.get(endpoints.USER.PROMPTS, token),
   createPrompt: (data: any, token: string | null): Promise<any> =>
diff --git a/frontend/src/components/MessageInput.tsx b/frontend/src/components/MessageInput.tsx
index e7ef7f9d..60cd4b81 100644
--- a/frontend/src/components/MessageInput.tsx
+++ b/frontend/src/components/MessageInput.tsx
@@ -36,15 +36,7 @@ type MessageInputProps = {
   loading: boolean;
   showSourceButton?: boolean;
   showToolButton?: boolean;
-};
-
-type UploadState = {
-  taskId: string;
-  fileName: string;
-  progress: number;
-  attachment_id?: string;
-  token_count?: number;
-  status: 'uploading' | 'processing' | 'completed' | 'failed';
+  autoFocus?: boolean;
 };
 
 export default function MessageInput({
@@ -54,6 +46,7 @@ export default function MessageInput({
   loading,
   showSourceButton = true,
   showToolButton = true,
+  autoFocus = true,
 }: MessageInputProps) {
   const { t } = useTranslation();
   const [isDarkTheme] = useDarkTheme();
@@ -235,7 +228,7 @@ export default function MessageInput({
   };
 
   useEffect(() => {
-    inputRef.current?.focus();
+    if (autoFocus) inputRef.current?.focus();
     handleInput();
   }, []);
 
diff --git a/frontend/src/modals/AgentDetailsModal.tsx b/frontend/src/modals/AgentDetailsModal.tsx
index 377dd7bd..c1a8c131 100644
--- a/frontend/src/modals/AgentDetailsModal.tsx
+++ b/frontend/src/modals/AgentDetailsModal.tsx
@@ -1,7 +1,12 @@
+import { useState } from 'react';
+import { useSelector } from 'react-redux';
+
 import { Agent } from '../agents/types';
 import { ActiveState } from '../models/misc';
 import WrapperModal from './WrapperModal';
-import { useNavigate } from 'react-router-dom';
+import userService from '../api/services/userService';
+import { selectToken } from '../preferences/preferenceSlice';
+import Spinner from '../components/Spinner';
 
 type AgentDetailsModalProps = {
   agent: Agent;
@@ -16,13 +21,41 @@ export default function AgentDetailsModal({
   modalState,
   setModalState,
 }: AgentDetailsModalProps) {
-  const navigate = useNavigate();
+  const token = useSelector(selectToken);
+
+  const [publicLink, setPublicLink] = useState<string | null>(null);
+  const [apiKey, setApiKey] = useState<string | null>(null);
+  const [webhookUrl, setWebhookUrl] = useState<string | null>(null);
+  const [loadingStates, setLoadingStates] = useState({
+    publicLink: false,
+    apiKey: false,
+    webhook: false,
+  });
+
+  const setLoading = (
+    key: 'publicLink' | 'apiKey' | 'webhook',
+    state: boolean,
+  ) => {
+    setLoadingStates((prev) => ({ ...prev, [key]: state }));
+  };
+
+  const handleGenerateWebhook = async () => {
+    setLoading('webhook', true);
+    const response = await userService.getAgentWebhook(agent.id ?? '', token);
+    if (!response.ok) {
+      setLoading('webhook', false);
+      return;
+    }
+    const data = await response.json();
+    setWebhookUrl(data.webhook_url);
+    setLoading('webhook', false);
+  };
+
   if (modalState !== 'ACTIVE') return null;
   return (
     <WrapperModal
       className="sm:w-[512px]"
       close={() => {
-        // if (mode === 'new') navigate('/agents');
         setModalState('INACTIVE');
       }}
     >
@@ -57,9 +90,23 @@ export default function AgentDetailsModal({
             <h2 className="text-base font-semibold text-jet dark:text-bright-gray">
               Webhooks
             </h2>
-            <button className="hover:bg-vi</button>olets-are-blue w-28 rounded-3xl border border-solid border-violets-are-blue px-5 py-2 text-sm font-medium text-violets-are-blue transition-colors hover:bg-violets-are-blue hover:text-white">
-              Generate
-            </button>
+            {webhookUrl ? (
+              <div className="flex flex-wrap items-center gap-2">
+                <span className="font-mono text-sm text-gray-700 dark:text-[#ECECF1]">
+                  {webhookUrl}
+                </span>
+                <button className="hover:bg-vi</button>olets-are-blue w-28 rounded-3xl border border-solid border-violets-are-blue px-5 py-2 text-sm font-medium text-violets-are-blue transition-colors hover:bg-violets-are-blue hover:text-white">
+                  Copy
+                </button>
+              </div>
+            ) : (
+              <button
+                className="hover:bg-vi</button>olets-are-blue w-28 rounded-3xl border border-solid border-violets-are-blue px-5 py-2 text-sm font-medium text-violets-are-blue transition-colors hover:bg-violets-are-blue hover:text-white"
+                onClick={handleGenerateWebhook}
+              >
+                {loadingStates.webhook ? <Spinner /> : 'Generate'}
+              </button>
+            )}
           </div>
         </div>
       </div>
diff --git a/frontend/src/modals/ConfirmationModal.tsx b/frontend/src/modals/ConfirmationModal.tsx
index 25f8c2da..28151736 100644
--- a/frontend/src/modals/ConfirmationModal.tsx
+++ b/frontend/src/modals/ConfirmationModal.tsx
@@ -40,19 +40,23 @@ export default function ConfirmationModal({
         >
           <div className="relative">
             <div>
-              <p className="font-base mb-1 w-[90%] text-lg break-words text-jet dark:text-bright-gray">
+              <p className="font-base mb-1 w-[90%] break-words text-lg text-jet dark:text-bright-gray">
                 {message}
               </p>
               <div>
                 <div className="mt-6 flex flex-row-reverse gap-1">
                   <button
-                    onClick={handleSubmit}
+                    onClick={(e) => {
+                      e.stopPropagation();
+                      handleSubmit();
+                    }}
                     className={submitButtonClasses}
                   >
                     {submitLabel}
                   </button>
                   <button
-                    onClick={() => {
+                    onClick={(e) => {
+                      e.stopPropagation();
                       setModalState('INACTIVE');
                       handleCancel && handleCancel();
                     }}

From 4979e1ac9a70a1b86de280b04fd03d2115bb1782 Mon Sep 17 00:00:00 2001
From: Siddhant Rai <siddhant.rai.5686@gmail.com>
Date: Mon, 28 Apr 2025 14:18:28 +0530
Subject: [PATCH 26/39] feat: add clsx dependency, enhance logging in agent
 logic, and improve agent logs component

---
 application/agents/base.py                    |   4 +-
 application/agents/classic_agent.py           |  10 +-
 application/agents/react_agent.py             |   4 +
 application/logging.py                        |   6 +-
 application/worker.py                         |   1 +
 frontend/package-lock.json                    |  17 ++
 frontend/package.json                         |   1 +
 frontend/src/agents/AgentLogs.tsx             |  53 +++++-
 frontend/src/agents/NewAgent.tsx              |   8 +-
 frontend/src/agents/index.tsx                 |   4 +
 frontend/src/assets/monitoring-purple.svg     |   3 +
 frontend/src/assets/monitoring-white.svg      |   3 +
 frontend/src/components/CopyButton.tsx        | 160 +++++++++++++-----
 .../src/conversation/ConversationBubble.tsx   |  20 +--
 frontend/src/modals/AgentDetailsModal.tsx     |  37 ++--
 frontend/src/settings/Analytics.tsx           |  41 +----
 frontend/src/settings/Logs.tsx                |   3 +-
 17 files changed, 254 insertions(+), 121 deletions(-)
 create mode 100644 frontend/src/assets/monitoring-purple.svg
 create mode 100644 frontend/src/assets/monitoring-white.svg

diff --git a/application/agents/base.py b/application/agents/base.py
index 64fac17b..b3797fc6 100644
--- a/application/agents/base.py
+++ b/application/agents/base.py
@@ -255,7 +255,7 @@ class BaseAgent(ABC):
             model=self.gpt_model, messages=messages, tools=self.tools
         )
         if log_context:
-            data = build_stack_data(self.llm)
+            data = build_stack_data(self.llm, exclude_attributes=["client"])
             log_context.stacks.append({"component": "llm", "data": data})
         return resp
 
@@ -271,6 +271,6 @@ class BaseAgent(ABC):
             self, resp, tools_dict, messages, attachments
         )
         if log_context:
-            data = build_stack_data(self.llm_handler)
+            data = build_stack_data(self.llm_handler, exclude_attributes=["tool_calls"])
             log_context.stacks.append({"component": "llm_handler", "data": data})
         return resp
diff --git a/application/agents/classic_agent.py b/application/agents/classic_agent.py
index bf472cd0..b96a77fc 100644
--- a/application/agents/classic_agent.py
+++ b/application/agents/classic_agent.py
@@ -48,15 +48,13 @@ class ClassicAgent(BaseAgent):
         ):
             yield {"answer": resp.message.content}
         else:
-            # completion = self.llm.gen_stream(
-            #     model=self.gpt_model, messages=messages, tools=self.tools
-            # )
-            # log type of resp
-            logger.info(f"Response type: {type(resp)}")
-            logger.info(f"Response: {resp}")
             for line in resp:
                 if isinstance(line, str):
                     yield {"answer": line}
 
+        log_context.stacks.append(
+            {"component": "agent", "data": {"tool_calls": self.tool_calls.copy()}}
+        )
+
         yield {"sources": retrieved_data}
         yield {"tool_calls": self.tool_calls.copy()}
diff --git a/application/agents/react_agent.py b/application/agents/react_agent.py
index 3fae1fda..a5d47850 100644
--- a/application/agents/react_agent.py
+++ b/application/agents/react_agent.py
@@ -82,6 +82,10 @@ class ReActAgent(BaseAgent):
                 if isinstance(line, str):
                     self.observations.append(line)
 
+        log_context.stacks.append(
+            {"component": "agent", "data": {"tool_calls": self.tool_calls.copy()}}
+        )
+
         yield {"sources": retrieved_data}
         yield {"tool_calls": self.tool_calls.copy()}
 
diff --git a/application/logging.py b/application/logging.py
index 1dd0d557..eaf43d7c 100644
--- a/application/logging.py
+++ b/application/logging.py
@@ -29,6 +29,8 @@ def build_stack_data(
     exclude_attributes: List[str] = None,
     custom_data: Dict = None,
 ) -> Dict:
+    if obj is None:
+        raise ValueError("The 'obj' parameter cannot be None")
     data = {}
     if include_attributes is None:
         include_attributes = []
@@ -56,8 +58,8 @@ def build_stack_data(
                         data[attr_name] = [str(item) for item in attr_value]
                 elif isinstance(attr_value, dict):
                     data[attr_name] = {k: str(v) for k, v in attr_value.items()}
-                else:
-                    data[attr_name] = str(attr_value)
+        except AttributeError as e:
+            logging.warning(f"AttributeError while accessing {attr_name}: {e}")
         except AttributeError:
             pass
     if custom_data:
diff --git a/application/worker.py b/application/worker.py
index 4782a83b..537206b7 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -179,6 +179,7 @@ def run_agent_logic(agent_config, input_data):
             "tool_calls": tool_calls,
             "thought": thought,
         }
+        logging.info(f"Agent response: {result}")
         return result
     except Exception as e:
         logging.error(f"Error in run_agent_logic: {e}", exc_info=True)
diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index 043bbf58..fa250e66 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -10,6 +10,7 @@
       "dependencies": {
         "@reduxjs/toolkit": "^2.5.1",
         "chart.js": "^4.4.4",
+        "clsx": "^2.1.1",
         "i18next": "^24.2.0",
         "i18next-browser-languagedetector": "^8.0.2",
         "prop-types": "^15.8.1",
@@ -2751,6 +2752,15 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/clsx": {
+      "version": "2.1.1",
+      "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz",
+      "integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=6"
+      }
+    },
     "node_modules/color-name": {
       "version": "1.1.4",
       "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
@@ -9405,6 +9415,13 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/undici-types": {
+      "version": "6.21.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
+      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/unified": {
       "version": "11.0.5",
       "resolved": "https://registry.npmjs.org/unified/-/unified-11.0.5.tgz",
diff --git a/frontend/package.json b/frontend/package.json
index 45058e98..62afaad3 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -21,6 +21,7 @@
   "dependencies": {
     "@reduxjs/toolkit": "^2.5.1",
     "chart.js": "^4.4.4",
+    "clsx": "^2.1.1",
     "i18next": "^24.2.0",
     "i18next-browser-languagedetector": "^8.0.2",
     "prop-types": "^15.8.1",
diff --git a/frontend/src/agents/AgentLogs.tsx b/frontend/src/agents/AgentLogs.tsx
index 3773e54f..864a85fe 100644
--- a/frontend/src/agents/AgentLogs.tsx
+++ b/frontend/src/agents/AgentLogs.tsx
@@ -1,12 +1,40 @@
+import { useEffect, useState } from 'react';
+import { useSelector } from 'react-redux';
 import { useNavigate, useParams } from 'react-router-dom';
 
+import userService from '../api/services/userService';
 import ArrowLeft from '../assets/arrow-left.svg';
+import { selectToken } from '../preferences/preferenceSlice';
 import Analytics from '../settings/Analytics';
 import Logs from '../settings/Logs';
+import Spinner from '../components/Spinner';
+import { Agent } from './types';
 
 export default function AgentLogs() {
   const navigate = useNavigate();
   const { agentId } = useParams();
+  const token = useSelector(selectToken);
+
+  const [agent, setAgent] = useState<Agent>();
+  const [loadingAgent, setLoadingAgent] = useState<boolean>(true);
+
+  const fetchAgent = async (agentId: string) => {
+    setLoadingAgent(true);
+    try {
+      const response = await userService.getAgent(agentId ?? '', token);
+      if (!response.ok) throw new Error('Failed to fetch Chatbots');
+      const agent = await response.json();
+      setAgent(agent);
+    } catch (error) {
+      console.error(error);
+    } finally {
+      setLoadingAgent(false);
+    }
+  };
+
+  useEffect(() => {
+    if (agentId) fetchAgent(agentId);
+  }, [agentId, token]);
   return (
     <div className="p-4 md:p-12">
       <div className="flex items-center gap-3 px-4">
@@ -25,8 +53,29 @@ export default function AgentLogs() {
           Agent Logs
         </h1>
       </div>
-      <Analytics agentId={agentId} />
-      <Logs agentId={agentId} tableHeader="Agent endpoint logs" />
+      <div className="mt-6 flex flex-col gap-3 px-4">
+        <h2 className="text-sm font-semibold text-black dark:text-[#E0E0E0]">
+          Agent Name
+        </h2>
+        {agent && (
+          <p className="text-[#28292E] dark:text-[#E0E0E0]">{agent.name}</p>
+        )}
+      </div>
+      {loadingAgent ? (
+        <div className="flex h-[345px] w-full items-center justify-center">
+          <Spinner />
+        </div>
+      ) : (
+        agent && <Analytics agentId={agent.id} />
+      )}
+      {loadingAgent ? (
+        <div className="flex h-[55vh] w-full items-center justify-center">
+          {' '}
+          <Spinner />
+        </div>
+      ) : (
+        agent && <Logs agentId={agentId} tableHeader="Agent endpoint logs" />
+      )}
     </div>
   );
 }
diff --git a/frontend/src/agents/NewAgent.tsx b/frontend/src/agents/NewAgent.tsx
index 3aa1bf7d..0cccfbe6 100644
--- a/frontend/src/agents/NewAgent.tsx
+++ b/frontend/src/agents/NewAgent.tsx
@@ -11,10 +11,7 @@ import AgentDetailsModal from '../modals/AgentDetailsModal';
 import ConfirmationModal from '../modals/ConfirmationModal';
 import { ActiveState, Doc, Prompt } from '../models/misc';
 import {
-  selectSelectedAgent,
-  selectSourceDocs,
-  selectToken,
-  setSelectedAgent,
+    selectSelectedAgent, selectSourceDocs, selectToken, setSelectedAgent
 } from '../preferences/preferenceSlice';
 import PromptsModal from '../preferences/PromptsModal';
 import { UserToolType } from '../settings/types';
@@ -287,9 +284,10 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
           )}
           {modeConfig[effectiveMode].showAccessDetails && (
             <button
-              className="hover:bg-vi</button>olets-are-blue rounded-3xl border border-solid border-violets-are-blue px-5 py-2 text-sm font-medium text-violets-are-blue transition-colors hover:bg-violets-are-blue hover:text-white"
+              className="group flex items-center gap-2 rounded-3xl border border-solid border-violets-are-blue px-5 py-2 text-sm font-medium text-violets-are-blue transition-colors hover:bg-violets-are-blue hover:text-white"
               onClick={() => navigate(`/agents/logs/${agent.id}`)}
             >
+              <span className="block h-5 w-5 bg-[url('/src/assets/monitoring-purple.svg')] bg-contain bg-center bg-no-repeat transition-all group-hover:bg-[url('/src/assets/monitoring-white.svg')]" />
               Logs
             </button>
           )}
diff --git a/frontend/src/agents/index.tsx b/frontend/src/agents/index.tsx
index 0ceef669..c2edb34a 100644
--- a/frontend/src/agents/index.tsx
+++ b/frontend/src/agents/index.tsx
@@ -138,6 +138,7 @@ function AgentsList() {
               <AgentCard
                 key={agent.id}
                 agent={agent}
+                agents={userAgents}
                 setUserAgents={setUserAgents}
               />
             ))
@@ -160,9 +161,11 @@ function AgentsList() {
 
 function AgentCard({
   agent,
+  agents,
   setUserAgents,
 }: {
   agent: Agent;
+  agents: Agent[];
   setUserAgents: React.Dispatch<React.SetStateAction<Agent[]>>;
 }) {
   const navigate = useNavigate();
@@ -225,6 +228,7 @@ function AgentCard({
     setUserAgents((prevAgents) =>
       prevAgents.filter((prevAgent) => prevAgent.id !== data.id),
     );
+    dispatch(setAgents(agents.filter((prevAgent) => prevAgent.id !== data.id)));
   };
   return (
     <div
diff --git a/frontend/src/assets/monitoring-purple.svg b/frontend/src/assets/monitoring-purple.svg
new file mode 100644
index 00000000..ef849521
--- /dev/null
+++ b/frontend/src/assets/monitoring-purple.svg
@@ -0,0 +1,3 @@
+<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M3.50195 17.1494C3.68072 17.1503 3.83797 17.2119 3.96289 17.3359C4.08842 17.4607 4.15029 17.6188 4.15039 17.7988V18.5869C4.15039 18.7666 4.08775 18.9248 3.96191 19.0498C3.83677 19.1741 3.68038 19.2364 3.50098 19.2373H3.5C3.36498 19.2373 3.24235 19.2024 3.13672 19.1318L3.03613 19.0488C2.91173 18.9234 2.84961 18.7657 2.84961 18.5869V17.7988C2.84971 17.6193 2.91265 17.4614 3.03809 17.3359C3.16377 17.2104 3.32208 17.1486 3.50195 17.1494ZM7.50195 12.6494C7.68072 12.6503 7.83797 12.7119 7.96289 12.8359C8.08842 12.9607 8.15029 13.1188 8.15039 13.2988V18.5869C8.15039 18.7666 8.08775 18.9248 7.96191 19.0498C7.83656 19.1743 7.67907 19.2364 7.5 19.2373H7.49902C7.31988 19.2373 7.16245 19.175 7.03711 19.0498C6.91161 18.9243 6.84884 18.7663 6.84961 18.5869V13.2988C6.84971 13.1193 6.91265 12.9614 7.03809 12.8359C7.16377 12.7104 7.32208 12.6486 7.50195 12.6494ZM11.502 14.6494C11.6807 14.6503 11.838 14.7119 11.9629 14.8359C12.0884 14.9607 12.1503 15.1188 12.1504 15.2988V18.5869C12.1504 18.7666 12.0878 18.9248 11.9619 19.0498C11.8366 19.1743 11.6791 19.2364 11.5 19.2373H11.499C11.3193 19.2373 11.1611 19.1747 11.0361 19.0488C10.9117 18.9234 10.8496 18.7657 10.8496 18.5869V15.2988C10.8497 15.1193 10.9127 14.9614 11.0381 14.8359C11.1638 14.7104 11.3221 14.6486 11.502 14.6494ZM15.502 13.1494C15.6807 13.1503 15.838 13.2119 15.9629 13.3359C16.0884 13.4607 16.1503 13.6188 16.1504 13.7988V18.5869C16.1504 18.7666 16.0878 18.9248 15.9619 19.0498C15.8366 19.1743 15.6791 19.2364 15.5 19.2373H15.499C15.3199 19.2373 15.1625 19.175 15.0371 19.0498C14.9116 18.9243 14.8488 18.7663 14.8496 18.5869V13.7988C14.8497 13.6193 14.9127 13.4614 15.0381 13.3359C15.1638 13.2104 15.3221 13.1486 15.502 13.1494ZM19.502 9.14941C19.6807 9.15031 19.838 9.2119 19.9629 9.33594C20.0884 9.46066 20.1503 9.61875 20.1504 9.79883V18.5869C20.1504 18.7666 20.0878 18.9248 19.9619 19.0498C19.8366 19.1743 19.6791 19.2364 19.5 19.2373H19.499C19.3199 19.2373 19.1625 19.175 19.0371 19.0498C18.9116 18.9243 18.8488 18.7663 18.8496 18.5869V9.79883C18.8497 9.61927 18.9127 9.46137 19.0381 9.33594C19.1638 9.21036 19.3221 9.14857 19.502 9.14941ZM19.499 3.35156C19.6838 3.34567 19.8427 3.41642 19.9678 3.55469H19.9688C20.0596 3.64961 20.1156 3.761 20.1357 3.88477L20.1436 4.0127C20.1385 4.18363 20.079 4.33405 19.9609 4.45312H19.96L14.7422 9.6709C14.5779 9.83222 14.3846 9.95688 14.1641 10.0439C13.9451 10.1304 13.7235 10.1738 13.5 10.1738C13.3327 10.1738 13.1678 10.1497 13.0059 10.1006L12.8457 10.043C12.6325 9.95623 12.4382 9.83371 12.2637 9.67578L12.2578 9.6709L12.3643 9.56445L12.2578 9.66992L9.83594 7.24902C9.75169 7.16486 9.64381 7.12012 9.5 7.12012C9.39199 7.12012 9.30408 7.14514 9.23145 7.19336L9.16406 7.24902L3.95996 12.4531C3.83505 12.578 3.67971 12.6445 3.50195 12.6504H3.50098C3.31619 12.6556 3.1575 12.583 3.0332 12.4443V12.4453C2.91072 12.3194 2.85137 12.1633 2.85645 11.9873L2.87012 11.8623C2.89478 11.7425 2.95119 11.6357 3.04004 11.5469L8.25879 6.32812L8.39453 6.20703C8.53435 6.09371 8.68597 6.00535 8.84961 5.94434L9.00977 5.89258C9.17067 5.84784 9.33418 5.8252 9.5 5.8252C9.72114 5.8252 9.94159 5.86551 10.1602 5.94434C10.3843 6.02524 10.5789 6.15388 10.7422 6.3291L13.1641 8.75L13.2314 8.80566C13.3041 8.854 13.3919 8.87891 13.5 8.87891C13.6439 8.87891 13.7517 8.83427 13.8359 8.75L19.04 3.54688C19.165 3.42139 19.321 3.35565 19.499 3.35059V3.35156Z" fill="#7D54D1" stroke="#7D54D1" stroke-width="0.3"/>
+</svg>
diff --git a/frontend/src/assets/monitoring-white.svg b/frontend/src/assets/monitoring-white.svg
new file mode 100644
index 00000000..b015eeee
--- /dev/null
+++ b/frontend/src/assets/monitoring-white.svg
@@ -0,0 +1,3 @@
+<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M3.50195 17.1494C3.68072 17.1503 3.83797 17.2119 3.96289 17.3359C4.08842 17.4607 4.15029 17.6188 4.15039 17.7988V18.5869C4.15039 18.7666 4.08775 18.9248 3.96191 19.0498C3.83677 19.1741 3.68038 19.2364 3.50098 19.2373H3.5C3.36498 19.2373 3.24235 19.2024 3.13672 19.1318L3.03613 19.0488C2.91173 18.9234 2.84961 18.7657 2.84961 18.5869V17.7988C2.84971 17.6193 2.91265 17.4614 3.03809 17.3359C3.16377 17.2104 3.32208 17.1486 3.50195 17.1494ZM7.50195 12.6494C7.68072 12.6503 7.83797 12.7119 7.96289 12.8359C8.08842 12.9607 8.15029 13.1188 8.15039 13.2988V18.5869C8.15039 18.7666 8.08775 18.9248 7.96191 19.0498C7.83656 19.1743 7.67907 19.2364 7.5 19.2373H7.49902C7.31988 19.2373 7.16245 19.175 7.03711 19.0498C6.91161 18.9243 6.84884 18.7663 6.84961 18.5869V13.2988C6.84971 13.1193 6.91265 12.9614 7.03809 12.8359C7.16377 12.7104 7.32208 12.6486 7.50195 12.6494ZM11.502 14.6494C11.6807 14.6503 11.838 14.7119 11.9629 14.8359C12.0884 14.9607 12.1503 15.1188 12.1504 15.2988V18.5869C12.1504 18.7666 12.0878 18.9248 11.9619 19.0498C11.8366 19.1743 11.6791 19.2364 11.5 19.2373H11.499C11.3193 19.2373 11.1611 19.1747 11.0361 19.0488C10.9117 18.9234 10.8496 18.7657 10.8496 18.5869V15.2988C10.8497 15.1193 10.9127 14.9614 11.0381 14.8359C11.1638 14.7104 11.3221 14.6486 11.502 14.6494ZM15.502 13.1494C15.6807 13.1503 15.838 13.2119 15.9629 13.3359C16.0884 13.4607 16.1503 13.6188 16.1504 13.7988V18.5869C16.1504 18.7666 16.0878 18.9248 15.9619 19.0498C15.8366 19.1743 15.6791 19.2364 15.5 19.2373H15.499C15.3199 19.2373 15.1625 19.175 15.0371 19.0498C14.9116 18.9243 14.8488 18.7663 14.8496 18.5869V13.7988C14.8497 13.6193 14.9127 13.4614 15.0381 13.3359C15.1638 13.2104 15.3221 13.1486 15.502 13.1494ZM19.502 9.14941C19.6807 9.15031 19.838 9.2119 19.9629 9.33594C20.0884 9.46066 20.1503 9.61875 20.1504 9.79883V18.5869C20.1504 18.7666 20.0878 18.9248 19.9619 19.0498C19.8366 19.1743 19.6791 19.2364 19.5 19.2373H19.499C19.3199 19.2373 19.1625 19.175 19.0371 19.0498C18.9116 18.9243 18.8488 18.7663 18.8496 18.5869V9.79883C18.8497 9.61927 18.9127 9.46137 19.0381 9.33594C19.1638 9.21036 19.3221 9.14857 19.502 9.14941ZM19.499 3.35156C19.6838 3.34567 19.8427 3.41642 19.9678 3.55469H19.9688C20.0596 3.64961 20.1156 3.761 20.1357 3.88477L20.1436 4.0127C20.1385 4.18363 20.079 4.33405 19.9609 4.45312H19.96L14.7422 9.6709C14.5779 9.83222 14.3846 9.95688 14.1641 10.0439C13.9451 10.1304 13.7235 10.1738 13.5 10.1738C13.3327 10.1738 13.1678 10.1497 13.0059 10.1006L12.8457 10.043C12.6325 9.95623 12.4382 9.83371 12.2637 9.67578L12.2578 9.6709L12.3643 9.56445L12.2578 9.66992L9.83594 7.24902C9.75169 7.16486 9.64381 7.12012 9.5 7.12012C9.39199 7.12012 9.30408 7.14514 9.23145 7.19336L9.16406 7.24902L3.95996 12.4531C3.83505 12.578 3.67971 12.6445 3.50195 12.6504H3.50098C3.31619 12.6556 3.1575 12.583 3.0332 12.4443V12.4453C2.91072 12.3194 2.85137 12.1633 2.85645 11.9873L2.87012 11.8623C2.89478 11.7425 2.95119 11.6357 3.04004 11.5469L8.25879 6.32812L8.39453 6.20703C8.53435 6.09371 8.68597 6.00535 8.84961 5.94434L9.00977 5.89258C9.17067 5.84784 9.33418 5.8252 9.5 5.8252C9.72114 5.8252 9.94159 5.86551 10.1602 5.94434C10.3843 6.02524 10.5789 6.15388 10.7422 6.3291L13.1641 8.75L13.2314 8.80566C13.3041 8.854 13.3919 8.87891 13.5 8.87891C13.6439 8.87891 13.7517 8.83427 13.8359 8.75L19.04 3.54688C19.165 3.42139 19.321 3.35565 19.499 3.35059V3.35156Z" fill="#FFFFFF" stroke="#FFFFFF" stroke-width="0.3"/>
+</svg>
diff --git a/frontend/src/components/CopyButton.tsx b/frontend/src/components/CopyButton.tsx
index c430603f..0afbbe82 100644
--- a/frontend/src/components/CopyButton.tsx
+++ b/frontend/src/components/CopyButton.tsx
@@ -1,58 +1,136 @@
+import clsx from 'clsx';
 import copy from 'copy-to-clipboard';
-import { useState } from 'react';
+import { useCallback, useEffect, useRef, useState } from 'react';
 import { useTranslation } from 'react-i18next';
 
 import CheckMark from '../assets/checkmark.svg?react';
-import Copy from '../assets/copy.svg?react';
+import CopyIcon from '../assets/copy.svg?react';
+
+type CopyButtonProps = {
+  textToCopy: string;
+  bgColorLight?: string;
+  bgColorDark?: string;
+  hoverBgColorLight?: string;
+  hoverBgColorDark?: string;
+  iconSize?: string;
+  padding?: string;
+  showText?: boolean;
+  copiedDuration?: number;
+  className?: string;
+  iconWrapperClassName?: string;
+  textClassName?: string;
+};
+
+const DEFAULT_ICON_SIZE = 'w-4 h-4';
+const DEFAULT_PADDING = 'p-2';
+const DEFAULT_COPIED_DURATION = 2000;
+const DEFAULT_BG_LIGHT = '#FFFFFF';
+const DEFAULT_BG_DARK = 'transparent';
+const DEFAULT_HOVER_BG_LIGHT = '#EEEEEE';
+const DEFAULT_HOVER_BG_DARK = '#4A4A4A';
 
 export default function CopyButton({
-  text,
-  colorLight,
-  colorDark,
+  textToCopy,
+  bgColorLight = DEFAULT_BG_LIGHT,
+  bgColorDark = DEFAULT_BG_DARK,
+  hoverBgColorLight = DEFAULT_HOVER_BG_LIGHT,
+  hoverBgColorDark = DEFAULT_HOVER_BG_DARK,
+  iconSize = DEFAULT_ICON_SIZE,
+  padding = DEFAULT_PADDING,
   showText = false,
-}: {
-  text: string;
-  colorLight?: string;
-  colorDark?: string;
-  showText?: boolean;
-}) {
+  copiedDuration = DEFAULT_COPIED_DURATION,
+  className,
+  iconWrapperClassName,
+  textClassName,
+}: CopyButtonProps) {
   const { t } = useTranslation();
-  const [copied, setCopied] = useState(false);
-  const [isCopyHovered, setIsCopyHovered] = useState(false);
+  const [isCopied, setIsCopied] = useState(false);
+  const timeoutIdRef = useRef<number | null>(null);
 
-  const handleCopyClick = (text: string) => {
-    copy(text);
-    setCopied(true);
-    setTimeout(() => {
-      setCopied(false);
-    }, 3000);
-  };
+  const iconWrapperClasses = clsx(
+    'flex items-center justify-center rounded-full transition-colors duration-150 ease-in-out',
+    padding,
+    `bg-[${bgColorLight}] dark:bg-[${bgColorDark}]`,
+    `hover:bg-[${hoverBgColorLight}] dark:hover:bg-[${hoverBgColorDark}]`,
+    {
+      'bg-green-100 dark:bg-green-900 hover:bg-green-100 dark:hover:bg-green-900':
+        isCopied,
+    },
+    iconWrapperClassName,
+  );
 
+  const rootButtonClasses = clsx(
+    'flex items-center gap-2 group',
+    'focus:outline-none focus-visible:ring-2 focus-visible:ring-offset-2 focus-visible:ring-blue-500 rounded-full',
+    className,
+  );
+
+  const textSpanClasses = clsx(
+    'text-xs text-gray-600 dark:text-gray-400 transition-opacity duration-150 ease-in-out',
+    { 'opacity-75': isCopied },
+    textClassName,
+  );
+
+  const IconComponent = isCopied ? CheckMark : CopyIcon;
+  const iconClasses = clsx(iconSize, {
+    'stroke-green-600 dark:stroke-green-400': isCopied,
+    'fill-none text-gray-700 dark:text-gray-300': !isCopied,
+  });
+
+  const buttonTitle = isCopied
+    ? t('conversation.copied')
+    : t('conversation.copy');
+  const displayedText = isCopied
+    ? t('conversation.copied')
+    : t('conversation.copy');
+
+  const handleCopy = useCallback(() => {
+    if (isCopied) return;
+
+    try {
+      const success = copy(textToCopy);
+      if (success) {
+        setIsCopied(true);
+
+        if (timeoutIdRef.current) {
+          clearTimeout(timeoutIdRef.current);
+        }
+
+        timeoutIdRef.current = setTimeout(() => {
+          setIsCopied(false);
+          timeoutIdRef.current = null;
+        }, copiedDuration);
+      } else {
+        console.warn('Copy command failed.');
+      }
+    } catch (error) {
+      console.error('Failed to copy text:', error);
+    }
+  }, [textToCopy, copiedDuration, isCopied]);
+
+  useEffect(() => {
+    return () => {
+      if (timeoutIdRef.current) {
+        clearTimeout(timeoutIdRef.current);
+      }
+    };
+  }, []);
   return (
     <button
-      onClick={() => handleCopyClick(text)}
-      onMouseEnter={() => setIsCopyHovered(true)}
-      onMouseLeave={() => setIsCopyHovered(false)}
-      className="flex items-center gap-2"
+      type="button"
+      onClick={handleCopy}
+      className={rootButtonClasses}
+      title={buttonTitle}
+      aria-label={buttonTitle}
+      disabled={isCopied}
     >
-      <div
-        className={`flex items-center justify-center rounded-full p-2 ${
-          isCopyHovered
-            ? `bg-[#EEEEEE] dark:bg-purple-taupe`
-            : `bg-[${colorLight ? colorLight : '#FFFFFF'}] dark:bg-[${colorDark ? colorDark : 'transparent'}]`
-        }`}
-      >
-        {copied ? (
-          <CheckMark className="cursor-pointer stroke-green-2000" />
-        ) : (
-          <Copy className="w-4 cursor-pointer fill-none" />
-        )}
+      <div className={iconWrapperClasses}>
+        <IconComponent className={iconClasses} aria-hidden="true" />
       </div>
-      {showText && (
-        <span className="text-xs text-gray-600 dark:text-gray-400">
-          {copied ? t('conversation.copied') : t('conversation.copy')}
-        </span>
-      )}
+      {showText && <span className={textSpanClasses}>{displayedText}</span>}
+      <span className="sr-only" aria-live="polite" aria-atomic="true">
+        {isCopied ? t('conversation.copied', 'Copied to clipboard') : ''}
+      </span>
     </button>
   );
 }
diff --git a/frontend/src/conversation/ConversationBubble.tsx b/frontend/src/conversation/ConversationBubble.tsx
index a241b2d3..a7c8467d 100644
--- a/frontend/src/conversation/ConversationBubble.tsx
+++ b/frontend/src/conversation/ConversationBubble.tsx
@@ -5,10 +5,7 @@ import { useTranslation } from 'react-i18next';
 import ReactMarkdown from 'react-markdown';
 import { useSelector } from 'react-redux';
 import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
-import {
-  oneLight,
-  vscDarkPlus,
-} from 'react-syntax-highlighter/dist/cjs/styles/prism';
+import { oneLight, vscDarkPlus } from 'react-syntax-highlighter/dist/cjs/styles/prism';
 import rehypeKatex from 'rehype-katex';
 import remarkGfm from 'remark-gfm';
 import remarkMath from 'remark-math';
@@ -29,10 +26,7 @@ import CopyButton from '../components/CopyButton';
 import Sidebar from '../components/Sidebar';
 import SpeakButton from '../components/TextToSpeechButton';
 import { useDarkTheme, useOutsideAlerter } from '../hooks';
-import {
-  selectChunks,
-  selectSelectedDocs,
-} from '../preferences/preferenceSlice';
+import { selectChunks, selectSelectedDocs } from '../preferences/preferenceSlice';
 import classes from './ConversationBubble.module.css';
 import { FEEDBACK, MESSAGE_TYPE } from './conversationModels';
 import { ToolCallsType } from './types';
@@ -377,7 +371,7 @@ const ConversationBubble = forwardRef<
                             {language}
                           </span>
                           <CopyButton
-                            text={String(children).replace(/\n$/, '')}
+                            textToCopy={String(children).replace(/\n$/, '')}
                           />
                         </div>
                         <SyntaxHighlighter
@@ -462,7 +456,7 @@ const ConversationBubble = forwardRef<
             ${type !== 'ERROR' ? 'group-hover:lg:visible' : 'hidden'}`}
             >
               <div>
-                <CopyButton text={message} />
+                <CopyButton textToCopy={message} />
               </div>
             </div>
             <div
@@ -671,7 +665,7 @@ function ToolCalls({ toolCalls }: { toolCalls: ToolCallsType[] }) {
                         Arguments
                       </span>{' '}
                       <CopyButton
-                        text={JSON.stringify(toolCall.arguments, null, 2)}
+                        textToCopy={JSON.stringify(toolCall.arguments, null, 2)}
                       />
                     </p>
                     <p className="p-2 font-mono text-sm dark:tex dark:bg-[#222327] rounded-b-2xl break-words">
@@ -689,7 +683,7 @@ function ToolCalls({ toolCalls }: { toolCalls: ToolCallsType[] }) {
                         Response
                       </span>{' '}
                       <CopyButton
-                        text={JSON.stringify(toolCall.result, null, 2)}
+                        textToCopy={JSON.stringify(toolCall.result, null, 2)}
                       />
                     </p>
                     <p className="p-2 font-mono text-sm dark:tex dark:bg-[#222327] rounded-b-2xl break-words">
@@ -766,7 +760,7 @@ function Thought({
                           {language}
                         </span>
                         <CopyButton
-                          text={String(children).replace(/\n$/, '')}
+                          textToCopy={String(children).replace(/\n$/, '')}
                         />
                       </div>
                       <SyntaxHighlighter
diff --git a/frontend/src/modals/AgentDetailsModal.tsx b/frontend/src/modals/AgentDetailsModal.tsx
index c1a8c131..c10a837b 100644
--- a/frontend/src/modals/AgentDetailsModal.tsx
+++ b/frontend/src/modals/AgentDetailsModal.tsx
@@ -2,11 +2,12 @@ import { useState } from 'react';
 import { useSelector } from 'react-redux';
 
 import { Agent } from '../agents/types';
-import { ActiveState } from '../models/misc';
-import WrapperModal from './WrapperModal';
 import userService from '../api/services/userService';
-import { selectToken } from '../preferences/preferenceSlice';
+import CopyButton from '../components/CopyButton';
 import Spinner from '../components/Spinner';
+import { ActiveState } from '../models/misc';
+import { selectToken } from '../preferences/preferenceSlice';
+import WrapperModal from './WrapperModal';
 
 type AgentDetailsModalProps = {
   agent: Agent;
@@ -87,24 +88,32 @@ export default function AgentDetailsModal({
             )}
           </div>
           <div className="flex flex-col gap-3">
-            <h2 className="text-base font-semibold text-jet dark:text-bright-gray">
-              Webhooks
-            </h2>
+            <div className="flex items-center gap-2">
+              <h2 className="text-base font-semibold text-jet dark:text-bright-gray">
+                Webhook URL
+              </h2>
+              {webhookUrl && (
+                <div className="mb-1">
+                  <CopyButton textToCopy={webhookUrl} padding="p-1" />
+                </div>
+              )}
+            </div>
             {webhookUrl ? (
-              <div className="flex flex-wrap items-center gap-2">
-                <span className="font-mono text-sm text-gray-700 dark:text-[#ECECF1]">
+              <div className="flex flex-col flex-wrap items-start gap-2">
+                <p className="f break-all font-mono text-sm text-gray-700 dark:text-[#ECECF1]">
                   {webhookUrl}
-                </span>
-                <button className="hover:bg-vi</button>olets-are-blue w-28 rounded-3xl border border-solid border-violets-are-blue px-5 py-2 text-sm font-medium text-violets-are-blue transition-colors hover:bg-violets-are-blue hover:text-white">
-                  Copy
-                </button>
+                </p>
               </div>
             ) : (
               <button
-                className="hover:bg-vi</button>olets-are-blue w-28 rounded-3xl border border-solid border-violets-are-blue px-5 py-2 text-sm font-medium text-violets-are-blue transition-colors hover:bg-violets-are-blue hover:text-white"
+                className="hover:bg-vi</button>olets-are-blue flex w-28 items-center justify-center rounded-3xl border border-solid border-violets-are-blue px-5 py-2 text-sm font-medium text-violets-are-blue transition-colors hover:bg-violets-are-blue hover:text-white"
                 onClick={handleGenerateWebhook}
               >
-                {loadingStates.webhook ? <Spinner /> : 'Generate'}
+                {loadingStates.webhook ? (
+                  <Spinner size="small" color="#976af3" />
+                ) : (
+                  'Generate'
+                )}
               </button>
             )}
           </div>
diff --git a/frontend/src/settings/Analytics.tsx b/frontend/src/settings/Analytics.tsx
index 04bec5c2..535200ef 100644
--- a/frontend/src/settings/Analytics.tsx
+++ b/frontend/src/settings/Analytics.tsx
@@ -1,11 +1,5 @@
 import {
-  BarElement,
-  CategoryScale,
-  Chart as ChartJS,
-  Legend,
-  LinearScale,
-  Title,
-  Tooltip,
+    BarElement, CategoryScale, Chart as ChartJS, Legend, LinearScale, Title, Tooltip
 } from 'chart.js';
 import { useEffect, useState } from 'react';
 import { Bar } from 'react-chartjs-2';
@@ -71,7 +65,6 @@ export default function Analytics({ agentId }: AnalyticsProps) {
     string,
     { positive: number; negative: number }
   > | null>(null);
-  const [agent, setAgent] = useState<Agent>();
   const [messagesFilter, setMessagesFilter] = useState<{
     label: string;
     value: string;
@@ -97,21 +90,6 @@ export default function Analytics({ agentId }: AnalyticsProps) {
   const [loadingMessages, setLoadingMessages] = useLoaderState(true);
   const [loadingTokens, setLoadingTokens] = useLoaderState(true);
   const [loadingFeedback, setLoadingFeedback] = useLoaderState(true);
-  const [loadingAgent, setLoadingAgent] = useLoaderState(true);
-
-  const fetchAgent = async (agentId: string) => {
-    setLoadingAgent(true);
-    try {
-      const response = await userService.getAgent(agentId ?? '', token);
-      if (!response.ok) throw new Error('Failed to fetch Chatbots');
-      const agent = await response.json();
-      setAgent(agent);
-    } catch (error) {
-      console.error(error);
-    } finally {
-      setLoadingAgent(false);
-    }
-  };
 
   const fetchMessagesData = async (agent_id?: string, filter?: string) => {
     setLoadingMessages(true);
@@ -174,27 +152,22 @@ export default function Analytics({ agentId }: AnalyticsProps) {
   };
 
   useEffect(() => {
-    if (agentId) fetchAgent(agentId);
-  }, []);
-
-  useEffect(() => {
-    const id = agent?.id;
+    const id = agentId;
     const filter = messagesFilter;
     fetchMessagesData(id, filter?.value);
-  }, [agent, messagesFilter]);
+  }, [agentId, messagesFilter]);
 
   useEffect(() => {
-    const id = agent?.id;
+    const id = agentId;
     const filter = tokenUsageFilter;
     fetchTokenData(id, filter?.value);
-  }, [agent, tokenUsageFilter]);
+  }, [agentId, tokenUsageFilter]);
 
   useEffect(() => {
-    const id = agent?.id;
+    const id = agentId;
     const filter = feedbackFilter;
     fetchFeedbackData(id, filter?.value);
-  }, [agent, feedbackFilter]);
-
+  }, [agentId, feedbackFilter]);
   return (
     <div className="mt-12">
       {/* Messages Analytics */}
diff --git a/frontend/src/settings/Logs.tsx b/frontend/src/settings/Logs.tsx
index a14f5966..50d67b54 100644
--- a/frontend/src/settings/Logs.tsx
+++ b/frontend/src/settings/Logs.tsx
@@ -181,8 +181,7 @@ function Log({
         </p>
         <div className="my-px w-fit">
           <CopyButton
-            text={JSON.stringify(filteredLog)}
-            colorLight="transparent"
+            textToCopy={JSON.stringify(filteredLog)}
             showText={true}
           />
         </div>

From cc67d4a1e2034df2fb196d2b2d3e07beed8c5224 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 28 Apr 2025 17:49:29 +0100
Subject: [PATCH 27/39] process all request data implicitly

---
 application/worker.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/application/worker.py b/application/worker.py
index e7ac85a9..bebd88a6 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -547,9 +547,7 @@ def agent_webhook_worker(self, agent_id, payload):
         agent_config = agents_collection.find_one({"_id": agent_oid})
         if not agent_config:
             raise ValueError(f"Agent with ID {agent_id} not found.")
-        input_data = payload.get("query", "")
-        if input_data is None or not isinstance(input_data, str):
-            input_data = json.dumps(payload)
+        input_data = json.dumps(payload)
     except Exception as e:
         logging.error(f"Error processing agent webhook: {e}", exc_info=True)
         return {"status": "error", "error": str(e)}

From 22c7015c695f2053492b2bc43c9c595bccd01411 Mon Sep 17 00:00:00 2001
From: Siddhant Rai <siddhant.rai.5686@gmail.com>
Date: Tue, 29 Apr 2025 00:29:16 +0530
Subject: [PATCH 28/39] refactor: webhook listener handles both POST and GET
 requests

---
 application/api/user/routes.py | 192 +++++++++++++++++++++++----------
 1 file changed, 134 insertions(+), 58 deletions(-)

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 528a4c29..f8e40b24 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -2,9 +2,10 @@ import datetime
 import json
 import math
 import os
+import secrets
 import shutil
 import uuid
-import secrets
+from functools import wraps
 
 from bson.binary import Binary, UuidRepresentation
 from bson.dbref import DBRef
@@ -18,8 +19,8 @@ from application.agents.tools.tool_manager import ToolManager
 from application.api.user.tasks import (
     ingest,
     ingest_remote,
-    store_attachment,
     process_agent_webhook,
+    store_attachment,
 )
 from application.core.mongo_db import MongoDB
 from application.core.settings import settings
@@ -419,13 +420,14 @@ class UploadFile(Resource):
 
         user = secure_filename(decoded_token.get("sub"))
         job_name = secure_filename(request.form["name"])
-        
+
         try:
             from application.storage.storage_creator import StorageCreator
+
             storage = StorageCreator.get_storage()
-            
+
             base_path = f"{settings.UPLOAD_FOLDER}/{user}/{job_name}"
-            
+
             if len(files) > 1:
                 temp_files = []
                 for file in files:
@@ -434,41 +436,56 @@ class UploadFile(Resource):
                     storage.save_file(file, temp_path)
                     temp_files.append(temp_path)
                     print(f"Saved file: {filename}")
-                
+
                 zip_filename = f"{job_name}.zip"
                 zip_path = f"{base_path}/{zip_filename}"
-                
+
                 def create_zip_archive(temp_paths, **kwargs):
                     import tempfile
+
                     with tempfile.TemporaryDirectory() as temp_dir:
                         for path in temp_paths:
                             file_data = storage.get_file(path)
-                            with open(os.path.join(temp_dir, os.path.basename(path)), 'wb') as f:
+                            with open(
+                                os.path.join(temp_dir, os.path.basename(path)), "wb"
+                            ) as f:
                                 f.write(file_data.read())
-                        
+
                         # Create zip archive
                         zip_temp = shutil.make_archive(
                             base_name=os.path.join(temp_dir, job_name),
                             format="zip",
-                            root_dir=temp_dir
+                            root_dir=temp_dir,
                         )
-                        
+
                         return zip_temp
-                
+
                 zip_temp_path = create_zip_archive(temp_files)
-                with open(zip_temp_path, 'rb') as zip_file:
+                with open(zip_temp_path, "rb") as zip_file:
                     storage.save_file(zip_file, zip_path)
-                
+
                 # Clean up temp files
                 for temp_path in temp_files:
                     storage.delete_file(temp_path)
-                
+
                 task = ingest.delay(
                     settings.UPLOAD_FOLDER,
                     [
-                        ".rst", ".md", ".pdf", ".txt", ".docx", ".csv", ".epub",
-                        ".html", ".mdx", ".json", ".xlsx", ".pptx", ".png",
-                        ".jpg", ".jpeg",
+                        ".rst",
+                        ".md",
+                        ".pdf",
+                        ".txt",
+                        ".docx",
+                        ".csv",
+                        ".epub",
+                        ".html",
+                        ".mdx",
+                        ".json",
+                        ".xlsx",
+                        ".pptx",
+                        ".png",
+                        ".jpg",
+                        ".jpeg",
                     ],
                     job_name,
                     zip_filename,
@@ -479,15 +496,27 @@ class UploadFile(Resource):
                 file = files[0]
                 filename = secure_filename(file.filename)
                 file_path = f"{base_path}/{filename}"
-                
+
                 storage.save_file(file, file_path)
-                
+
                 task = ingest.delay(
                     settings.UPLOAD_FOLDER,
                     [
-                        ".rst", ".md", ".pdf", ".txt", ".docx", ".csv", ".epub",
-                        ".html", ".mdx", ".json", ".xlsx", ".pptx", ".png",
-                        ".jpg", ".jpeg",
+                        ".rst",
+                        ".md",
+                        ".pdf",
+                        ".txt",
+                        ".docx",
+                        ".csv",
+                        ".epub",
+                        ".html",
+                        ".mdx",
+                        ".json",
+                        ".xlsx",
+                        ".pptx",
+                        ".png",
+                        ".jpg",
+                        ".jpeg",
                     ],
                     job_name,
                     filename,
@@ -497,7 +526,7 @@ class UploadFile(Resource):
         except Exception as err:
             current_app.logger.error(f"Error uploading file: {err}")
             return make_response(jsonify({"success": False}), 400)
-            
+
         return make_response(jsonify({"success": True, "task_id": task.id}), 200)
 
 
@@ -1386,39 +1415,88 @@ class AgentWebhook(Resource):
         )
 
 
-@user_ns.route(f"/api/webhooks/agents/<string:webhook_token>")
-class AgentWebhookListener(Resource):
-    @api.doc(description="Webhook listener for agent events")
-    def post(self, webhook_token):
+def require_agent(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        webhook_token = kwargs.get("webhook_token")
+        if not webhook_token:
+            return make_response(
+                jsonify({"success": False, "message": "Webhook token missing"}), 400
+            )
+
         agent = agents_collection.find_one(
             {"incoming_webhook_token": webhook_token}, {"_id": 1}
         )
         if not agent:
+            current_app.logger.warning(
+                f"Webhook attempt with invalid token: {webhook_token}"
+            )
             return make_response(
                 jsonify({"success": False, "message": "Agent not found"}), 404
             )
-        data = request.get_json()
-        if not data:
-            return make_response(
-                jsonify({"success": False, "message": "No data provided"}), 400
+
+        kwargs["agent"] = agent
+        kwargs["agent_id_str"] = str(agent["_id"])
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
+@user_ns.route(f"/api/webhooks/agents/<string:webhook_token>")
+class AgentWebhookListener(Resource):
+    method_decorators = [require_agent]
+
+    def _enqueue_webhook_task(self, agent_id_str, payload, source_method):
+        if not payload:
+            current_app.logger.warning(
+                f"Webhook ({source_method}) received for agent {agent_id_str} with empty payload."
             )
 
-        agent_id_str = str(agent["_id"])
         current_app.logger.info(
-            f"Incoming webhook received for agent {agent_id_str}. Enqueuing task."
+            f"Incoming {source_method} webhook for agent {agent_id_str}. Enqueuing task with payload: {payload}"
         )
 
         try:
             task = process_agent_webhook.delay(
                 agent_id=agent_id_str,
-                payload=data,
+                payload=payload,
             )
+            current_app.logger.info(
+                f"Task {task.id} enqueued for agent {agent_id_str} ({source_method})."
+            )
+            return make_response(jsonify({"success": True, "task_id": task.id}), 200)
         except Exception as err:
-            current_app.logger.error(f"Error processing webhook: {err}")
-            return make_response(
-                jsonify({"success": False, "message": "Error processing webhook"}), 400
+            current_app.logger.error(
+                f"Error enqueuing webhook task ({source_method}) for agent {agent_id_str}: {err}",
+                exc_info=True,
             )
-        return make_response(jsonify({"success": True, "task_id": task.id}), 200)
+            return make_response(
+                jsonify({"success": False, "message": "Error processing webhook"}), 500
+            )
+
+    @api.doc(
+        description="Webhook listener for agent events (POST). Expects JSON payload, which is used to trigger processing.",
+    )
+    def post(self, webhook_token, agent, agent_id_str):
+        payload = request.get_json(silent=True)  # None on bad/non-JSON body, handled below
+        if payload is None:
+            return make_response(
+                jsonify(
+                    {
+                        "success": False,
+                        "message": "Invalid or missing JSON data in request body",
+                    }
+                ),
+                400,
+            )
+        return self._enqueue_webhook_task(agent_id_str, payload, source_method="POST")
+
+    @api.doc(
+        description="Webhook listener for agent events (GET). Uses URL query parameters as payload to trigger processing.",
+    )
+    def get(self, webhook_token, agent, agent_id_str):
+        payload = request.args.to_dict(flat=True)
+        return self._enqueue_webhook_task(agent_id_str, payload, source_method="GET")
 
 
 @user_ns.route("/api/share")
@@ -2872,9 +2950,9 @@ class StoreAttachment(Resource):
         decoded_token = request.decoded_token
         if not decoded_token:
             return make_response(jsonify({"success": False}), 401)
-        
+
         file = request.files.get("file")
-        
+
         if not file or file.filename == "":
             return make_response(
                 jsonify({"status": "error", "message": "Missing file"}),
@@ -2882,35 +2960,33 @@ class StoreAttachment(Resource):
             )
 
         user = secure_filename(decoded_token.get("sub"))
-        
+
         try:
             attachment_id = ObjectId()
             original_filename = secure_filename(file.filename)
             relative_path = f"{settings.UPLOAD_FOLDER}/{user}/attachments/{str(attachment_id)}/{original_filename}"
-            
+
             file_content = file.read()
-            
+
             file_info = {
                 "filename": original_filename,
                 "attachment_id": str(attachment_id),
                 "path": relative_path,
-                "file_content": file_content
+                "file_content": file_content,
             }
-            
-            task = store_attachment.delay(
-                file_info,
-                user
-            )
-            
+
+            task = store_attachment.delay(file_info, user)
+
             return make_response(
-                jsonify({
-                    "success": True,
-                    "task_id": task.id,
-                    "message": "File uploaded successfully. Processing started."
-                }),
-                200
+                jsonify(
+                    {
+                        "success": True,
+                        "task_id": task.id,
+                        "message": "File uploaded successfully. Processing started.",
+                    }
+                ),
+                200,
             )
         except Exception as err:
             current_app.logger.error(f"Error storing attachment: {err}")
             return make_response(jsonify({"success": False, "error": str(err)}), 400)
-

From 330276cdf7363f4b9b0c2e0edb7dfa0b52cf73a7 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 28 Apr 2025 22:32:13 +0100
Subject: [PATCH 29/39] fix: lint for ruff

---
 application/api/user/routes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index f8e40b24..d96d6202 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -1442,7 +1442,7 @@ def require_agent(func):
     return wrapper
 
 
-@user_ns.route(f"/api/webhooks/agents/<string:webhook_token>")
+@user_ns.route("/api/webhooks/agents/<string:webhook_token>")
 class AgentWebhookListener(Resource):
     method_decorators = [require_agent]
 

From 4b2faae29aa1a619a371c66f09d062e9bedcf92e Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Thu, 1 May 2025 17:15:08 +0300
Subject: [PATCH 30/39] Update README.md

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 598cfa1d..bc4113ee 100644
--- a/README.md
+++ b/README.md
@@ -49,10 +49,10 @@
 - [x] Manually updating chunks in the app UI (Feb 2025)
 - [x] Devcontainer for easy development (Feb 2025)
 - [x] ReACT agent (March 2025)
-- [ ] Chatbots menu re-design to handle tools, agent types, and more (April 2025)
-- [ ] New input box in the conversation menu (April 2025)
-- [ ] Anthropic Tool compatibility (April 2025)
-- [ ] Add triggerable actions / tools (webhook) (April 2025)
+- [x] Chatbots menu re-design to handle tools, agent types, and more (April 2025)
+- [x] New input box in the conversation menu (April 2025)
+- [x] Add triggerable actions / tools (webhook) (April 2025)
+- [ ] Anthropic Tool compatibility (May 2025)
 - [ ] Add OAuth 2.0 authentication for tools and sources
 - [ ] Agent scheduling
 

From ae700e8f3ab9fc2655b8d0cd660b94d6ba0f7f18 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Sun, 4 May 2025 18:56:33 +0100
Subject: [PATCH 31/39] fix: display only 2 demo buttons on mobile

---
 frontend/src/Hero.tsx | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/frontend/src/Hero.tsx b/frontend/src/Hero.tsx
index 0161eac2..583f8a1b 100644
--- a/frontend/src/Hero.tsx
+++ b/frontend/src/Hero.tsx
@@ -38,9 +38,12 @@ export default function Hero({
                 <button
                   key={key}
                   onClick={() => handleQuestion({ question: demo.query })}
-                  className="w-full rounded-[66px] border bg-transparent px-6 py-[14px] text-left transition-colors
+                  className={`
+                    w-full rounded-[66px] border bg-transparent px-6 py-[14px] text-left transition-colors
                     border-dark-gray text-just-black hover:bg-cultured
-                    dark:border-dim-gray dark:text-chinese-white dark:hover:bg-charleston-green"
+                    dark:border-dim-gray dark:text-chinese-white dark:hover:bg-charleston-green
+                    ${key >= 2 ? 'hidden md:block' : '' /* show only 2 buttons on mobile */}
+                  `}
                 >
                   <p className="mb-2 font-semibold text-black-1000 dark:text-bright-gray">
                     {demo.header}

From cf333873fd8a210789385767a4f12a63a6c96d9f Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 5 May 2025 00:08:56 +0100
Subject: [PATCH 32/39] fix: json body

---
 application/agents/tools/api_tool.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/application/agents/tools/api_tool.py b/application/agents/tools/api_tool.py
index 092c3569..063313c4 100644
--- a/application/agents/tools/api_tool.py
+++ b/application/agents/tools/api_tool.py
@@ -25,8 +25,8 @@ class APITool(Tool):
     def _make_api_call(self, url, method, headers, query_params, body):
         if query_params:
             url = f"{url}?{requests.compat.urlencode(query_params)}"
-        if isinstance(body, dict):
-            body = json.dumps(body)
+        # if isinstance(body, dict):
+        #     body = json.dumps(body)
         try:
             print(f"Making API call: {method} {url} with body: {body}")
             if body == "{}":

From 481df4d6047194ecadac267fa5965c9174d4691a Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 5 May 2025 13:12:39 +0100
Subject: [PATCH 33/39] fix: enhance error logging with exception info across
 multiple modules

---
 application/agents/llm_handler.py           | 3 +++
 application/api/answer/routes.py            | 7 +++----
 application/api/user/routes.py              | 8 ++++----
 application/cache.py                        | 8 ++++----
 application/llm/google_ai.py                | 4 ++--
 application/llm/openai.py                   | 6 +++---
 application/logging.py                      | 2 +-
 application/parser/embedding_pipeline.py    | 4 ++--
 application/parser/remote/crawler_loader.py | 5 +++--
 application/parser/remote/sitemap_loader.py | 3 ++-
 application/parser/remote/web_loader.py     | 5 +++--
 application/retriever/classic_rag.py        | 3 ++-
 application/vectorstore/mongodb.py          | 5 +++--
 application/worker.py                       | 4 ++--
 14 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/application/agents/llm_handler.py b/application/agents/llm_handler.py
index bf39f625..9e37408c 100644
--- a/application/agents/llm_handler.py
+++ b/application/agents/llm_handler.py
@@ -1,5 +1,6 @@
 import json
 import logging
+import logging
 from abc import ABC, abstractmethod
 
 from application.logging import build_stack_data
@@ -137,6 +138,7 @@ class OpenAILLMHandler(LLMHandler):
 
                         messages = self.prepare_messages_with_attachments(agent, messages, attachments)
                     except Exception as e:
+                        logging.error(f"Error executing tool: {str(e)}", exc_info=True)
                         messages.append(
                             {
                                 "role": "tool",
@@ -229,6 +231,7 @@ class OpenAILLMHandler(LLMHandler):
                                     )
 
                                 except Exception as e:
+                                    logging.error(f"Error executing tool: {str(e)}", exc_info=True)
                                     messages.append(
                                         {
                                             "role": "assistant",
diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index 2a8476d8..abc1f9ba 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -105,7 +105,7 @@ def get_agent_key(agent_id, user_id):
         raise Exception("Unauthorized access to the agent", 403)
 
     except Exception as e:
-        logger.error(f"Error in get_agent_key: {str(e)}")
+        logger.error(f"Error in get_agent_key: {str(e)}", exc_info=True)
         raise
 
 
@@ -351,8 +351,7 @@ def complete_stream(
         data = json.dumps({"type": "end"})
         yield f"data: {data}\n\n"
     except Exception as e:
-        logger.error(f"Error in stream: {str(e)}")
-        logger.error(traceback.format_exc())
+        logger.error(f"Error in stream: {str(e)}", exc_info=True)
         data = json.dumps(
             {
                 "type": "error",
@@ -882,6 +881,6 @@ def get_attachments_content(attachment_ids, user):
             if attachment_doc:
                 attachments.append(attachment_doc)
         except Exception as e:
-            logger.error(f"Error retrieving attachment {attachment_id}: {e}")
+            logger.error(f"Error retrieving attachment {attachment_id}: {e}", exc_info=True)
 
     return attachments
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index d96d6202..43e54177 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -2755,7 +2755,7 @@ class GetChunks(Resource):
             )
 
         except Exception as e:
-            current_app.logger.error(f"Error getting chunks: {e}")
+            current_app.logger.error(f"Error getting chunks: {e}", exc_info=True)
             return make_response(jsonify({"success": False}), 500)
 
 
@@ -2809,7 +2809,7 @@ class AddChunk(Resource):
                 201,
             )
         except Exception as e:
-            current_app.logger.error(f"Error adding chunk: {e}")
+            current_app.logger.error(f"Error adding chunk: {e}", exc_info=True)
             return make_response(jsonify({"success": False}), 500)
 
 
@@ -2849,7 +2849,7 @@ class DeleteChunk(Resource):
                     404,
                 )
         except Exception as e:
-            current_app.logger.error(f"Error deleting chunk: {e}")
+            current_app.logger.error(f"Error deleting chunk: {e}", exc_info=True)
             return make_response(jsonify({"success": False}), 500)
 
 
@@ -2931,7 +2931,7 @@ class UpdateChunk(Resource):
                 200,
             )
         except Exception as e:
-            current_app.logger.error(f"Error updating chunk: {e}")
+            current_app.logger.error(f"Error updating chunk: {e}", exc_info=True)
             return make_response(jsonify({"success": False}), 500)
 
 
diff --git a/application/cache.py b/application/cache.py
index 117b444a..3fdb6b8d 100644
--- a/application/cache.py
+++ b/application/cache.py
@@ -61,14 +61,14 @@ def gen_cache(func):
                 if cached_response:
                     return cached_response.decode("utf-8")
             except Exception as e:
-                logger.error(f"Error getting cached response: {e}")
+                logger.error(f"Error getting cached response: {e}", exc_info=True)
 
         result = func(self, model, messages, stream, tools, *args, **kwargs)
         if redis_client and isinstance(result, str):
             try:
                 redis_client.set(cache_key, result, ex=1800)
             except Exception as e:
-                logger.error(f"Error setting cache: {e}")
+                logger.error(f"Error setting cache: {e}", exc_info=True)
 
         return result
 
@@ -100,7 +100,7 @@ def stream_cache(func):
                         time.sleep(0.03)  # Simulate streaming delay
                     return
             except Exception as e:
-                logger.error(f"Error getting cached stream: {e}")
+                logger.error(f"Error getting cached stream: {e}", exc_info=True)
 
         stream_cache_data = []
         for chunk in func(self, model, messages, stream, tools, *args, **kwargs):
@@ -112,6 +112,6 @@ def stream_cache(func):
                 redis_client.set(cache_key, json.dumps(stream_cache_data), ex=1800)
                 logger.info(f"Stream cache saved for key: {cache_key}")
             except Exception as e:
-                logger.error(f"Error setting stream cache: {e}")
+                logger.error(f"Error setting stream cache: {e}", exc_info=True)
 
     return wrapper
diff --git a/application/llm/google_ai.py b/application/llm/google_ai.py
index a56616d2..b749431b 100644
--- a/application/llm/google_ai.py
+++ b/application/llm/google_ai.py
@@ -78,7 +78,7 @@ class GoogleLLM(BaseLLM):
                     logging.info(f"GoogleLLM: Successfully uploaded file, got URI: {file_uri}")
                     files.append({"file_uri": file_uri, "mime_type": mime_type})
                 except Exception as e:
-                    logging.error(f"GoogleLLM: Error uploading file: {e}")
+                    logging.error(f"GoogleLLM: Error uploading file: {e}", exc_info=True)
                     if 'content' in attachment:
                         prepared_messages[user_message_index]["content"].append({
                             "type": "text",
@@ -131,7 +131,7 @@ class GoogleLLM(BaseLLM):
 
             return file_uri
         except Exception as e:
-            logging.error(f"Error uploading file to Google AI: {e}")
+            logging.error(f"Error uploading file to Google AI: {e}", exc_info=True)
             raise
 
     def _clean_messages_google(self, messages):
diff --git a/application/llm/openai.py b/application/llm/openai.py
index 248fd7e2..c918768d 100644
--- a/application/llm/openai.py
+++ b/application/llm/openai.py
@@ -213,7 +213,7 @@ class OpenAILLM(BaseLLM):
                         }
                     })
                 except Exception as e:
-                    logging.error(f"Error processing image attachment: {e}")
+                    logging.error(f"Error processing image attachment: {e}", exc_info=True)
                     if 'content' in attachment:
                         prepared_messages[user_message_index]["content"].append({
                             "type": "text",
@@ -228,7 +228,7 @@ class OpenAILLM(BaseLLM):
                         "file": {"file_id": file_id}
                     })
                 except Exception as e:
-                    logging.error(f"Error uploading PDF to OpenAI: {e}")
+                    logging.error(f"Error uploading PDF to OpenAI: {e}", exc_info=True)
                     if 'content' in attachment:
                         prepared_messages[user_message_index]["content"].append({
                             "type": "text",
@@ -301,7 +301,7 @@ class OpenAILLM(BaseLLM):
 
             return file_id
         except Exception as e:
-            logging.error(f"Error uploading file to OpenAI: {e}")
+            logging.error(f"Error uploading file to OpenAI: {e}", exc_info=True)
             raise
 
 
diff --git a/application/logging.py b/application/logging.py
index b447ffa8..d48fb17e 100644
--- a/application/logging.py
+++ b/application/logging.py
@@ -151,4 +151,4 @@ def _log_to_mongodb(
         logging.debug(f"Logged activity to MongoDB: {activity_id}")
 
     except Exception as e:
-        logging.error(f"Failed to log to MongoDB: {e}")
+        logging.error(f"Failed to log to MongoDB: {e}", exc_info=True)
diff --git a/application/parser/embedding_pipeline.py b/application/parser/embedding_pipeline.py
index 0435cd14..87d9a8d5 100755
--- a/application/parser/embedding_pipeline.py
+++ b/application/parser/embedding_pipeline.py
@@ -19,7 +19,7 @@ def add_text_to_store_with_retry(store, doc, source_id):
         doc.metadata["source_id"] = str(source_id)
         store.add_texts([doc.page_content], metadatas=[doc.metadata])
     except Exception as e:
-        logging.error(f"Failed to add document with retry: {e}")
+        logging.error(f"Failed to add document with retry: {e}", exc_info=True)
         raise
 
 
@@ -75,7 +75,7 @@ def embed_and_store_documents(docs, folder_name, source_id, task_status):
             # Add document to vector store
             add_text_to_store_with_retry(store, doc, source_id)
         except Exception as e:
-            logging.error(f"Error embedding document {idx}: {e}")
+            logging.error(f"Error embedding document {idx}: {e}", exc_info=True)
             logging.info(f"Saving progress at document {idx} out of {total_docs}")
             store.save_local(folder_name)
             break
diff --git a/application/parser/remote/crawler_loader.py b/application/parser/remote/crawler_loader.py
index c2da230b..2ff6cf6f 100644
--- a/application/parser/remote/crawler_loader.py
+++ b/application/parser/remote/crawler_loader.py
@@ -1,3 +1,4 @@
+import logging
 import requests
 from urllib.parse import urlparse, urljoin
 from bs4 import BeautifulSoup
@@ -42,7 +43,7 @@ class CrawlerLoader(BaseRemote):
                         )
                     )
             except Exception as e:
-                print(f"Error processing URL {current_url}: {e}")
+                logging.error(f"Error processing URL {current_url}: {e}", exc_info=True)
                 continue
 
             # Parse the HTML content to extract all links
@@ -61,4 +62,4 @@ class CrawlerLoader(BaseRemote):
             if self.limit is not None and len(visited_urls) >= self.limit:
                 break
 
-        return loaded_content
\ No newline at end of file
+        return loaded_content
diff --git a/application/parser/remote/sitemap_loader.py b/application/parser/remote/sitemap_loader.py
index 8066f4f6..6d54ea9b 100644
--- a/application/parser/remote/sitemap_loader.py
+++ b/application/parser/remote/sitemap_loader.py
@@ -1,3 +1,4 @@
+import logging
 import requests
 import re  # Import regular expression library
 import xml.etree.ElementTree as ET
@@ -32,7 +33,7 @@ class SitemapLoader(BaseRemote):
                 documents.extend(loader.load())
                 processed_urls += 1  # Increment the counter after processing each URL
             except Exception as e:
-                print(f"Error processing URL {url}: {e}")
+                logging.error(f"Error processing URL {url}: {e}", exc_info=True)
                 continue
 
         return documents
diff --git a/application/parser/remote/web_loader.py b/application/parser/remote/web_loader.py
index cc1cdcb8..77cb145b 100644
--- a/application/parser/remote/web_loader.py
+++ b/application/parser/remote/web_loader.py
@@ -1,3 +1,4 @@
+import logging
 from application.parser.remote.base import BaseRemote
 from application.parser.schema.base import Document
 from langchain_community.document_loaders import WebBaseLoader
@@ -39,6 +40,6 @@ class WebLoader(BaseRemote):
                         )
                     )
             except Exception as e:
-                print(f"Error processing URL {url}: {e}")
+                logging.error(f"Error processing URL {url}: {e}", exc_info=True)
                 continue
-        return documents
\ No newline at end of file
+        return documents
diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index 08771337..b8ac69e4 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -1,3 +1,4 @@
+import logging
 from application.core.settings import settings
 from application.llm.llm_creator import LLMCreator
 from application.retriever.base import BaseRetriever
@@ -72,7 +73,7 @@ class ClassicRAG(BaseRetriever):
             print(f"Rephrased query: {rephrased_query}")
             return rephrased_query if rephrased_query else self.original_question
         except Exception as e:
-            print(f"Error rephrasing query: {e}")
+            logging.error(f"Error rephrasing query: {e}", exc_info=True)
             return self.original_question
 
     def _get_data(self):
diff --git a/application/vectorstore/mongodb.py b/application/vectorstore/mongodb.py
index 94b757e0..aadd4652 100644
--- a/application/vectorstore/mongodb.py
+++ b/application/vectorstore/mongodb.py
@@ -1,3 +1,4 @@
+import logging
 from application.core.settings import settings
 from application.vectorstore.base import BaseVectorStore
 from application.vectorstore.document_class import Document
@@ -146,7 +147,7 @@ class MongoDBVectorStore(BaseVectorStore):
 
             return chunks
         except Exception as e:
-            print(f"Error getting chunks: {e}")
+            logging.error(f"Error getting chunks: {e}", exc_info=True)
             return []
 
     def add_chunk(self, text, metadata=None):
@@ -172,5 +173,5 @@ class MongoDBVectorStore(BaseVectorStore):
             result = self._collection.delete_one({"_id": object_id})
             return result.deleted_count > 0
         except Exception as e:
-            print(f"Error deleting chunk: {e}")
+            logging.error(f"Error deleting chunk: {e}", exc_info=True)
             return False
diff --git a/application/worker.py b/application/worker.py
index bebd88a6..619993c9 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -75,7 +75,7 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
             zip_ref.extractall(extract_to)
         os.remove(zip_path)  # Remove the zip file after extracting
     except Exception as e:
-        logging.error(f"Error extracting zip file {zip_path}: {e}")
+        logging.error(f"Error extracting zip file {zip_path}: {e}", exc_info=True)
         return
 
     # Check for nested zip files and extract them
@@ -403,7 +403,7 @@ def sync(
             doc_id,
         )
     except Exception as e:
-        logging.error(f"Error during sync: {e}")
+        logging.error(f"Error during sync: {e}", exc_info=True)
         return {"status": "error", "error": str(e)}
     return {"status": "success"}
 

From 2147b3f06f8a81739a1d3cdb7b0f5a85e3633398 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 5 May 2025 13:14:56 +0100
Subject: [PATCH 34/39] lint: mini fix

---
 application/agents/llm_handler.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/application/agents/llm_handler.py b/application/agents/llm_handler.py
index 9e37408c..1b995f71 100644
--- a/application/agents/llm_handler.py
+++ b/application/agents/llm_handler.py
@@ -1,6 +1,5 @@
 import json
 import logging
-import logging
 from abc import ABC, abstractmethod
 
 from application.logging import build_stack_data

From 5ecfb0ce6d58a1d7cd1252b59ef17211dcf9ba85 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 5 May 2025 17:59:37 +0100
Subject: [PATCH 35/39] fix: enhance error logging

---
 application/api/user/routes.py | 94 +++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 43e54177..30399f06 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -110,7 +110,7 @@ class DeleteConversation(Resource):
                 {"_id": ObjectId(conversation_id), "user": decoded_token["sub"]}
             )
         except Exception as err:
-            current_app.logger.error(f"Error deleting conversation: {err}")
+            current_app.logger.error(f"Error deleting conversation: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
         return make_response(jsonify({"success": True}), 200)
 
@@ -128,7 +128,7 @@ class DeleteAllConversations(Resource):
         try:
             conversations_collection.delete_many({"user": user_id})
         except Exception as err:
-            current_app.logger.error(f"Error deleting all conversations: {err}")
+            current_app.logger.error(f"Error deleting all conversations: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
         return make_response(jsonify({"success": True}), 200)
 
@@ -166,7 +166,7 @@ class GetConversations(Resource):
                 for conversation in conversations
             ]
         except Exception as err:
-            current_app.logger.error(f"Error retrieving conversations: {err}")
+            current_app.logger.error(f"Error retrieving conversations: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
         return make_response(jsonify(list_conversations), 200)
 
@@ -194,7 +194,7 @@ class GetSingleConversation(Resource):
             if not conversation:
                 return make_response(jsonify({"status": "not found"}), 404)
         except Exception as err:
-            current_app.logger.error(f"Error retrieving conversation: {err}")
+            current_app.logger.error(f"Error retrieving conversation: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         data = {
@@ -236,7 +236,7 @@ class UpdateConversationName(Resource):
                 {"$set": {"name": data["name"]}},
             )
         except Exception as err:
-            current_app.logger.error(f"Error updating conversation name: {err}")
+            current_app.logger.error(f"Error updating conversation name: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True}), 200)
@@ -314,7 +314,7 @@ class SubmitFeedback(Resource):
                 )
 
         except Exception as err:
-            current_app.logger.error(f"Error submitting feedback: {err}")
+            current_app.logger.error(f"Error submitting feedback: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True}), 200)
@@ -338,7 +338,7 @@ class DeleteByIds(Resource):
             if result:
                 return make_response(jsonify({"success": True}), 200)
         except Exception as err:
-            current_app.logger.error(f"Error deleting indexes: {err}")
+            current_app.logger.error(f"Error deleting indexes: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": False}), 400)
@@ -377,7 +377,7 @@ class DeleteOldIndexes(Resource):
         except FileNotFoundError:
             pass
         except Exception as err:
-            current_app.logger.error(f"Error deleting old indexes: {err}")
+            current_app.logger.error(f"Error deleting old indexes: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         sources_collection.delete_one({"_id": ObjectId(source_id)})
@@ -524,7 +524,7 @@ class UploadFile(Resource):
                 )
 
         except Exception as err:
-            current_app.logger.error(f"Error uploading file: {err}")
+            current_app.logger.error(f"Error uploading file: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True, "task_id": task.id}), 200)
@@ -577,7 +577,7 @@ class UploadRemote(Resource):
                 loader=data["source"],
             )
         except Exception as err:
-            current_app.logger.error(f"Error uploading remote source: {err}")
+            current_app.logger.error(f"Error uploading remote source: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True, "task_id": task.id}), 200)
@@ -610,7 +610,7 @@ class TaskStatus(Resource):
             ):
                 task_meta = str(task_meta)  # Convert to a string representation
         except Exception as err:
-            current_app.logger.error(f"Error getting task status: {err}")
+            current_app.logger.error(f"Error getting task status: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"status": task.status, "result": task_meta}), 200)
@@ -689,7 +689,7 @@ class PaginatedSources(Resource):
             return make_response(jsonify(response), 200)
 
         except Exception as err:
-            current_app.logger.error(f"Error retrieving paginated sources: {err}")
+            current_app.logger.error(f"Error retrieving paginated sources: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
 
@@ -753,7 +753,7 @@ class CombinedJson(Resource):
                 )
 
         except Exception as err:
-            current_app.logger.error(f"Error retrieving sources: {err}")
+            current_app.logger.error(f"Error retrieving sources: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify(data), 200)
@@ -780,7 +780,7 @@ class CheckDocs(Resource):
             if os.path.exists(vectorstore) or data["docs"] == "default":
                 return {"status": "exists"}, 200
         except Exception as err:
-            current_app.logger.error(f"Error checking document: {err}")
+            current_app.logger.error(f"Error checking document: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"status": "not found"}), 404)
@@ -822,7 +822,7 @@ class CreatePrompt(Resource):
             )
             new_id = str(resp.inserted_id)
         except Exception as err:
-            current_app.logger.error(f"Error creating prompt: {err}")
+            current_app.logger.error(f"Error creating prompt: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"id": new_id}), 200)
@@ -853,7 +853,7 @@ class GetPrompts(Resource):
                     }
                 )
         except Exception as err:
-            current_app.logger.error(f"Error retrieving prompts: {err}")
+            current_app.logger.error(f"Error retrieving prompts: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify(list_prompts), 200)
@@ -901,7 +901,7 @@ class GetSinglePrompt(Resource):
                 {"_id": ObjectId(prompt_id), "user": user}
             )
         except Exception as err:
-            current_app.logger.error(f"Error retrieving prompt: {err}")
+            current_app.logger.error(f"Error retrieving prompt: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"content": prompt["content"]}), 200)
@@ -930,7 +930,7 @@ class DeletePrompt(Resource):
         try:
             prompts_collection.delete_one({"_id": ObjectId(data["id"]), "user": user})
         except Exception as err:
-            current_app.logger.error(f"Error deleting prompt: {err}")
+            current_app.logger.error(f"Error deleting prompt: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True}), 200)
@@ -968,7 +968,7 @@ class UpdatePrompt(Resource):
                 {"$set": {"name": data["name"], "content": data["content"]}},
             )
         except Exception as err:
-            current_app.logger.error(f"Error updating prompt: {err}")
+            current_app.logger.error(f"Error updating prompt: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True}), 200)
@@ -1015,7 +1015,7 @@ class GetAgent(Resource):
                 "key": f"{agent['key'][:4]}...{agent['key'][-4:]}",
             }
         except Exception as err:
-            current_app.logger.error(f"Error retrieving agent: {err}")
+            current_app.logger.error(f"Error retrieving agent: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify(data), 200)
@@ -1056,7 +1056,7 @@ class GetAgents(Resource):
                 if "source" in agent or "retriever" in agent
             ]
         except Exception as err:
-            current_app.logger.error(f"Error retrieving agents: {err}")
+            current_app.logger.error(f"Error retrieving agents: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
         return make_response(jsonify(list_agents), 200)
 
@@ -1145,7 +1145,7 @@ class CreateAgent(Resource):
             resp = agents_collection.insert_one(new_agent)
             new_id = str(resp.inserted_id)
         except Exception as err:
-            current_app.logger.error(f"Error creating agent: {err}")
+            current_app.logger.error(f"Error creating agent: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"id": new_id, "key": key}), 201)
@@ -1196,7 +1196,7 @@ class UpdateAgent(Resource):
             existing_agent = agents_collection.find_one({"_id": oid, "user": user})
         except Exception as err:
             return make_response(
-                current_app.logger.error(f"Error finding agent {agent_id}: {err}"),
+                current_app.logger.error(f"Error finding agent {agent_id}: {err}", exc_info=True),
                 jsonify({"success": False, "message": "Database error finding agent"}),
                 500,
             )
@@ -1319,7 +1319,7 @@ class UpdateAgent(Resource):
                 )
 
         except Exception as err:
-            current_app.logger.error(f"Error updating agent {agent_id}: {err}")
+            current_app.logger.error(f"Error updating agent {agent_id}: {err}", exc_info=True)
             return make_response(
                 jsonify({"success": False, "message": "Database error during update"}),
                 500,
@@ -1362,7 +1362,7 @@ class DeleteAgent(Resource):
             deleted_id = str(deleted_agent["_id"])
 
         except Exception as err:
-            current_app.logger.error(f"Error deleting agent: {err}")
+            current_app.logger.error(f"Error deleting agent: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"id": deleted_id}), 200)
@@ -1405,7 +1405,7 @@ class AgentWebhook(Resource):
             full_webhook_url = f"{base_url}/api/webhooks/agents/{webhook_token}"
 
         except Exception as err:
-            current_app.logger.error(f"Error generating webhook URL: {err}")
+            current_app.logger.error(f"Error generating webhook URL: {err}", exc_info=True)
             return make_response(
                 jsonify({"success": False, "message": "Error generating webhook URL"}),
                 400,
@@ -1694,7 +1694,7 @@ class ShareConversation(Resource):
                     201,
                 )
         except Exception as err:
-            current_app.logger.error(f"Error sharing conversation: {err}")
+            current_app.logger.error(f"Error sharing conversation: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
 
@@ -1750,7 +1750,7 @@ class GetPubliclySharedConversations(Resource):
                 res["api_key"] = shared["api_key"]
             return make_response(jsonify(res), 200)
         except Exception as err:
-            current_app.logger.error(f"Error getting shared conversation: {err}")
+            current_app.logger.error(f"Error getting shared conversation: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
 
@@ -1795,7 +1795,7 @@ class GetMessageAnalytics(Resource):
                 else None
             )
         except Exception as err:
-            current_app.logger.error(f"Error getting API key: {err}")
+            current_app.logger.error(f"Error getting API key: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         end_date = datetime.datetime.now(datetime.timezone.utc)
@@ -1870,7 +1870,7 @@ class GetMessageAnalytics(Resource):
                 daily_messages[entry["_id"]] = entry["count"]
 
         except Exception as err:
-            current_app.logger.error(f"Error getting message analytics: {err}")
+            current_app.logger.error(f"Error getting message analytics: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(
@@ -1919,7 +1919,7 @@ class GetTokenAnalytics(Resource):
                 else None
             )
         except Exception as err:
-            current_app.logger.error(f"Error getting API key: {err}")
+            current_app.logger.error(f"Error getting API key: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         end_date = datetime.datetime.now(datetime.timezone.utc)
@@ -2029,7 +2029,7 @@ class GetTokenAnalytics(Resource):
                     daily_token_usage[entry["_id"]["day"]] = entry["total_tokens"]
 
         except Exception as err:
-            current_app.logger.error(f"Error getting token analytics: {err}")
+            current_app.logger.error(f"Error getting token analytics: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(
@@ -2078,7 +2078,7 @@ class GetFeedbackAnalytics(Resource):
                 else None
             )
         except Exception as err:
-            current_app.logger.error(f"Error getting API key: {err}")
+            current_app.logger.error(f"Error getting API key: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         end_date = datetime.datetime.now(datetime.timezone.utc)
@@ -2194,7 +2194,7 @@ class GetFeedbackAnalytics(Resource):
                 }
 
         except Exception as err:
-            current_app.logger.error(f"Error getting feedback analytics: {err}")
+            current_app.logger.error(f"Error getting feedback analytics: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(
@@ -2241,7 +2241,7 @@ class GetUserLogs(Resource):
                 else None
             )
         except Exception as err:
-            current_app.logger.error(f"Error getting API key: {err}")
+            current_app.logger.error(f"Error getting API key: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         query = {"user": user}
@@ -2330,7 +2330,7 @@ class ManageSync(Resource):
                 update_data,
             )
         except Exception as err:
-            current_app.logger.error(f"Error updating sync frequency: {err}")
+            current_app.logger.error(f"Error updating sync frequency: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True}), 200)
@@ -2366,7 +2366,7 @@ class TextToSpeech(Resource):
                 200,
             )
         except Exception as err:
-            current_app.logger.error(f"Error synthesizing audio: {err}")
+            current_app.logger.error(f"Error synthesizing audio: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
 
@@ -2391,7 +2391,7 @@ class AvailableTools(Resource):
                     }
                 )
         except Exception as err:
-            current_app.logger.error(f"Error getting available tools: {err}")
+            current_app.logger.error(f"Error getting available tools: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True, "data": tools_metadata}), 200)
@@ -2413,7 +2413,7 @@ class GetTools(Resource):
                 tool.pop("_id")
                 user_tools.append(tool)
         except Exception as err:
-            current_app.logger.error(f"Error getting user tools: {err}")
+            current_app.logger.error(f"Error getting user tools: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True, "tools": user_tools}), 200)
@@ -2489,7 +2489,7 @@ class CreateTool(Resource):
             resp = user_tools_collection.insert_one(new_tool)
             new_id = str(resp.inserted_id)
         except Exception as err:
-            current_app.logger.error(f"Error creating tool: {err}")
+            current_app.logger.error(f"Error creating tool: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"id": new_id}), 200)
@@ -2558,7 +2558,7 @@ class UpdateTool(Resource):
                 {"$set": update_data},
             )
         except Exception as err:
-            current_app.logger.error(f"Error updating tool: {err}")
+            current_app.logger.error(f"Error updating tool: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True}), 200)
@@ -2595,7 +2595,7 @@ class UpdateToolConfig(Resource):
                 {"$set": {"config": data["config"]}},
             )
         except Exception as err:
-            current_app.logger.error(f"Error updating tool config: {err}")
+            current_app.logger.error(f"Error updating tool config: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True}), 200)
@@ -2634,7 +2634,7 @@ class UpdateToolActions(Resource):
                 {"$set": {"actions": data["actions"]}},
             )
         except Exception as err:
-            current_app.logger.error(f"Error updating tool actions: {err}")
+            current_app.logger.error(f"Error updating tool actions: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True}), 200)
@@ -2671,7 +2671,7 @@ class UpdateToolStatus(Resource):
                 {"$set": {"status": data["status"]}},
             )
         except Exception as err:
-            current_app.logger.error(f"Error updating tool status: {err}")
+            current_app.logger.error(f"Error updating tool status: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
 
         return make_response(jsonify({"success": True}), 200)
@@ -2704,7 +2704,7 @@ class DeleteTool(Resource):
             if result.deleted_count == 0:
                 return {"success": False, "message": "Tool not found"}, 404
         except Exception as err:
-            current_app.logger.error(f"Error deleting tool: {err}")
+            current_app.logger.error(f"Error deleting tool: {err}", exc_info=True)
             return {"success": False}, 400
 
         return {"success": True}, 200
@@ -2755,7 +2755,7 @@ class GetChunks(Resource):
             )
 
         except Exception as e:
-            current_app.logger.error(f"Error getting chunks: {e}", exc_info=True)
+            current_app.logger.error(f"Error getting chunks: {e}", exc_info=True, exc_info=True)
             return make_response(jsonify({"success": False}), 500)
 
 
@@ -2988,5 +2988,5 @@ class StoreAttachment(Resource):
                 200,
             )
         except Exception as err:
-            current_app.logger.error(f"Error storing attachment: {err}")
+            current_app.logger.error(f"Error storing attachment: {err}", exc_info=True)
             return make_response(jsonify({"success": False, "error": str(err)}), 400)

From ad5f70748694a751996968b17b015f63b5aa43bd Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 5 May 2025 18:03:45 +0100
Subject: [PATCH 36/39] lint: ruff fix

---
 application/api/user/routes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 30399f06..3b3cb21f 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -2755,7 +2755,7 @@ class GetChunks(Resource):
             )
 
         except Exception as e:
-            current_app.logger.error(f"Error getting chunks: {e}", exc_info=True, exc_info=True)
+            current_app.logger.error(f"Error getting chunks: {e}", exc_info=True)
             return make_response(jsonify({"success": False}), 500)
 
 

From 204936b2d05093b96ad2f05ab46b3b61fa18826a Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 5 May 2025 23:34:13 +0100
Subject: [PATCH 37/39] fix: sources icon mini fix

---
 frontend/src/agents/NewAgent.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/src/agents/NewAgent.tsx b/frontend/src/agents/NewAgent.tsx
index 0cccfbe6..0a601699 100644
--- a/frontend/src/agents/NewAgent.tsx
+++ b/frontend/src/agents/NewAgent.tsx
@@ -360,7 +360,7 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
                     sourceDocs?.map((doc: Doc) => ({
                       id: doc.id || doc.retriever || doc.name,
                       label: doc.name,
-                      icon: SourceIcon,
+                      icon: <img src={SourceIcon} alt="" />,
                     })) || []
                   }
                   selectedIds={selectedSourceIds}

From e56d54c3f0236c4c327e481b481ca44816729198 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Tue, 6 May 2025 10:59:25 +0100
Subject: [PATCH 38/39] fix: improve source and description handling in
 GetAgent and GetAgents responses

---
 application/api/user/routes.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 3b3cb21f..bacc8bb7 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -997,12 +997,8 @@ class GetAgent(Resource):
             data = {
                 "id": str(agent["_id"]),
                 "name": agent["name"],
-                "description": agent["description"],
-                "source": (
-                    str(db.dereference(agent["source"])["_id"])
-                    if "source" in agent and isinstance(agent["source"], DBRef)
-                    else ""
-                ),
+                "description": agent.get("description", ""),
+                "source": (str(source_doc["_id"]) if isinstance(agent.get("source"), DBRef) and (source_doc := db.dereference(agent.get("source"))) else ""),
                 "chunks": agent["chunks"],
                 "retriever": agent.get("retriever", ""),
                 "prompt_id": agent["prompt_id"],
@@ -1035,12 +1031,8 @@ class GetAgents(Resource):
                 {
                     "id": str(agent["_id"]),
                     "name": agent["name"],
-                    "description": agent["description"],
-                    "source": (
-                        str(db.dereference(agent["source"])["_id"])
-                        if "source" in agent and isinstance(agent["source"], DBRef)
-                        else ""
-                    ),
+                    "description": agent.get("description", ""),
+                    "source": (str(source_doc["_id"]) if isinstance(agent.get("source"), DBRef) and (source_doc := db.dereference(agent.get("source"))) else ""),
                     "chunks": agent["chunks"],
                     "retriever": agent.get("retriever", ""),
                     "prompt_id": agent["prompt_id"],

From 7858c48f11fee4865c641200e67425916069cdd5 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Tue, 6 May 2025 11:12:26 +0100
Subject: [PATCH 39/39] fix: zip file uploads

---
 application/api/user/routes.py | 111 ++++++++++++++++++++-------------
 1 file changed, 66 insertions(+), 45 deletions(-)

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index bacc8bb7..e10082d3 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -439,59 +439,80 @@ class UploadFile(Resource):
 
                 zip_filename = f"{job_name}.zip"
                 zip_path = f"{base_path}/{zip_filename}"
+                zip_temp_path = None
 
-                def create_zip_archive(temp_paths, **kwargs):
+                def create_zip_archive(temp_paths, job_name, storage):
                     import tempfile
 
-                    with tempfile.TemporaryDirectory() as temp_dir:
+                    with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as temp_zip_file:
+                        zip_output_path = temp_zip_file.name
+
+                    with tempfile.TemporaryDirectory() as stage_dir:
                         for path in temp_paths:
-                            file_data = storage.get_file(path)
-                            with open(
-                                os.path.join(temp_dir, os.path.basename(path)), "wb"
-                            ) as f:
-                                f.write(file_data.read())
+                            try:
+                                file_data = storage.get_file(path)
+                                with open(os.path.join(stage_dir, os.path.basename(path)), "wb") as f:
+                                    f.write(file_data.read())
+                            except Exception as e:
+                                current_app.logger.error(f"Error processing file {path} for zipping: {e}", exc_info=True)
+                                if os.path.exists(zip_output_path):
+                                    os.remove(zip_output_path)
+                                raise
+                        try:
+                            shutil.make_archive(
+                                base_name=zip_output_path.replace(".zip", ""),
+                                format="zip",
+                                root_dir=stage_dir,
+                            )
+                        except Exception as e:
+                            current_app.logger.error(f"Error creating zip archive: {e}", exc_info=True)
+                            if os.path.exists(zip_output_path):
+                                os.remove(zip_output_path)
+                            raise
 
-                        # Create zip archive
-                        zip_temp = shutil.make_archive(
-                            base_name=os.path.join(temp_dir, job_name),
-                            format="zip",
-                            root_dir=temp_dir,
-                        )
+                    return zip_output_path
 
-                        return zip_temp
+                try:
+                    zip_temp_path = create_zip_archive(temp_files, job_name, storage)
+                    with open(zip_temp_path, "rb") as zip_file:
+                        storage.save_file(zip_file, zip_path)
 
-                zip_temp_path = create_zip_archive(temp_files)
-                with open(zip_temp_path, "rb") as zip_file:
-                    storage.save_file(zip_file, zip_path)
+                    task = ingest.delay(
+                        settings.UPLOAD_FOLDER,
+                        [
+                            ".rst",
+                            ".md",
+                            ".pdf",
+                            ".txt",
+                            ".docx",
+                            ".csv",
+                            ".epub",
+                            ".html",
+                            ".mdx",
+                            ".json",
+                            ".xlsx",
+                            ".pptx",
+                            ".png",
+                            ".jpg",
+                            ".jpeg",
+                        ],
+                        job_name,
+                        zip_filename,
+                        user,
+                    )
+                finally:
+                    # Clean up temporary files
+                    for temp_path in temp_files:
+                        try:
+                            storage.delete_file(temp_path)
+                        except Exception as e:
+                            current_app.logger.error(f"Error deleting temporary file {temp_path}: {e}", exc_info=True)
 
-                # Clean up temp files
-                for temp_path in temp_files:
-                    storage.delete_file(temp_path)
+                    # Clean up the zip file if it was created
+                    if zip_temp_path and os.path.exists(zip_temp_path):
+                        os.remove(zip_temp_path)
 
-                task = ingest.delay(
-                    settings.UPLOAD_FOLDER,
-                    [
-                        ".rst",
-                        ".md",
-                        ".pdf",
-                        ".txt",
-                        ".docx",
-                        ".csv",
-                        ".epub",
-                        ".html",
-                        ".mdx",
-                        ".json",
-                        ".xlsx",
-                        ".pptx",
-                        ".png",
-                        ".jpg",
-                        ".jpeg",
-                    ],
-                    job_name,
-                    zip_filename,
-                    user,
-                )
-            else:
+            else:  # single-file upload; the branch above handles the zipped multi-file case
                 # For single file
                 file = files[0]
                 filename = secure_filename(file.filename)
@@ -519,7 +540,7 @@ class UploadFile(Resource):
                         ".jpeg",
                     ],
                     job_name,
-                    filename,
+                    filename,  # single-file case passes the file's own name (the zip branch passes zip_filename)
                     user,
                 )