Merge branch 'main' of https://github.com/arc53/DocsGPT

2026-01-20 05:50:58 +00:00 · 2025-09-16 14:59:18 +05:30
parent f74e2c9da1 e26ad3c475
commit ec0c4c3b84
42 changed files with 2463 additions and 414 deletions
--- a/application/api/answer/services/stream_processor.py
+++ b/application/api/answer/services/stream_processor.py
@@ -69,11 +69,8 @@ class StreamProcessor:
            self.decoded_token.get("sub") if self.decoded_token is not None else None
        )
        self.conversation_id = self.data.get("conversation_id")
-        self.source = (
-            {"active_docs": self.data["active_docs"]}
-            if "active_docs" in self.data
-            else {}
-        )
+        self.source = {}
+        self.all_sources = []
        self.attachments = []
        self.history = []
        self.agent_config = {}
@@ -85,6 +82,8 @@ class StreamProcessor:

    def initialize(self):
        """Initialize all required components for processing"""
+        self._configure_agent()
+        self._configure_source()
        self._configure_retriever()
        self._configure_agent()
        self._load_conversation_history()
@@ -171,13 +170,77 @@ class StreamProcessor:
        source = data.get("source")
        if isinstance(source, DBRef):
            source_doc = self.db.dereference(source)
-            data["source"] = str(source_doc["_id"])
-            data["retriever"] = source_doc.get("retriever", data.get("retriever"))
-            data["chunks"] = source_doc.get("chunks", data.get("chunks"))
+            if source_doc:
+                data["source"] = str(source_doc["_id"])
+                data["retriever"] = source_doc.get("retriever", data.get("retriever"))
+                data["chunks"] = source_doc.get("chunks", data.get("chunks"))
+            else:
+                data["source"] = None
+        elif source == "default":
+            data["source"] = "default"
        else:
            data["source"] = None
+        # Handle multiple sources
+
+        sources = data.get("sources", [])
+        if sources and isinstance(sources, list):
+            sources_list = []
+            for i, source_ref in enumerate(sources):
+                if source_ref == "default":
+                    processed_source = {
+                        "id": "default",
+                        "retriever": "classic",
+                        "chunks": data.get("chunks", "2"),
+                    }
+                    sources_list.append(processed_source)
+                elif isinstance(source_ref, DBRef):
+                    source_doc = self.db.dereference(source_ref)
+                    if source_doc:
+                        processed_source = {
+                            "id": str(source_doc["_id"]),
+                            "retriever": source_doc.get("retriever", "classic"),
+                            "chunks": source_doc.get("chunks", data.get("chunks", "2")),
+                        }
+                        sources_list.append(processed_source)
+            data["sources"] = sources_list
+        else:
+            data["sources"] = []
        return data

+    def _configure_source(self):
+        """Set self.source and self.all_sources from API-key agent data, else from request active_docs."""
+        api_key = self.data.get("api_key") or self.agent_key
+        # A truthy request "api_key" wins over self.agent_key; either one selects the agent-data path.
+        if api_key:
+            agent_data = self._get_data_from_api_key(api_key)
+            # A non-empty "sources" list is checked before the single "source" field.
+            if agent_data.get("sources") and len(agent_data["sources"]) > 0:
+                source_ids = [  # ids of the entries that carry a truthy "id"
+                    source["id"] for source in agent_data["sources"] if source.get("id")
+                ]
+                if source_ids:
+                    self.source = {"active_docs": source_ids}
+                else:
+                    self.source = {}
+                self.all_sources = agent_data["sources"]  # stored in full, including entries without an id
+            elif agent_data.get("source"):
+                self.source = {"active_docs": agent_data["source"]}
+                self.all_sources = [
+                    {
+                        "id": agent_data["source"],
+                        "retriever": agent_data.get("retriever", "classic"),
+                    }
+                ]
+            else:
+                self.source = {}
+                self.all_sources = []
+            return  # request-level "active_docs" is not consulted when an API key is present
+        if "active_docs" in self.data:
+            self.source = {"active_docs": self.data["active_docs"]}
+            return  # self.all_sources is not modified on this path
+        self.source = {}
+        self.all_sources = []
+
    def _configure_agent(self):
        """Configure the agent based on request data"""
        agent_id = self.data.get("agent_id")
@@ -203,7 +266,13 @@ class StreamProcessor:
            if data_key.get("retriever"):
                self.retriever_config["retriever_name"] = data_key["retriever"]
            if data_key.get("chunks") is not None:
-                self.retriever_config["chunks"] = data_key["chunks"]
+                try:
+                    self.retriever_config["chunks"] = int(data_key["chunks"])
+                except (ValueError, TypeError):
+                    logger.warning(
+                        f"Invalid chunks value: {data_key['chunks']}, using default value 2"
+                    )
+                    self.retriever_config["chunks"] = 2
        elif self.agent_key:
            data_key = self._get_data_from_api_key(self.agent_key)
            self.agent_config.update(
@@ -224,7 +293,13 @@ class StreamProcessor:
            if data_key.get("retriever"):
                self.retriever_config["retriever_name"] = data_key["retriever"]
            if data_key.get("chunks") is not None:
-                self.retriever_config["chunks"] = data_key["chunks"]
+                try:
+                    self.retriever_config["chunks"] = int(data_key["chunks"])
+                except (ValueError, TypeError):
+                    logger.warning(
+                        f"Invalid chunks value: {data_key['chunks']}, using default value 2"
+                    )
+                    self.retriever_config["chunks"] = 2
        else:
            self.agent_config.update(
                {
@@ -243,7 +318,8 @@ class StreamProcessor:
            "token_limit": self.data.get("token_limit", settings.DEFAULT_MAX_HISTORY),
        }

-        if "isNoneDoc" in self.data and self.data["isNoneDoc"]:
+        api_key = self.data.get("api_key") or self.agent_key
+        if not api_key and "isNoneDoc" in self.data and self.data["isNoneDoc"]:
            self.retriever_config["chunks"] = 0

    def create_agent(self):
--- a/application/api/connector/routes.py
+++ b/application/api/connector/routes.py
@@ -1,6 +1,5 @@
 import datetime
 import json
-import logging


 from bson.objectid import ObjectId
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py