From 6f47aa802b245a1bf0955234d7b0791f8939f59d Mon Sep 17 00:00:00 2001
From: Ankit Matth <ankitmatth111@gmail.com>
Date: Sat, 16 Aug 2025 15:19:19 +0530
Subject: [PATCH 1/6] added support for multi-select sources

---
 application/retriever/classic_rag.py          | 62 +++++++++++--------
 frontend/src/components/MessageInput.tsx      | 10 +--
 frontend/src/components/SourcesPopup.tsx      | 23 ++++---
 .../src/conversation/conversationHandlers.ts  | 46 ++++++++++----
 frontend/src/hooks/useDefaultDocument.ts      |  4 +-
 frontend/src/preferences/preferenceApi.ts     | 33 +++++-----
 frontend/src/preferences/preferenceSlice.ts   |  6 +-
 7 files changed, 115 insertions(+), 69 deletions(-)

diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index 9416b4f7..a9e3dee7 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -20,7 +20,7 @@ class ClassicRAG(BaseRetriever):
         api_key=settings.API_KEY,
         decoded_token=None,
     ):
-        self.original_question = ""
+        self.original_question = source.get("question", "")
         self.chat_history = chat_history if chat_history is not None else []
         self.prompt = prompt
         self.chunks = chunks
@@ -44,7 +44,18 @@ class ClassicRAG(BaseRetriever):
             user_api_key=self.user_api_key,
             decoded_token=decoded_token,
         )
-        self.vectorstore = source["active_docs"] if "active_docs" in source else None
+        if "active_docs" in source:
+            if isinstance(source["active_docs"], list):
+                self.vectorstores = source["active_docs"]
+            elif isinstance(source["active_docs"], str) and "," in source["active_docs"]:
+                # ✅ split multiple IDs from comma string
+                self.vectorstores = [doc_id.strip() for doc_id in source["active_docs"].split(",") if doc_id.strip()]
+            else:
+                self.vectorstores = [source["active_docs"]]
+        else:
+            self.vectorstores = []
+
+        self.vectorstore = None
         self.question = self._rephrase_query()
         self.decoded_token = decoded_token
 
@@ -79,29 +90,30 @@ class ClassicRAG(BaseRetriever):
             return self.original_question
 
     def _get_data(self):
-        if self.chunks == 0 or self.vectorstore is None:
-            docs = []
-        else:
-            docsearch = VectorCreator.create_vectorstore(
-                settings.VECTOR_STORE, self.vectorstore, settings.EMBEDDINGS_KEY
-            )
-            docs_temp = docsearch.search(self.question, k=self.chunks)
-            docs = [
-                {
-                    "title": i.metadata.get(
-                        "title", i.metadata.get("post_title", i.page_content)
-                    ).split("/")[-1],
-                    "text": i.page_content,
-                    "source": (
-                        i.metadata.get("source")
-                        if i.metadata.get("source")
-                        else "local"
-                    ),
-                }
-                for i in docs_temp
-            ]
+        if self.chunks == 0 or not self.vectorstores:
+            return []
 
-        return docs
+        all_docs = []
+        chunks_per_source = max(1, self.chunks // len(self.vectorstores))
+
+        for vectorstore in self.vectorstores:
+            if vectorstore:
+                try:
+                    docsearch = VectorCreator.create_vectorstore(
+                        settings.VECTOR_STORE, vectorstore, settings.EMBEDDINGS_KEY
+                    )
+                    docs_temp = docsearch.search(self.question, k=chunks_per_source)
+                    for i in docs_temp:
+                        all_docs.append({
+                            "title": i.metadata.get("title", i.metadata.get("post_title", i.page_content)).split("/")[-1],
+                            "text": i.page_content,
+                            "source": i.metadata.get("source") or vectorstore,
+                        })
+                except Exception as e:
+                    logging.error(f"Error searching vectorstore {vectorstore}: {e}")
+                    continue
+
+        return all_docs
 
     def gen():
         pass
@@ -116,7 +128,7 @@ class ClassicRAG(BaseRetriever):
         return {
             "question": self.original_question,
             "rephrased_question": self.question,
-            "source": self.vectorstore,
+            "sources": self.vectorstores,
             "chunks": self.chunks,
             "token_limit": self.token_limit,
             "gpt_model": self.gpt_model,
diff --git a/frontend/src/components/MessageInput.tsx b/frontend/src/components/MessageInput.tsx
index d9bcea3e..6ae5678e 100644
--- a/frontend/src/components/MessageInput.tsx
+++ b/frontend/src/components/MessageInput.tsx
@@ -368,8 +368,8 @@ export default function MessageInput({
                 className="xs:px-3 xs:py-1.5 dark:border-purple-taupe flex max-w-[130px] items-center rounded-[32px] border border-[#AAAAAA] px-2 py-1 transition-colors hover:bg-gray-100 sm:max-w-[150px] dark:hover:bg-[#2C2E3C]"
                 onClick={() => setIsSourcesPopupOpen(!isSourcesPopupOpen)}
                 title={
-                  selectedDocs
-                    ? selectedDocs.name
+                  selectedDocs && selectedDocs.length > 0
+                    ? selectedDocs.map(doc => doc.name).join(', ')
                     : t('conversation.sources.title')
                 }
               >
@@ -379,8 +379,10 @@ export default function MessageInput({
                   className="mr-1 h-3.5 w-3.5 shrink-0 sm:mr-1.5 sm:h-4"
                 />
                 <span className="xs:text-[12px] dark:text-bright-gray truncate overflow-hidden text-[10px] font-medium text-[#5D5D5D] sm:text-[14px]">
-                  {selectedDocs
-                    ? selectedDocs.name
+                  {selectedDocs && selectedDocs.length > 0
+                    ? selectedDocs.length === 1 
+                      ? selectedDocs[0].name
+                      : `${selectedDocs.length} sources selected`
                     : t('conversation.sources.title')}
                 </span>
                 {!isTouch && (
diff --git a/frontend/src/components/SourcesPopup.tsx b/frontend/src/components/SourcesPopup.tsx
index 906f75cd..c3a61e01 100644
--- a/frontend/src/components/SourcesPopup.tsx
+++ b/frontend/src/components/SourcesPopup.tsx
@@ -149,9 +149,10 @@ export default function SourcesPopup({
                 if (option.model === embeddingsName) {
                   const isSelected =
                     selectedDocs &&
-                    (option.id
-                      ? selectedDocs.id === option.id
-                      : selectedDocs.date === option.date);
+                    Array.isArray(selectedDocs) && selectedDocs.length > 0 &&
+                    selectedDocs.some(doc => 
+                      option.id ? doc.id === option.id : doc.date === option.date
+                    );
 
                   return (
                     <div
@@ -159,11 +160,19 @@ export default function SourcesPopup({
                       className="border-opacity-80 dark:border-dim-gray flex cursor-pointer items-center border-b border-[#D9D9D9] p-3 transition-colors hover:bg-gray-100 dark:text-[14px] dark:hover:bg-[#2C2E3C]"
                       onClick={() => {
                         if (isSelected) {
-                          dispatch(setSelectedDocs(null));
-                          handlePostDocumentSelect(null);
+                          const updatedDocs = (selectedDocs && Array.isArray(selectedDocs)) 
+                            ? selectedDocs.filter(doc => 
+                                option.id ? doc.id !== option.id : doc.date !== option.date
+                              ) 
+                            : [];
+                          dispatch(setSelectedDocs(updatedDocs.length > 0 ? updatedDocs : null));
+                          handlePostDocumentSelect(updatedDocs.length > 0 ? updatedDocs : null);
                         } else {
-                          dispatch(setSelectedDocs(option));
-                          handlePostDocumentSelect(option);
+                          const updatedDocs = (selectedDocs && Array.isArray(selectedDocs)) 
+                            ? [...selectedDocs, option] 
+                            : [option];
+                          dispatch(setSelectedDocs(updatedDocs));
+                          handlePostDocumentSelect(updatedDocs);
                         }
                       }}
                     >
diff --git a/frontend/src/conversation/conversationHandlers.ts b/frontend/src/conversation/conversationHandlers.ts
index fb6e1b59..ae60b070 100644
--- a/frontend/src/conversation/conversationHandlers.ts
+++ b/frontend/src/conversation/conversationHandlers.ts
@@ -7,7 +7,7 @@ export function handleFetchAnswer(
   question: string,
   signal: AbortSignal,
   token: string | null,
-  selectedDocs: Doc | null,
+  selectedDocs: Doc | Doc[] | null,
   conversationId: string | null,
   promptId: string | null,
   chunks: string,
@@ -52,10 +52,17 @@ export function handleFetchAnswer(
     payload.attachments = attachments;
   }
 
-  if (selectedDocs && 'id' in selectedDocs) {
-    payload.active_docs = selectedDocs.id as string;
+  if (selectedDocs) {
+    if (Array.isArray(selectedDocs)) {
+      // Handle multiple documents
+      payload.active_docs = selectedDocs.map(doc => doc.id).join(',');
+      payload.retriever = selectedDocs[0]?.retriever as string;
+    } else if ('id' in selectedDocs) {
+      // Handle single document (backward compatibility)
+      payload.active_docs = selectedDocs.id as string;
+      payload.retriever = selectedDocs.retriever as string;
+    }
   }
-  payload.retriever = selectedDocs?.retriever as string;
   return conversationService
     .answer(payload, token, signal)
     .then((response) => {
@@ -84,7 +91,7 @@ export function handleFetchAnswerSteaming(
   question: string,
   signal: AbortSignal,
   token: string | null,
-  selectedDocs: Doc | null,
+  selectedDocs: Doc | Doc[] | null,
   conversationId: string | null,
   promptId: string | null,
   chunks: string,
@@ -112,10 +119,17 @@ export function handleFetchAnswerSteaming(
     payload.attachments = attachments;
   }
 
-  if (selectedDocs && 'id' in selectedDocs) {
-    payload.active_docs = selectedDocs.id as string;
+  if (selectedDocs) {
+    if (Array.isArray(selectedDocs)) {
+      // Handle multiple documents
+      payload.active_docs = selectedDocs.map(doc => doc.id).join(',');
+      payload.retriever = selectedDocs[0]?.retriever as string;
+    } else if ('id' in selectedDocs) {
+      // Handle single document (backward compatibility)
+      payload.active_docs = selectedDocs.id as string;
+      payload.retriever = selectedDocs.retriever as string;
+    }
   }
-  payload.retriever = selectedDocs?.retriever as string;
 
   return new Promise<Answer>((resolve, reject) => {
     conversationService
@@ -171,7 +185,7 @@ export function handleFetchAnswerSteaming(
 export function handleSearch(
   question: string,
   token: string | null,
-  selectedDocs: Doc | null,
+  selectedDocs: Doc | Doc[] | null,
   conversation_id: string | null,
   chunks: string,
   token_limit: number,
@@ -183,9 +197,17 @@ export function handleSearch(
     token_limit: token_limit,
     isNoneDoc: selectedDocs === null,
   };
-  if (selectedDocs && 'id' in selectedDocs)
-    payload.active_docs = selectedDocs.id as string;
-  payload.retriever = selectedDocs?.retriever as string;
+  if (selectedDocs) {
+    if (Array.isArray(selectedDocs)) {
+      // Handle multiple documents
+      payload.active_docs = selectedDocs.map(doc => doc.id).join(',');
+      payload.retriever = selectedDocs[0]?.retriever as string;
+    } else if ('id' in selectedDocs) {
+      // Handle single document (backward compatibility)
+      payload.active_docs = selectedDocs.id as string;
+      payload.retriever = selectedDocs.retriever as string;
+    }
+  }
   return conversationService
     .search(payload, token)
     .then((response) => response.json())
diff --git a/frontend/src/hooks/useDefaultDocument.ts b/frontend/src/hooks/useDefaultDocument.ts
index a2642dc5..004e4bb1 100644
--- a/frontend/src/hooks/useDefaultDocument.ts
+++ b/frontend/src/hooks/useDefaultDocument.ts
@@ -18,11 +18,11 @@ export default function useDefaultDocument() {
   const fetchDocs = () => {
     getDocs(token).then((data) => {
       dispatch(setSourceDocs(data));
-      if (!selectedDoc)
+      if (!selectedDoc || (Array.isArray(selectedDoc) && selectedDoc.length === 0))
         Array.isArray(data) &&
           data?.forEach((doc: Doc) => {
             if (doc.model && doc.name === 'default') {
-              dispatch(setSelectedDocs(doc));
+              dispatch(setSelectedDocs([doc]));
             }
           });
     });
diff --git a/frontend/src/preferences/preferenceApi.ts b/frontend/src/preferences/preferenceApi.ts
index 7fb907b3..40dc4bcc 100644
--- a/frontend/src/preferences/preferenceApi.ts
+++ b/frontend/src/preferences/preferenceApi.ts
@@ -90,9 +90,9 @@ export function getLocalApiKey(): string | null {
   return key;
 }
 
-export function getLocalRecentDocs(): string | null {
-  const doc = localStorage.getItem('DocsGPTRecentDocs');
-  return doc;
+export function getLocalRecentDocs(): Doc[] | null {
+  const docs = localStorage.getItem('DocsGPTRecentDocs');
+  return docs ? JSON.parse(docs) as Doc[] : null;
 }
 
 export function getLocalPrompt(): string | null {
@@ -108,19 +108,20 @@ export function setLocalPrompt(prompt: string): void {
   localStorage.setItem('DocsGPTPrompt', prompt);
 }
 
-export function setLocalRecentDocs(doc: Doc | null): void {
-  localStorage.setItem('DocsGPTRecentDocs', JSON.stringify(doc));
+export function setLocalRecentDocs(docs: Doc[] | null): void {
+  if (docs && docs.length > 0) {
+    localStorage.setItem('DocsGPTRecentDocs', JSON.stringify(docs));
 
-  let docPath = 'default';
-  if (doc?.type === 'local') {
-    docPath = 'local' + '/' + doc.name + '/';
+    docs.forEach((doc) => {
+      let docPath = 'default';
+      if (doc.type === 'local') {
+        docPath = 'local' + '/' + doc.name + '/';
+      }
+      userService
+        .checkDocs({ docs: docPath }, null)
+        .then((response) => response.json());
+    });
+  } else {
+    localStorage.removeItem('DocsGPTRecentDocs');
   }
-  userService
-    .checkDocs(
-      {
-        docs: docPath,
-      },
-      null,
-    )
-    .then((response) => response.json());
 }
diff --git a/frontend/src/preferences/preferenceSlice.ts b/frontend/src/preferences/preferenceSlice.ts
index a0825039..6abbef4d 100644
--- a/frontend/src/preferences/preferenceSlice.ts
+++ b/frontend/src/preferences/preferenceSlice.ts
@@ -15,7 +15,7 @@ export interface Preference {
   prompt: { name: string; id: string; type: string };
   chunks: string;
   token_limit: number;
-  selectedDocs: Doc | null;
+  selectedDocs: Doc[] | null;
   sourceDocs: Doc[] | null;
   conversations: {
     data: { name: string; id: string }[] | null;
@@ -34,7 +34,7 @@ const initialState: Preference = {
   prompt: { name: 'default', id: 'default', type: 'public' },
   chunks: '2',
   token_limit: 2000,
-  selectedDocs: {
+  selectedDocs: [{
     id: 'default',
     name: 'default',
     type: 'remote',
@@ -42,7 +42,7 @@ const initialState: Preference = {
     docLink: 'default',
     model: 'openai_text-embedding-ada-002',
     retriever: 'classic',
-  } as Doc,
+  }] as Doc[],
   sourceDocs: null,
   conversations: {
     data: null,

From bd73fa9ae716e9c0a31604aea171591752ffaaef Mon Sep 17 00:00:00 2001
From: Siddhant Rai <siddhant.rai.5686@gmail.com>
Date: Wed, 20 Aug 2025 22:25:31 +0530
Subject: [PATCH 2/6] refactor: remove unused abstract method and improve
 retrievers

---
 application/retriever/base.py        |  4 --
 application/retriever/classic_rag.py | 80 +++++++++++++++++++++-------
 application/vectorstore/base.py      | 77 +++++++++++++++++++-------
 3 files changed, 121 insertions(+), 40 deletions(-)

diff --git a/application/retriever/base.py b/application/retriever/base.py
index fd99dbdd..36ac2e93 100644
--- a/application/retriever/base.py
+++ b/application/retriever/base.py
@@ -5,10 +5,6 @@ class BaseRetriever(ABC):
     def __init__(self):
         pass
 
-    @abstractmethod
-    def gen(self, *args, **kwargs):
-        pass
-
     @abstractmethod
     def search(self, *args, **kwargs):
         pass
diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index a9e3dee7..b558c8f0 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -1,4 +1,5 @@
 import logging
+
 from application.core.settings import settings
 from application.llm.llm_creator import LLMCreator
 from application.retriever.base import BaseRetriever
@@ -20,6 +21,7 @@ class ClassicRAG(BaseRetriever):
         api_key=settings.API_KEY,
         decoded_token=None,
     ):
+        """Initialize ClassicRAG retriever with vectorstore sources and LLM configuration"""
         self.original_question = source.get("question", "")
         self.chat_history = chat_history if chat_history is not None else []
         self.prompt = prompt
@@ -47,25 +49,46 @@ class ClassicRAG(BaseRetriever):
         if "active_docs" in source:
             if isinstance(source["active_docs"], list):
                 self.vectorstores = source["active_docs"]
-            elif isinstance(source["active_docs"], str) and "," in source["active_docs"]:
-                # ✅ split multiple IDs from comma string
-                self.vectorstores = [doc_id.strip() for doc_id in source["active_docs"].split(",") if doc_id.strip()]
+            elif (
+                isinstance(source["active_docs"], str) and "," in source["active_docs"]
+            ):
+                self.vectorstores = [
+                    doc_id.strip()
+                    for doc_id in source["active_docs"].split(",")
+                    if doc_id.strip()
+                ]
             else:
                 self.vectorstores = [source["active_docs"]]
         else:
             self.vectorstores = []
 
-        self.vectorstore = None
         self.question = self._rephrase_query()
         self.decoded_token = decoded_token
+        self._validate_vectorstore_config()
+
+    def _validate_vectorstore_config(self):
+        """Validate vectorstore IDs and remove any empty/invalid entries"""
+        if not self.vectorstores:
+            logging.warning("No vectorstores configured for retrieval")
+            return
+
+        invalid_ids = [
+            vs_id for vs_id in self.vectorstores if not vs_id or not vs_id.strip()
+        ]
+        if invalid_ids:
+            logging.warning(f"Found invalid vectorstore IDs: {invalid_ids}")
+            self.vectorstores = [
+                vs_id for vs_id in self.vectorstores if vs_id and vs_id.strip()
+            ]
 
     def _rephrase_query(self):
+        """Rephrase user query with chat history context for better retrieval"""
         if (
             not self.original_question
             or not self.chat_history
             or self.chat_history == []
             or self.chunks == 0
-            or self.vectorstore is None
+            or not self.vectorstores
         ):
             return self.original_question
 
@@ -90,41 +113,62 @@ class ClassicRAG(BaseRetriever):
             return self.original_question
 
     def _get_data(self):
+        """Retrieve relevant documents from configured vectorstores"""
         if self.chunks == 0 or not self.vectorstores:
             return []
 
         all_docs = []
         chunks_per_source = max(1, self.chunks // len(self.vectorstores))
 
-        for vectorstore in self.vectorstores:
-            if vectorstore:
+        for vectorstore_id in self.vectorstores:
+            if vectorstore_id:
                 try:
                     docsearch = VectorCreator.create_vectorstore(
-                        settings.VECTOR_STORE, vectorstore, settings.EMBEDDINGS_KEY
+                        settings.VECTOR_STORE, vectorstore_id, settings.EMBEDDINGS_KEY
                     )
                     docs_temp = docsearch.search(self.question, k=chunks_per_source)
-                    for i in docs_temp:
-                        all_docs.append({
-                            "title": i.metadata.get("title", i.metadata.get("post_title", i.page_content)).split("/")[-1],
-                            "text": i.page_content,
-                            "source": i.metadata.get("source") or vectorstore,
-                        })
+
+                    for doc in docs_temp:
+                        if hasattr(doc, "page_content") and hasattr(doc, "metadata"):
+                            page_content = doc.page_content
+                            metadata = doc.metadata
+                        else:
+                            page_content = doc.get("text", doc.get("page_content", ""))
+                            metadata = doc.get("metadata", {})
+
+                        title = metadata.get(
+                            "title", metadata.get("post_title", page_content)
+                        )
+                        if isinstance(title, str):
+                            title = title.split("/")[-1]
+                        else:
+                            title = str(title).split("/")[-1]
+
+                        all_docs.append(
+                            {
+                                "title": title,
+                                "text": page_content,
+                                "source": metadata.get("source") or vectorstore_id,
+                            }
+                        )
                 except Exception as e:
-                    logging.error(f"Error searching vectorstore {vectorstore}: {e}")
+                    logging.error(
+                        f"Error searching vectorstore {vectorstore_id}: {e}",
+                        exc_info=True,
+                    )
                     continue
 
         return all_docs
 
-    def gen():
-        pass
-
     def search(self, query: str = ""):
+        """Search for documents using optional query override"""
         if query:
             self.original_question = query
             self.question = self._rephrase_query()
         return self._get_data()
 
     def get_params(self):
+        """Return current retriever configuration parameters"""
         return {
             "question": self.original_question,
             "rephrased_question": self.question,
diff --git a/application/vectorstore/base.py b/application/vectorstore/base.py
index a6b206c9..ea4885cd 100644
--- a/application/vectorstore/base.py
+++ b/application/vectorstore/base.py
@@ -1,20 +1,28 @@
-from abc import ABC, abstractmethod
 import os
-from sentence_transformers import SentenceTransformer
+from abc import ABC, abstractmethod
+
 from langchain_openai import OpenAIEmbeddings
+from sentence_transformers import SentenceTransformer
+
 from application.core.settings import settings
 
+
 class EmbeddingsWrapper:
     def __init__(self, model_name, *args, **kwargs):
-        self.model = SentenceTransformer(model_name, config_kwargs={'allow_dangerous_deserialization': True}, *args, **kwargs)
+        self.model = SentenceTransformer(
+            model_name,
+            config_kwargs={"allow_dangerous_deserialization": True},
+            *args,
+            **kwargs
+        )
         self.dimension = self.model.get_sentence_embedding_dimension()
 
     def embed_query(self, query: str):
         return self.model.encode(query).tolist()
-    
+
     def embed_documents(self, documents: list):
         return self.model.encode(documents).tolist()
-    
+
     def __call__(self, text):
         if isinstance(text, str):
             return self.embed_query(text)
@@ -24,15 +32,14 @@ class EmbeddingsWrapper:
             raise ValueError("Input must be a string or a list of strings")
 
 
-
 class EmbeddingsSingleton:
     _instances = {}
 
     @staticmethod
     def get_instance(embeddings_name, *args, **kwargs):
         if embeddings_name not in EmbeddingsSingleton._instances:
-            EmbeddingsSingleton._instances[embeddings_name] = EmbeddingsSingleton._create_instance(
-                embeddings_name, *args, **kwargs
+            EmbeddingsSingleton._instances[embeddings_name] = (
+                EmbeddingsSingleton._create_instance(embeddings_name, *args, **kwargs)
             )
         return EmbeddingsSingleton._instances[embeddings_name]
 
@@ -40,9 +47,15 @@ class EmbeddingsSingleton:
     def _create_instance(embeddings_name, *args, **kwargs):
         embeddings_factory = {
             "openai_text-embedding-ada-002": OpenAIEmbeddings,
-            "huggingface_sentence-transformers/all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
-            "huggingface_sentence-transformers-all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
-            "huggingface_hkunlp/instructor-large": lambda: EmbeddingsWrapper("hkunlp/instructor-large"),
+            "huggingface_sentence-transformers/all-mpnet-base-v2": lambda: EmbeddingsWrapper(
+                "sentence-transformers/all-mpnet-base-v2"
+            ),
+            "huggingface_sentence-transformers-all-mpnet-base-v2": lambda: EmbeddingsWrapper(
+                "sentence-transformers/all-mpnet-base-v2"
+            ),
+            "huggingface_hkunlp/instructor-large": lambda: EmbeddingsWrapper(
+                "hkunlp/instructor-large"
+            ),
         }
 
         if embeddings_name in embeddings_factory:
@@ -50,34 +63,63 @@ class EmbeddingsSingleton:
         else:
             return EmbeddingsWrapper(embeddings_name, *args, **kwargs)
 
+
 class BaseVectorStore(ABC):
     def __init__(self):
         pass
 
     @abstractmethod
     def search(self, *args, **kwargs):
+        """Search for similar documents/chunks in the vectorstore"""
+        pass
+
+    @abstractmethod
+    def add_texts(self, texts, metadatas=None, *args, **kwargs):
+        """Add texts with their embeddings to the vectorstore"""
+        pass
+
+    def delete_index(self, *args, **kwargs):
+        """Delete the entire index/collection"""
+        pass
+
+    def save_local(self, *args, **kwargs):
+        """Save vectorstore to local storage"""
+        pass
+
+    def get_chunks(self, *args, **kwargs):
+        """Get all chunks from the vectorstore"""
+        pass
+
+    def add_chunk(self, text, metadata=None, *args, **kwargs):
+        """Add a single chunk to the vectorstore"""
+        pass
+
+    def delete_chunk(self, chunk_id, *args, **kwargs):
+        """Delete a specific chunk from the vectorstore"""
         pass
 
     def is_azure_configured(self):
-        return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
+        return (
+            settings.OPENAI_API_BASE
+            and settings.OPENAI_API_VERSION
+            and settings.AZURE_DEPLOYMENT_NAME
+        )
 
     def _get_embeddings(self, embeddings_name, embeddings_key=None):
         if embeddings_name == "openai_text-embedding-ada-002":
             if self.is_azure_configured():
                 os.environ["OPENAI_API_TYPE"] = "azure"
                 embedding_instance = EmbeddingsSingleton.get_instance(
-                    embeddings_name,
-                    model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME
+                    embeddings_name, model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME
                 )
             else:
                 embedding_instance = EmbeddingsSingleton.get_instance(
-                    embeddings_name,
-                    openai_api_key=embeddings_key
+                    embeddings_name, openai_api_key=embeddings_key
                 )
         elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
             if os.path.exists("./models/all-mpnet-base-v2"):
                 embedding_instance = EmbeddingsSingleton.get_instance(
-                    embeddings_name = "./models/all-mpnet-base-v2",
+                    embeddings_name="./models/all-mpnet-base-v2",
                 )
             else:
                 embedding_instance = EmbeddingsSingleton.get_instance(
@@ -87,4 +129,3 @@ class BaseVectorStore(ABC):
             embedding_instance = EmbeddingsSingleton.get_instance(embeddings_name)
 
         return embedding_instance
-    

From 07d59b66406e4b5bfb193056f956abbbdb85c322 Mon Sep 17 00:00:00 2001
From: Ankit Matth <ankitmatth111@gmail.com>
Date: Sat, 23 Aug 2025 20:25:29 +0530
Subject: [PATCH 3/6] refactor: use list instead of string parsing

---
 application/retriever/classic_rag.py          | 11 +----
 frontend/src/components/SourcesPopup.tsx      | 41 +++++++++++-------
 .../src/conversation/conversationHandlers.ts  | 42 +++++++++----------
 .../src/conversation/conversationModels.ts    |  2 +-
 .../src/modals/ShareConversationModal.tsx     | 14 +++----
 frontend/src/preferences/preferenceSlice.ts   | 19 +++++----
 6 files changed, 68 insertions(+), 61 deletions(-)

diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index b558c8f0..82423bb5 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -46,17 +46,10 @@ class ClassicRAG(BaseRetriever):
             user_api_key=self.user_api_key,
             decoded_token=decoded_token,
         )
-        if "active_docs" in source:
+        
+        if "active_docs" in source and source["active_docs"] is not None:
             if isinstance(source["active_docs"], list):
                 self.vectorstores = source["active_docs"]
-            elif (
-                isinstance(source["active_docs"], str) and "," in source["active_docs"]
-            ):
-                self.vectorstores = [
-                    doc_id.strip()
-                    for doc_id in source["active_docs"].split(",")
-                    if doc_id.strip()
-                ]
             else:
                 self.vectorstores = [source["active_docs"]]
         else:
diff --git a/frontend/src/components/SourcesPopup.tsx b/frontend/src/components/SourcesPopup.tsx
index c3a61e01..f13ee25a 100644
--- a/frontend/src/components/SourcesPopup.tsx
+++ b/frontend/src/components/SourcesPopup.tsx
@@ -17,7 +17,7 @@ type SourcesPopupProps = {
   isOpen: boolean;
   onClose: () => void;
   anchorRef: React.RefObject<HTMLButtonElement | null>;
-  handlePostDocumentSelect: (doc: Doc | null) => void;
+  handlePostDocumentSelect: (doc: Doc[] | null) => void;
   setUploadModalState: React.Dispatch<React.SetStateAction<ActiveState>>;
 };
 
@@ -149,9 +149,12 @@ export default function SourcesPopup({
                 if (option.model === embeddingsName) {
                   const isSelected =
                     selectedDocs &&
-                    Array.isArray(selectedDocs) && selectedDocs.length > 0 &&
-                    selectedDocs.some(doc => 
-                      option.id ? doc.id === option.id : doc.date === option.date
+                    Array.isArray(selectedDocs) &&
+                    selectedDocs.length > 0 &&
+                    selectedDocs.some((doc) =>
+                      option.id
+                        ? doc.id === option.id
+                        : doc.date === option.date,
                     );
 
                   return (
@@ -160,17 +163,27 @@ export default function SourcesPopup({
                       className="border-opacity-80 dark:border-dim-gray flex cursor-pointer items-center border-b border-[#D9D9D9] p-3 transition-colors hover:bg-gray-100 dark:text-[14px] dark:hover:bg-[#2C2E3C]"
                       onClick={() => {
                         if (isSelected) {
-                          const updatedDocs = (selectedDocs && Array.isArray(selectedDocs)) 
-                            ? selectedDocs.filter(doc => 
-                                option.id ? doc.id !== option.id : doc.date !== option.date
-                              ) 
-                            : [];
-                          dispatch(setSelectedDocs(updatedDocs.length > 0 ? updatedDocs : null));
-                          handlePostDocumentSelect(updatedDocs.length > 0 ? updatedDocs : null);
+                          const updatedDocs =
+                            selectedDocs && Array.isArray(selectedDocs)
+                              ? selectedDocs.filter((doc) =>
+                                  option.id
+                                    ? doc.id !== option.id
+                                    : doc.date !== option.date,
+                                )
+                              : [];
+                          dispatch(
+                            setSelectedDocs(
+                              updatedDocs.length > 0 ? updatedDocs : null,
+                            ),
+                          );
+                          handlePostDocumentSelect(
+                            updatedDocs.length > 0 ? updatedDocs : null,
+                          );
                         } else {
-                          const updatedDocs = (selectedDocs && Array.isArray(selectedDocs)) 
-                            ? [...selectedDocs, option] 
-                            : [option];
+                          const updatedDocs =
+                            selectedDocs && Array.isArray(selectedDocs)
+                              ? [...selectedDocs, option]
+                              : [option];
                           dispatch(setSelectedDocs(updatedDocs));
                           handlePostDocumentSelect(updatedDocs);
                         }
diff --git a/frontend/src/conversation/conversationHandlers.ts b/frontend/src/conversation/conversationHandlers.ts
index ae60b070..63557924 100644
--- a/frontend/src/conversation/conversationHandlers.ts
+++ b/frontend/src/conversation/conversationHandlers.ts
@@ -7,7 +7,7 @@ export function handleFetchAnswer(
   question: string,
   signal: AbortSignal,
   token: string | null,
-  selectedDocs: Doc | Doc[] | null,
+  selectedDocs: Doc[] | null,
   conversationId: string | null,
   promptId: string | null,
   chunks: string,
@@ -52,15 +52,15 @@ export function handleFetchAnswer(
     payload.attachments = attachments;
   }
 
-  if (selectedDocs) {
-    if (Array.isArray(selectedDocs)) {
+  if (selectedDocs && Array.isArray(selectedDocs)) {
+    if (selectedDocs.length > 1) {
       // Handle multiple documents
-      payload.active_docs = selectedDocs.map(doc => doc.id).join(',');
+      payload.active_docs = selectedDocs.map((doc) => doc.id!);
       payload.retriever = selectedDocs[0]?.retriever as string;
-    } else if ('id' in selectedDocs) {
+    } else if (selectedDocs.length === 1 && 'id' in selectedDocs[0]) {
       // Handle single document (backward compatibility)
-      payload.active_docs = selectedDocs.id as string;
-      payload.retriever = selectedDocs.retriever as string;
+      payload.active_docs = selectedDocs[0].id as string;
+      payload.retriever = selectedDocs[0].retriever as string;
     }
   }
   return conversationService
@@ -91,7 +91,7 @@ export function handleFetchAnswerSteaming(
   question: string,
   signal: AbortSignal,
   token: string | null,
-  selectedDocs: Doc | Doc[] | null,
+  selectedDocs: Doc[] | null,
   conversationId: string | null,
   promptId: string | null,
   chunks: string,
@@ -119,15 +119,15 @@ export function handleFetchAnswerSteaming(
     payload.attachments = attachments;
   }
 
-  if (selectedDocs) {
-    if (Array.isArray(selectedDocs)) {
+  if (selectedDocs && Array.isArray(selectedDocs)) {
+    if (selectedDocs.length > 1) {
       // Handle multiple documents
-      payload.active_docs = selectedDocs.map(doc => doc.id).join(',');
+      payload.active_docs = selectedDocs.map((doc) => doc.id!);
       payload.retriever = selectedDocs[0]?.retriever as string;
-    } else if ('id' in selectedDocs) {
+    } else if (selectedDocs.length === 1 && 'id' in selectedDocs[0]) {
       // Handle single document (backward compatibility)
-      payload.active_docs = selectedDocs.id as string;
-      payload.retriever = selectedDocs.retriever as string;
+      payload.active_docs = selectedDocs[0].id as string;
+      payload.retriever = selectedDocs[0].retriever as string;
     }
   }
 
@@ -185,7 +185,7 @@ export function handleFetchAnswerSteaming(
 export function handleSearch(
   question: string,
   token: string | null,
-  selectedDocs: Doc | Doc[] | null,
+  selectedDocs: Doc[] | null,
   conversation_id: string | null,
   chunks: string,
   token_limit: number,
@@ -197,15 +197,15 @@ export function handleSearch(
     token_limit: token_limit,
     isNoneDoc: selectedDocs === null,
   };
-  if (selectedDocs) {
-    if (Array.isArray(selectedDocs)) {
+  if (selectedDocs && Array.isArray(selectedDocs)) {
+    if (selectedDocs.length > 1) {
       // Handle multiple documents
-      payload.active_docs = selectedDocs.map(doc => doc.id).join(',');
+      payload.active_docs = selectedDocs.map((doc) => doc.id!);
       payload.retriever = selectedDocs[0]?.retriever as string;
-    } else if ('id' in selectedDocs) {
+    } else if (selectedDocs.length === 1 && 'id' in selectedDocs[0]) {
       // Handle single document (backward compatibility)
-      payload.active_docs = selectedDocs.id as string;
-      payload.retriever = selectedDocs.retriever as string;
+      payload.active_docs = selectedDocs[0].id as string;
+      payload.retriever = selectedDocs[0].retriever as string;
     }
   }
   return conversationService
diff --git a/frontend/src/conversation/conversationModels.ts b/frontend/src/conversation/conversationModels.ts
index 08743e73..2b9f6ee3 100644
--- a/frontend/src/conversation/conversationModels.ts
+++ b/frontend/src/conversation/conversationModels.ts
@@ -54,7 +54,7 @@ export interface Query {
 
 export interface RetrievalPayload {
   question: string;
-  active_docs?: string;
+  active_docs?: string | string[];
   retriever?: string;
   conversation_id: string | null;
   prompt_id?: string | null;
diff --git a/frontend/src/modals/ShareConversationModal.tsx b/frontend/src/modals/ShareConversationModal.tsx
index 99262f01..624d64f5 100644
--- a/frontend/src/modals/ShareConversationModal.tsx
+++ b/frontend/src/modals/ShareConversationModal.tsx
@@ -60,7 +60,7 @@ export const ShareConversationModal = ({
   const [sourcePath, setSourcePath] = useState<{
     label: string;
     value: string;
-  } | null>(preSelectedDoc ? extractDocPaths([preSelectedDoc])[0] : null);
+  } | null>(preSelectedDoc ? extractDocPaths(preSelectedDoc)[0] : null);
 
   const handleCopyKey = (url: string) => {
     navigator.clipboard.writeText(url);
@@ -105,14 +105,14 @@ export const ShareConversationModal = ({
   return (
     <WrapperModal close={close}>
       <div className="flex flex-col gap-2">
-        <h2 className="text-xl font-medium text-eerie-black dark:text-chinese-white">
+        <h2 className="text-eerie-black dark:text-chinese-white text-xl font-medium">
           {t('modals.shareConv.label')}
         </h2>
-        <p className="text-sm text-eerie-black dark:text-silver/60">
+        <p className="text-eerie-black dark:text-silver/60 text-sm">
           {t('modals.shareConv.note')}
         </p>
         <div className="flex items-center justify-between">
-          <span className="text-lg text-eerie-black dark:text-white">
+          <span className="text-eerie-black text-lg dark:text-white">
             {t('modals.shareConv.option')}
           </span>
           <ToggleSwitch
@@ -136,19 +136,19 @@ export const ShareConversationModal = ({
           </div>
         )}
         <div className="flex items-baseline justify-between gap-2">
-          <span className="no-scrollbar w-full overflow-x-auto whitespace-nowrap rounded-full border-2 border-silver px-4 py-3 text-eerie-black dark:border-silver/40 dark:text-white">
+          <span className="no-scrollbar border-silver text-eerie-black dark:border-silver/40 w-full overflow-x-auto rounded-full border-2 px-4 py-3 whitespace-nowrap dark:text-white">
             {`${domain}/share/${identifier ?? '....'}`}
           </span>
           {status === 'fetched' ? (
             <button
-              className="my-1 h-10 w-28 rounded-full bg-purple-30 p-2 text-sm text-white hover:bg-violets-are-blue"
+              className="bg-purple-30 hover:bg-violets-are-blue my-1 h-10 w-28 rounded-full p-2 text-sm text-white"
               onClick={() => handleCopyKey(`${domain}/share/${identifier}`)}
             >
               {isCopied ? t('modals.saveKey.copied') : t('modals.saveKey.copy')}
             </button>
           ) : (
             <button
-              className="my-1 flex h-10 w-28 items-center justify-evenly rounded-full bg-purple-30 p-2 text-center text-sm font-normal text-white hover:bg-violets-are-blue"
+              className="bg-purple-30 hover:bg-violets-are-blue my-1 flex h-10 w-28 items-center justify-evenly rounded-full p-2 text-center text-sm font-normal text-white"
               onClick={() => {
                 shareCoversationPublicly(allowPrompt);
               }}
diff --git a/frontend/src/preferences/preferenceSlice.ts b/frontend/src/preferences/preferenceSlice.ts
index 6abbef4d..6da8be95 100644
--- a/frontend/src/preferences/preferenceSlice.ts
+++ b/frontend/src/preferences/preferenceSlice.ts
@@ -34,15 +34,16 @@ const initialState: Preference = {
   prompt: { name: 'default', id: 'default', type: 'public' },
   chunks: '2',
   token_limit: 2000,
-  selectedDocs: [{
-    id: 'default',
-    name: 'default',
-    type: 'remote',
-    date: 'default',
-    docLink: 'default',
-    model: 'openai_text-embedding-ada-002',
-    retriever: 'classic',
-  }] as Doc[],
+  selectedDocs: [
+    {
+      id: 'default',
+      name: 'default',
+      type: 'remote',
+      date: 'default',
+      model: 'openai_text-embedding-ada-002',
+      retriever: 'classic',
+    },
+  ] as Doc[],
   sourceDocs: null,
   conversations: {
     data: null,

From 2f88890c9404af7e89d7c3c194f9e4ffae33a45b Mon Sep 17 00:00:00 2001
From: Siddhant Rai <siddhant.rai.5686@gmail.com>
Date: Mon, 8 Sep 2025 22:10:08 +0530
Subject: [PATCH 4/6] feat: add support for multiple sources in agent
 configuration and update related components

---
 .../api/answer/services/stream_processor.py   |  82 ++-
 application/api/user/routes.py                | 482 +++++++++++++-----
 application/retriever/classic_rag.py          |  14 +-
 frontend/src/agents/NewAgent.tsx              | 171 ++++++-
 frontend/src/agents/types/index.ts            |   1 +
 5 files changed, 592 insertions(+), 158 deletions(-)

diff --git a/application/api/answer/services/stream_processor.py b/application/api/answer/services/stream_processor.py
index dfcfcdd2..6f57c2fc 100644
--- a/application/api/answer/services/stream_processor.py
+++ b/application/api/answer/services/stream_processor.py
@@ -69,11 +69,8 @@ class StreamProcessor:
             self.decoded_token.get("sub") if self.decoded_token is not None else None
         )
         self.conversation_id = self.data.get("conversation_id")
-        self.source = (
-            {"active_docs": self.data["active_docs"]}
-            if "active_docs" in self.data
-            else {}
-        )
+        self.source = {}
+        self.all_sources = []
         self.attachments = []
         self.history = []
         self.agent_config = {}
@@ -86,6 +83,7 @@ class StreamProcessor:
     def initialize(self):
         """Initialize all required components for processing"""
         self._configure_agent()
+        self._configure_source()
         self._configure_retriever()
         self._load_conversation_history()
         self._process_attachments()
@@ -171,12 +169,77 @@ class StreamProcessor:
         source = data.get("source")
         if isinstance(source, DBRef):
             source_doc = self.db.dereference(source)
-            data["source"] = str(source_doc["_id"])
-            data["retriever"] = source_doc.get("retriever", data.get("retriever"))
+            if source_doc:
+                data["source"] = str(source_doc["_id"])
+                data["retriever"] = source_doc.get("retriever", data.get("retriever"))
+                data["chunks"] = source_doc.get("chunks", data.get("chunks"))
+            else:
+                data["source"] = None
+        elif source == "default":
+            data["source"] = "default"
         else:
-            data["source"] = {}
+            data["source"] = None
+        # Handle multiple sources
+
+        sources = data.get("sources", [])
+        if sources and isinstance(sources, list):
+            sources_list = []
+            for i, source_ref in enumerate(sources):
+                if source_ref == "default":
+                    processed_source = {
+                        "id": "default",
+                        "retriever": "classic",
+                        "chunks": data.get("chunks", "2"),
+                    }
+                    sources_list.append(processed_source)
+                elif isinstance(source_ref, DBRef):
+                    source_doc = self.db.dereference(source_ref)
+                    if source_doc:
+                        processed_source = {
+                            "id": str(source_doc["_id"]),
+                            "retriever": source_doc.get("retriever", "classic"),
+                            "chunks": source_doc.get("chunks", data.get("chunks", "2")),
+                        }
+                        sources_list.append(processed_source)
+            data["sources"] = sources_list
+        else:
+            data["sources"] = []
         return data
 
+    def _configure_source(self):
+        """Set self.source / self.all_sources from the agent's configured sources, else from the request's active_docs"""
+        api_key = self.data.get("api_key") or self.agent_key  # request-supplied key wins over self.agent_key
+
+        if api_key:
+            agent_data = self._get_data_from_api_key(api_key)
+
+            if agent_data.get("sources") and len(agent_data["sources"]) > 0:  # agent has a non-empty "sources" list
+                source_ids = [
+                    source["id"] for source in agent_data["sources"] if source.get("id")
+                ]
+                if source_ids:
+                    self.source = {"active_docs": source_ids}  # active_docs is a list of ids here
+                else:
+                    self.source = {}
+                self.all_sources = agent_data["sources"]  # kept in full, including entries without an id
+            elif agent_data.get("source"):  # fall back to the agent's single "source" value
+                self.source = {"active_docs": agent_data["source"]}
+                self.all_sources = [
+                    {
+                        "id": agent_data["source"],
+                        "retriever": agent_data.get("retriever", "classic"),
+                    }
+                ]
+            else:
+                self.source = {}
+                self.all_sources = []
+            return  # with an API key, the request's active_docs is never consulted
+        if "active_docs" in self.data:  # no API key: take the source from the request payload
+            self.source = {"active_docs": self.data["active_docs"]}  # self.all_sources is left as-is on this path
+            return
+        self.source = {}
+        self.all_sources = []
+
     def _configure_agent(self):
         """Configure the agent based on request data"""
         agent_id = self.data.get("agent_id")
@@ -230,7 +293,8 @@ class StreamProcessor:
             "token_limit": self.data.get("token_limit", settings.DEFAULT_MAX_HISTORY),
         }
 
-        if "isNoneDoc" in self.data and self.data["isNoneDoc"]:
+        api_key = self.data.get("api_key") or self.agent_key
+        if not api_key and "isNoneDoc" in self.data and self.data["isNoneDoc"]:
             self.retriever_config["chunks"] = 0
 
     def create_agent(self):
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 9a2febbc..2e9bae81 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -492,9 +492,9 @@ class DeleteOldIndexes(Resource):
         )
         if not doc:
             return make_response(jsonify({"status": "not found"}), 404)
-        
+
         storage = StorageCreator.get_storage()
-        
+
         try:
             # Delete vector index
             if settings.VECTOR_STORE == "faiss":
@@ -508,7 +508,7 @@ class DeleteOldIndexes(Resource):
                     settings.VECTOR_STORE, source_id=str(doc["_id"])
                 )
                 vectorstore.delete_index()
-                
+
             if "file_path" in doc and doc["file_path"]:
                 file_path = doc["file_path"]
                 if storage.is_directory(file_path):
@@ -517,7 +517,7 @@ class DeleteOldIndexes(Resource):
                         storage.delete_file(f)
                 else:
                     storage.delete_file(file_path)
-                    
+
         except FileNotFoundError:
             pass
         except Exception as err:
@@ -525,7 +525,7 @@ class DeleteOldIndexes(Resource):
                 f"Error deleting files and indexes: {err}", exc_info=True
             )
             return make_response(jsonify({"success": False}), 400)
-            
+
         sources_collection.delete_one({"_id": ObjectId(source_id)})
         return make_response(jsonify({"success": True}), 200)
 
@@ -573,55 +573,75 @@ class UploadFile(Resource):
 
         try:
             storage = StorageCreator.get_storage()
-            
-            
+
             for file in files:
                 original_filename = file.filename
                 safe_file = safe_filename(original_filename)
-                
+
                 with tempfile.TemporaryDirectory() as temp_dir:
                     temp_file_path = os.path.join(temp_dir, safe_file)
                     file.save(temp_file_path)
-                    
+
                     if zipfile.is_zipfile(temp_file_path):
                         try:
-                            with zipfile.ZipFile(temp_file_path, 'r') as zip_ref:
+                            with zipfile.ZipFile(temp_file_path, "r") as zip_ref:
                                 zip_ref.extractall(path=temp_dir)
-                                
+
                                 # Walk through extracted files and upload them
                                 for root, _, files in os.walk(temp_dir):
                                     for extracted_file in files:
-                                        if os.path.join(root, extracted_file) == temp_file_path:
+                                        if (
+                                            os.path.join(root, extracted_file)
+                                            == temp_file_path
+                                        ):
                                             continue
-                                            
-                                        rel_path = os.path.relpath(os.path.join(root, extracted_file), temp_dir)
+
+                                        rel_path = os.path.relpath(
+                                            os.path.join(root, extracted_file), temp_dir
+                                        )
                                         storage_path = f"{base_path}/{rel_path}"
-                                        
-                                        with open(os.path.join(root, extracted_file), 'rb') as f:
+
+                                        with open(
+                                            os.path.join(root, extracted_file), "rb"
+                                        ) as f:
                                             storage.save_file(f, storage_path)
                         except Exception as e:
-                            current_app.logger.error(f"Error extracting zip: {e}", exc_info=True)
+                            current_app.logger.error(
+                                f"Error extracting zip: {e}", exc_info=True
+                            )
                             # If zip extraction fails, save the original zip file
                             file_path = f"{base_path}/{safe_file}"
-                            with open(temp_file_path, 'rb') as f:
+                            with open(temp_file_path, "rb") as f:
                                 storage.save_file(f, file_path)
                     else:
                         # For non-zip files, save directly
                         file_path = f"{base_path}/{safe_file}"
-                        with open(temp_file_path, 'rb') as f:
+                        with open(temp_file_path, "rb") as f:
                             storage.save_file(f, file_path)
-            
+
             task = ingest.delay(
                 settings.UPLOAD_FOLDER,
                 [
-                    ".rst", ".md", ".pdf", ".txt", ".docx", ".csv", ".epub",
-                    ".html", ".mdx", ".json", ".xlsx", ".pptx", ".png",
-                    ".jpg", ".jpeg",
+                    ".rst",
+                    ".md",
+                    ".pdf",
+                    ".txt",
+                    ".docx",
+                    ".csv",
+                    ".epub",
+                    ".html",
+                    ".mdx",
+                    ".json",
+                    ".xlsx",
+                    ".pptx",
+                    ".png",
+                    ".jpg",
+                    ".jpeg",
                 ],
                 job_name,
                 user,
                 file_path=base_path,
-                filename=dir_name
+                filename=dir_name,
             )
         except Exception as err:
             current_app.logger.error(f"Error uploading file: {err}", exc_info=True)
@@ -635,12 +655,29 @@ class ManageSourceFiles(Resource):
         api.model(
             "ManageSourceFilesModel",
             {
-                "source_id": fields.String(required=True, description="Source ID to modify"),
-                "operation": fields.String(required=True, description="Operation: 'add', 'remove', or 'remove_directory'"),
-                "file_paths": fields.List(fields.String, required=False, description="File paths to remove (for remove operation)"),
-                "directory_path": fields.String(required=False, description="Directory path to remove (for remove_directory operation)"),
-                "file": fields.Raw(required=False, description="Files to add (for add operation)"),
-                "parent_dir": fields.String(required=False, description="Parent directory path relative to source root"),
+                "source_id": fields.String(
+                    required=True, description="Source ID to modify"
+                ),
+                "operation": fields.String(
+                    required=True,
+                    description="Operation: 'add', 'remove', or 'remove_directory'",
+                ),
+                "file_paths": fields.List(
+                    fields.String,
+                    required=False,
+                    description="File paths to remove (for remove operation)",
+                ),
+                "directory_path": fields.String(
+                    required=False,
+                    description="Directory path to remove (for remove_directory operation)",
+                ),
+                "file": fields.Raw(
+                    required=False, description="Files to add (for add operation)"
+                ),
+                "parent_dir": fields.String(
+                    required=False,
+                    description="Parent directory path relative to source root",
+                ),
             },
         )
     )
@@ -650,7 +687,9 @@ class ManageSourceFiles(Resource):
     def post(self):
         decoded_token = request.decoded_token
         if not decoded_token:
-            return make_response(jsonify({"success": False, "message": "Unauthorized"}), 401)
+            return make_response(
+                jsonify({"success": False, "message": "Unauthorized"}), 401
+            )
 
         user = decoded_token.get("sub")
         source_id = request.form.get("source_id")
@@ -658,12 +697,24 @@ class ManageSourceFiles(Resource):
 
         if not source_id or not operation:
             return make_response(
-                jsonify({"success": False, "message": "source_id and operation are required"}), 400
+                jsonify(
+                    {
+                        "success": False,
+                        "message": "source_id and operation are required",
+                    }
+                ),
+                400,
             )
 
         if operation not in ["add", "remove", "remove_directory"]:
             return make_response(
-                jsonify({"success": False, "message": "operation must be 'add', 'remove', or 'remove_directory'"}), 400
+                jsonify(
+                    {
+                        "success": False,
+                        "message": "operation must be 'add', 'remove', or 'remove_directory'",
+                    }
+                ),
+                400,
             )
 
         try:
@@ -674,34 +725,53 @@ class ManageSourceFiles(Resource):
             )
 
         try:
-            source = sources_collection.find_one({"_id": ObjectId(source_id), "user": user})
+            source = sources_collection.find_one(
+                {"_id": ObjectId(source_id), "user": user}
+            )
             if not source:
                 return make_response(
-                    jsonify({"success": False, "message": "Source not found or access denied"}), 404
+                    jsonify(
+                        {
+                            "success": False,
+                            "message": "Source not found or access denied",
+                        }
+                    ),
+                    404,
                 )
         except Exception as err:
             current_app.logger.error(f"Error finding source: {err}", exc_info=True)
-            return make_response(jsonify({"success": False, "message": "Database error"}), 500)
+            return make_response(
+                jsonify({"success": False, "message": "Database error"}), 500
+            )
 
         try:
             storage = StorageCreator.get_storage()
             source_file_path = source.get("file_path", "")
-            parent_dir = request.form.get("parent_dir", "") 
-            
+            parent_dir = request.form.get("parent_dir", "")
+
             if parent_dir and (parent_dir.startswith("/") or ".." in parent_dir):
                 return make_response(
-                    jsonify({"success": False, "message": "Invalid parent directory path"}), 400
+                    jsonify(
+                        {"success": False, "message": "Invalid parent directory path"}
+                    ),
+                    400,
                 )
 
             if operation == "add":
                 files = request.files.getlist("file")
                 if not files or all(file.filename == "" for file in files):
                     return make_response(
-                        jsonify({"success": False, "message": "No files provided for add operation"}), 400
+                        jsonify(
+                            {
+                                "success": False,
+                                "message": "No files provided for add operation",
+                            }
+                        ),
+                        400,
                     )
 
                 added_files = []
-                
+
                 target_dir = source_file_path
                 if parent_dir:
                     target_dir = f"{source_file_path}/{parent_dir}"
@@ -720,26 +790,44 @@ class ManageSourceFiles(Resource):
 
                 task = reingest_source_task.delay(source_id=source_id, user=user)
 
-                return make_response(jsonify({
-                    "success": True,
-                    "message": f"Added {len(added_files)} files",
-                    "added_files": added_files,
-                    "parent_dir": parent_dir,
-                    "reingest_task_id": task.id
-                }), 200)
+                return make_response(
+                    jsonify(
+                        {
+                            "success": True,
+                            "message": f"Added {len(added_files)} files",
+                            "added_files": added_files,
+                            "parent_dir": parent_dir,
+                            "reingest_task_id": task.id,
+                        }
+                    ),
+                    200,
+                )
 
             elif operation == "remove":
                 file_paths_str = request.form.get("file_paths")
                 if not file_paths_str:
                     return make_response(
-                        jsonify({"success": False, "message": "file_paths required for remove operation"}), 400
+                        jsonify(
+                            {
+                                "success": False,
+                                "message": "file_paths required for remove operation",
+                            }
+                        ),
+                        400,
                     )
 
                 try:
-                    file_paths = json.loads(file_paths_str) if isinstance(file_paths_str, str) else file_paths_str
+                    file_paths = (
+                        json.loads(file_paths_str)
+                        if isinstance(file_paths_str, str)
+                        else file_paths_str
+                    )
                 except Exception:
                     return make_response(
-                        jsonify({"success": False, "message": "Invalid file_paths format"}), 400
+                        jsonify(
+                            {"success": False, "message": "Invalid file_paths format"}
+                        ),
+                        400,
                     )
 
                 # Remove files from storage and directory structure
@@ -757,18 +845,29 @@ class ManageSourceFiles(Resource):
 
                 task = reingest_source_task.delay(source_id=source_id, user=user)
 
-                return make_response(jsonify({
-                    "success": True,
-                    "message": f"Removed {len(removed_files)} files",
-                    "removed_files": removed_files,
-                    "reingest_task_id": task.id
-                }), 200)
+                return make_response(
+                    jsonify(
+                        {
+                            "success": True,
+                            "message": f"Removed {len(removed_files)} files",
+                            "removed_files": removed_files,
+                            "reingest_task_id": task.id,
+                        }
+                    ),
+                    200,
+                )
 
             elif operation == "remove_directory":
                 directory_path = request.form.get("directory_path")
                 if not directory_path:
                     return make_response(
-                        jsonify({"success": False, "message": "directory_path required for remove_directory operation"}), 400
+                        jsonify(
+                            {
+                                "success": False,
+                                "message": "directory_path required for remove_directory operation",
+                            }
+                        ),
+                        400,
                     )
 
                 # Validate directory path (prevent path traversal)
@@ -778,10 +877,17 @@ class ManageSourceFiles(Resource):
                         f"User: {user}, Source ID: {source_id}, Directory path: {directory_path}"
                     )
                     return make_response(
-                        jsonify({"success": False, "message": "Invalid directory path"}), 400
+                        jsonify(
+                            {"success": False, "message": "Invalid directory path"}
+                        ),
+                        400,
                     )
 
-                full_directory_path = f"{source_file_path}/{directory_path}" if directory_path else source_file_path
+                full_directory_path = (
+                    f"{source_file_path}/{directory_path}"
+                    if directory_path
+                    else source_file_path
+                )
 
                 if not storage.is_directory(full_directory_path):
                     current_app.logger.warning(
@@ -790,7 +896,13 @@ class ManageSourceFiles(Resource):
                         f"Full path: {full_directory_path}"
                     )
                     return make_response(
-                        jsonify({"success": False, "message": "Directory not found or is not a directory"}), 404
+                        jsonify(
+                            {
+                                "success": False,
+                                "message": "Directory not found or is not a directory",
+                            }
+                        ),
+                        404,
                     )
 
                 success = storage.remove_directory(full_directory_path)
@@ -802,7 +914,10 @@ class ManageSourceFiles(Resource):
                         f"Full path: {full_directory_path}"
                     )
                     return make_response(
-                        jsonify({"success": False, "message": "Failed to remove directory"}), 500
+                        jsonify(
+                            {"success": False, "message": "Failed to remove directory"}
+                        ),
+                        500,
                     )
 
                 current_app.logger.info(
@@ -816,12 +931,17 @@ class ManageSourceFiles(Resource):
 
                 task = reingest_source_task.delay(source_id=source_id, user=user)
 
-                return make_response(jsonify({
-                    "success": True,
-                    "message": f"Successfully removed directory: {directory_path}",
-                    "removed_directory": directory_path,
-                    "reingest_task_id": task.id
-                }), 200)
+                return make_response(
+                    jsonify(
+                        {
+                            "success": True,
+                            "message": f"Successfully removed directory: {directory_path}",
+                            "removed_directory": directory_path,
+                            "reingest_task_id": task.id,
+                        }
+                    ),
+                    200,
+                )
 
         except Exception as err:
             error_context = f"operation={operation}, user={user}, source_id={source_id}"
@@ -835,8 +955,12 @@ class ManageSourceFiles(Resource):
                 parent_dir = request.form.get("parent_dir", "")
                 error_context += f", parent_dir={parent_dir}"
 
-            current_app.logger.error(f"Error managing source files: {err} ({error_context})", exc_info=True)
-            return make_response(jsonify({"success": False, "message": "Operation failed"}), 500)
+            current_app.logger.error(
+                f"Error managing source files: {err} ({error_context})", exc_info=True
+            )
+            return make_response(
+                jsonify({"success": False, "message": "Operation failed"}), 500
+            )
 
 
 @user_ns.route("/api/remote")
@@ -984,7 +1108,7 @@ class PaginatedSources(Resource):
                     "tokens": doc.get("tokens", ""),
                     "retriever": doc.get("retriever", "classic"),
                     "syncFrequency": doc.get("sync_frequency", ""),
-                    "isNested": bool(doc.get("directory_structure"))
+                    "isNested": bool(doc.get("directory_structure")),
                 }
                 paginated_docs.append(doc_data)
             response = {
@@ -1032,7 +1156,7 @@ class CombinedJson(Resource):
                         "tokens": index.get("tokens", ""),
                         "retriever": index.get("retriever", "classic"),
                         "syncFrequency": index.get("sync_frequency", ""),
-                        "is_nested": bool(index.get("directory_structure"))
+                        "is_nested": bool(index.get("directory_structure")),
                     }
                 )
         except Exception as err:
@@ -1272,6 +1396,16 @@ class GetAgent(Resource):
                     and (source_doc := db.dereference(agent.get("source")))
                     else ""
                 ),
+                "sources": [
+                    (
+                        str(db.dereference(source_ref)["_id"])
+                        if isinstance(source_ref, DBRef) and db.dereference(source_ref)
+                        else source_ref
+                    )
+                    for source_ref in agent.get("sources", [])
+                    if (isinstance(source_ref, DBRef) and db.dereference(source_ref))
+                    or source_ref == "default"
+                ],
                 "chunks": agent["chunks"],
                 "retriever": agent.get("retriever", ""),
                 "prompt_id": agent.get("prompt_id", ""),
@@ -1325,8 +1459,24 @@ class GetAgents(Resource):
                         str(source_doc["_id"])
                         if isinstance(agent.get("source"), DBRef)
                         and (source_doc := db.dereference(agent.get("source")))
-                        else ""
+                        else (
+                            agent.get("source", "")
+                            if agent.get("source") == "default"
+                            else ""
+                        )
                     ),
+                    "sources": [
+                        (
+                            source_ref
+                            if source_ref == "default"
+                            else str(db.dereference(source_ref)["_id"])
+                        )
+                        for source_ref in agent.get("sources", [])
+                        if source_ref == "default"
+                        or (
+                            isinstance(source_ref, DBRef) and db.dereference(source_ref)
+                        )
+                    ],
                     "chunks": agent["chunks"],
                     "retriever": agent.get("retriever", ""),
                     "prompt_id": agent.get("prompt_id", ""),
@@ -1351,6 +1501,7 @@ class GetAgents(Resource):
                 for agent in agents
                 if "source" in agent or "retriever" in agent
             ]
+
         except Exception as err:
             current_app.logger.error(f"Error retrieving agents: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
@@ -1369,7 +1520,14 @@ class CreateAgent(Resource):
             "image": fields.Raw(
                 required=False, description="Image file upload", type="file"
             ),
-            "source": fields.String(required=True, description="Source ID"),
+            "source": fields.String(
+                required=False, description="Source ID (legacy single source)"
+            ),
+            "sources": fields.List(
+                fields.String,
+                required=False,
+                description="List of source identifiers for multiple sources",
+            ),
             "chunks": fields.Integer(required=True, description="Chunks count"),
             "retriever": fields.String(required=True, description="Retriever ID"),
             "prompt_id": fields.String(required=True, description="Prompt ID"),
@@ -1381,7 +1539,8 @@ class CreateAgent(Resource):
                 required=True, description="Status of the agent (draft or published)"
             ),
             "json_schema": fields.Raw(
-                required=False, description="JSON schema for enforcing structured output format"
+                required=False,
+                description="JSON schema for enforcing structured output format",
             ),
         },
     )
@@ -1401,13 +1560,18 @@ class CreateAgent(Resource):
                     data["tools"] = json.loads(data["tools"])
                 except json.JSONDecodeError:
                     data["tools"] = []
+            if "sources" in data:
+                try:
+                    data["sources"] = json.loads(data["sources"])
+                except json.JSONDecodeError:
+                    data["sources"] = []
             if "json_schema" in data:
                 try:
                     data["json_schema"] = json.loads(data["json_schema"])
                 except json.JSONDecodeError:
                     data["json_schema"] = None
         print(f"Received data: {data}")
-        
+
         # Validate JSON schema if provided
         if data.get("json_schema"):
             try:
@@ -1415,20 +1579,32 @@ class CreateAgent(Resource):
                 json_schema = data.get("json_schema")
                 if not isinstance(json_schema, dict):
                     return make_response(
-                        jsonify({"success": False, "message": "JSON schema must be a valid JSON object"}), 
-                        400
+                        jsonify(
+                            {
+                                "success": False,
+                                "message": "JSON schema must be a valid JSON object",
+                            }
+                        ),
+                        400,
                     )
-                
+
                 # Validate that it has either a 'schema' property or is itself a schema
                 if "schema" not in json_schema and "type" not in json_schema:
                     return make_response(
-                        jsonify({"success": False, "message": "JSON schema must contain either a 'schema' property or be a valid JSON schema with 'type' property"}), 
-                        400
+                        jsonify(
+                            {
+                                "success": False,
+                                "message": "JSON schema must contain either a 'schema' property or be a valid JSON schema with 'type' property",
+                            }
+                        ),
+                        400,
                     )
             except Exception as e:
                 return make_response(
-                    jsonify({"success": False, "message": f"Invalid JSON schema: {str(e)}"}), 
-                    400
+                    jsonify(
+                        {"success": False, "message": f"Invalid JSON schema: {str(e)}"}
+                    ),
+                    400,
                 )
 
         if data.get("status") not in ["draft", "published"]:
@@ -1446,12 +1622,22 @@ class CreateAgent(Resource):
             required_fields = [
                 "name",
                 "description",
-                "source",
                 "chunks",
                 "retriever",
                 "prompt_id",
                 "agent_type",
             ]
+            # Require at least one of source/sources; if both are sent, a non-empty 'sources' wins and 'source' is stored empty
+            if not data.get("source") and not data.get("sources"):
+                return make_response(
+                    jsonify(
+                        {
+                            "success": False,
+                            "message": "Either 'source' or 'sources' field is required for published agents",
+                        }
+                    ),
+                    400,
+                )
             validate_fields = ["name", "description", "prompt_id", "agent_type"]
         else:
             required_fields = ["name"]
@@ -1471,16 +1657,31 @@ class CreateAgent(Resource):
 
         try:
             key = str(uuid.uuid4()) if data.get("status") == "published" else ""
+
+            sources_list = []
+            if data.get("sources") and len(data.get("sources", [])) > 0:
+                for source_id in data.get("sources", []):
+                    if source_id == "default":
+                        sources_list.append("default")
+                    elif ObjectId.is_valid(source_id):
+                        sources_list.append(DBRef("sources", ObjectId(source_id)))
+                source_field = ""
+            else:
+                source_value = data.get("source", "")
+                if source_value == "default":
+                    source_field = "default"
+                elif ObjectId.is_valid(source_value):
+                    source_field = DBRef("sources", ObjectId(source_value))
+                else:
+                    source_field = ""
+
             new_agent = {
                 "user": user,
                 "name": data.get("name"),
                 "description": data.get("description", ""),
                 "image": image_url,
-                "source": (
-                    DBRef("sources", ObjectId(data.get("source")))
-                    if ObjectId.is_valid(data.get("source"))
-                    else ""
-                ),
+                "source": source_field,
+                "sources": sources_list,
                 "chunks": data.get("chunks", ""),
                 "retriever": data.get("retriever", ""),
                 "prompt_id": data.get("prompt_id", ""),
@@ -1495,7 +1696,11 @@ class CreateAgent(Resource):
             }
             if new_agent["chunks"] == "":
                 new_agent["chunks"] = "0"
-            if new_agent["source"] == "" and new_agent["retriever"] == "":
+            if (
+                new_agent["source"] == ""
+                and new_agent["retriever"] == ""
+                and not new_agent["sources"]
+            ):
                 new_agent["retriever"] = "classic"
             resp = agents_collection.insert_one(new_agent)
             new_id = str(resp.inserted_id)
@@ -1517,7 +1722,14 @@ class UpdateAgent(Resource):
             "image": fields.String(
                 required=False, description="New image URL or identifier"
             ),
-            "source": fields.String(required=True, description="Source ID"),
+            "source": fields.String(
+                required=False, description="Source ID (legacy single source)"
+            ),
+            "sources": fields.List(
+                fields.String,
+                required=False,
+                description="List of source identifiers for multiple sources",
+            ),
             "chunks": fields.Integer(required=True, description="Chunks count"),
             "retriever": fields.String(required=True, description="Retriever ID"),
             "prompt_id": fields.String(required=True, description="Prompt ID"),
@@ -1529,7 +1741,8 @@ class UpdateAgent(Resource):
                 required=True, description="Status of the agent (draft or published)"
             ),
             "json_schema": fields.Raw(
-                required=False, description="JSON schema for enforcing structured output format"
+                required=False,
+                description="JSON schema for enforcing structured output format",
             ),
         },
     )
@@ -1549,6 +1762,11 @@ class UpdateAgent(Resource):
                     data["tools"] = json.loads(data["tools"])
                 except json.JSONDecodeError:
                     data["tools"] = []
+            if "sources" in data:
+                try:
+                    data["sources"] = json.loads(data["sources"])
+                except json.JSONDecodeError:
+                    data["sources"] = []
             if "json_schema" in data:
                 try:
                     data["json_schema"] = json.loads(data["json_schema"])
@@ -1593,6 +1811,7 @@ class UpdateAgent(Resource):
             "description",
             "image",
             "source",
+            "sources",
             "chunks",
             "retriever",
             "prompt_id",
@@ -1616,7 +1835,10 @@ class UpdateAgent(Resource):
                     update_fields[field] = new_status
                 elif field == "source":
                     source_id = data.get("source")
-                    if source_id and ObjectId.is_valid(source_id):
+                    if source_id == "default":
+                        # Handle special "default" source
+                        update_fields[field] = "default"
+                    elif source_id and ObjectId.is_valid(source_id):
                         update_fields[field] = DBRef("sources", ObjectId(source_id))
                     elif source_id:
                         return make_response(
@@ -1630,6 +1852,30 @@ class UpdateAgent(Resource):
                         )
                     else:
                         update_fields[field] = ""
+                elif field == "sources":
+                    sources_list = data.get("sources", [])
+                    if sources_list and isinstance(sources_list, list):
+                        valid_sources = []
+                        for source_id in sources_list:
+                            if source_id == "default":
+                                valid_sources.append("default")
+                            elif ObjectId.is_valid(source_id):
+                                valid_sources.append(
+                                    DBRef("sources", ObjectId(source_id))
+                                )
+                            else:
+                                return make_response(
+                                    jsonify(
+                                        {
+                                            "success": False,
+                                            "message": f"Invalid source ID format: {source_id}",
+                                        }
+                                    ),
+                                    400,
+                                )
+                        update_fields[field] = valid_sources
+                    else:
+                        update_fields[field] = []
                 elif field == "chunks":
                     chunks_value = data.get("chunks")
                     if chunks_value == "":
@@ -3532,7 +3778,7 @@ class GetChunks(Resource):
             "page": "Page number for pagination",
             "per_page": "Number of chunks per page",
             "path": "Optional: Filter chunks by relative file path",
-            "search": "Optional: Search term to filter chunks by title or content"
+            "search": "Optional: Search term to filter chunks by title or content",
         },
     )
     def get(self):
@@ -3556,7 +3802,7 @@ class GetChunks(Resource):
         try:
             store = get_vector_store(doc_id)
             chunks = store.get_chunks()
-            
+
             filtered_chunks = []
             for chunk in chunks:
                 metadata = chunk.get("metadata", {})
@@ -3577,9 +3823,9 @@ class GetChunks(Resource):
                         continue
 
                 filtered_chunks.append(chunk)
-            
+
             chunks = filtered_chunks
-            
+
             total_chunks = len(chunks)
             start = (page - 1) * per_page
             end = start + per_page
@@ -3593,7 +3839,7 @@ class GetChunks(Resource):
                         "total": total_chunks,
                         "chunks": paginated_chunks,
                         "path": path if path else None,
-                        "search": search_term if search_term else None
+                        "search": search_term if search_term else None,
                     }
                 ),
                 200,
@@ -3602,6 +3848,7 @@ class GetChunks(Resource):
             current_app.logger.error(f"Error getting chunks: {e}", exc_info=True)
             return make_response(jsonify({"success": False}), 500)
 
+
 @user_ns.route("/api/add_chunk")
 class AddChunk(Resource):
     @api.expect(
@@ -3768,7 +4015,9 @@ class UpdateChunk(Resource):
 
                 deleted = store.delete_chunk(chunk_id)
                 if not deleted:
-                    current_app.logger.warning(f"Failed to delete old chunk {chunk_id}, but new chunk {new_chunk_id} was created")
+                    current_app.logger.warning(
+                        f"Failed to delete old chunk {chunk_id}, but new chunk {new_chunk_id} was created"
+                    )
 
                 return make_response(
                     jsonify(
@@ -3900,39 +4149,38 @@ class DirectoryStructure(Resource):
         decoded_token = request.decoded_token
         if not decoded_token:
             return make_response(jsonify({"success": False}), 401)
-        
+
         user = decoded_token.get("sub")
         doc_id = request.args.get("id")
-        
+
         if not doc_id:
-            return make_response(
-                jsonify({"error": "Document ID is required"}), 400
-            )
-            
+            return make_response(jsonify({"error": "Document ID is required"}), 400)
+
         if not ObjectId.is_valid(doc_id):
             return make_response(jsonify({"error": "Invalid document ID"}), 400)
-            
+
         try:
             doc = sources_collection.find_one({"_id": ObjectId(doc_id), "user": user})
             if not doc:
                 return make_response(
                     jsonify({"error": "Document not found or access denied"}), 404
                 )
-                
+
             directory_structure = doc.get("directory_structure", {})
-            
+
             return make_response(
-                jsonify({
-                    "success": True,
-                    "directory_structure": directory_structure,
-                    "base_path": doc.get("file_path", "")
-                }), 200
+                jsonify(
+                    {
+                        "success": True,
+                        "directory_structure": directory_structure,
+                        "base_path": doc.get("file_path", ""),
+                    }
+                ),
+                200,
             )
-            
+
         except Exception as e:
             current_app.logger.error(
                 f"Error retrieving directory structure: {e}", exc_info=True
             )
-            return make_response(
-                jsonify({"success": False, "error": str(e)}), 500
-            )
+            return make_response(jsonify({"success": False, "error": str(e)}), 500)
diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index 82423bb5..ce1b937b 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -46,7 +46,7 @@ class ClassicRAG(BaseRetriever):
             user_api_key=self.user_api_key,
             decoded_token=decoded_token,
         )
-        
+
         if "active_docs" in source and source["active_docs"] is not None:
             if isinstance(source["active_docs"], list):
                 self.vectorstores = source["active_docs"]
@@ -54,7 +54,6 @@ class ClassicRAG(BaseRetriever):
                 self.vectorstores = [source["active_docs"]]
         else:
             self.vectorstores = []
-
         self.question = self._rephrase_query()
         self.decoded_token = decoded_token
         self._validate_vectorstore_config()
@@ -64,7 +63,6 @@ class ClassicRAG(BaseRetriever):
         if not self.vectorstores:
             logging.warning("No vectorstores configured for retrieval")
             return
-
         invalid_ids = [
             vs_id for vs_id in self.vectorstores if not vs_id or not vs_id.strip()
         ]
@@ -84,12 +82,16 @@ class ClassicRAG(BaseRetriever):
             or not self.vectorstores
         ):
             return self.original_question
-
         prompt = f"""Given the following conversation history:
+
         {self.chat_history}
 
+
+
         Rephrase the following user question to be a standalone search query 
+
         that captures all relevant context from the conversation:
+
         """
 
         messages = [
@@ -109,7 +111,6 @@ class ClassicRAG(BaseRetriever):
         """Retrieve relevant documents from configured vectorstores"""
         if self.chunks == 0 or not self.vectorstores:
             return []
-
         all_docs = []
         chunks_per_source = max(1, self.chunks // len(self.vectorstores))
 
@@ -128,7 +129,6 @@ class ClassicRAG(BaseRetriever):
                         else:
                             page_content = doc.get("text", doc.get("page_content", ""))
                             metadata = doc.get("metadata", {})
-
                         title = metadata.get(
                             "title", metadata.get("post_title", page_content)
                         )
@@ -136,7 +136,6 @@ class ClassicRAG(BaseRetriever):
                             title = title.split("/")[-1]
                         else:
                             title = str(title).split("/")[-1]
-
                         all_docs.append(
                             {
                                 "title": title,
@@ -150,7 +149,6 @@ class ClassicRAG(BaseRetriever):
                         exc_info=True,
                     )
                     continue
-
         return all_docs
 
     def search(self, query: str = ""):
diff --git a/frontend/src/agents/NewAgent.tsx b/frontend/src/agents/NewAgent.tsx
index da8cef5d..f1fc5e50 100644
--- a/frontend/src/agents/NewAgent.tsx
+++ b/frontend/src/agents/NewAgent.tsx
@@ -45,6 +45,7 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
     description: '',
     image: '',
     source: '',
+    sources: [],
     chunks: '',
     retriever: '',
     prompt_id: 'default',
@@ -150,7 +151,41 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
     const formData = new FormData();
     formData.append('name', agent.name);
     formData.append('description', agent.description);
-    formData.append('source', agent.source);
+
+    if (selectedSourceIds.size > 1) {
+      const sourcesArray = Array.from(selectedSourceIds)
+        .map((id) => {
+          const sourceDoc = sourceDocs?.find(
+            (source) =>
+              source.id === id || source.retriever === id || source.name === id,
+          );
+          if (sourceDoc?.name === 'Default' && !sourceDoc?.id) {
+            return 'default';
+          }
+          return sourceDoc?.id || id;
+        })
+        .filter(Boolean);
+      formData.append('sources', JSON.stringify(sourcesArray));
+      formData.append('source', '');
+    } else if (selectedSourceIds.size === 1) {
+      const singleSourceId = Array.from(selectedSourceIds)[0];
+      const sourceDoc = sourceDocs?.find(
+        (source) =>
+          source.id === singleSourceId ||
+          source.retriever === singleSourceId ||
+          source.name === singleSourceId,
+      );
+      let finalSourceId;
+      if (sourceDoc?.name === 'Default' && !sourceDoc?.id)
+        finalSourceId = 'default';
+      else finalSourceId = sourceDoc?.id || singleSourceId;
+      formData.append('source', String(finalSourceId));
+      formData.append('sources', JSON.stringify([]));
+    } else {
+      formData.append('source', '');
+      formData.append('sources', JSON.stringify([]));
+    }
+
     formData.append('chunks', agent.chunks);
     formData.append('retriever', agent.retriever);
     formData.append('prompt_id', agent.prompt_id);
@@ -196,7 +231,41 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
     const formData = new FormData();
     formData.append('name', agent.name);
     formData.append('description', agent.description);
-    formData.append('source', agent.source);
+
+    if (selectedSourceIds.size > 1) {
+      const sourcesArray = Array.from(selectedSourceIds)
+        .map((id) => {
+          const sourceDoc = sourceDocs?.find(
+            (source) =>
+              source.id === id || source.retriever === id || source.name === id,
+          );
+          if (sourceDoc?.name === 'Default' && !sourceDoc?.id) {
+            return 'default';
+          }
+          return sourceDoc?.id || id;
+        })
+        .filter(Boolean);
+      formData.append('sources', JSON.stringify(sourcesArray));
+      formData.append('source', '');
+    } else if (selectedSourceIds.size === 1) {
+      const singleSourceId = Array.from(selectedSourceIds)[0];
+      const sourceDoc = sourceDocs?.find(
+        (source) =>
+          source.id === singleSourceId ||
+          source.retriever === singleSourceId ||
+          source.name === singleSourceId,
+      );
+      let finalSourceId;
+      if (sourceDoc?.name === 'Default' && !sourceDoc?.id)
+        finalSourceId = 'default';
+      else finalSourceId = sourceDoc?.id || singleSourceId;
+      formData.append('source', String(finalSourceId));
+      formData.append('sources', JSON.stringify([]));
+    } else {
+      formData.append('source', '');
+      formData.append('sources', JSON.stringify([]));
+    }
+
     formData.append('chunks', agent.chunks);
     formData.append('retriever', agent.retriever);
     formData.append('prompt_id', agent.prompt_id);
@@ -293,9 +362,33 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
           throw new Error('Failed to fetch agent');
         }
         const data = await response.json();
-        if (data.source) setSelectedSourceIds(new Set([data.source]));
-        else if (data.retriever)
+
+        if (data.sources && data.sources.length > 0) {
+          const mappedSources = data.sources.map((sourceId: string) => {
+            if (sourceId === 'default') {
+              const defaultSource = sourceDocs?.find(
+                (source) => source.name === 'Default',
+              );
+              return defaultSource?.retriever || 'classic';
+            }
+            return sourceId;
+          });
+          setSelectedSourceIds(new Set(mappedSources));
+        } else if (data.source) {
+          if (data.source === 'default') {
+            const defaultSource = sourceDocs?.find(
+              (source) => source.name === 'Default',
+            );
+            setSelectedSourceIds(
+              new Set([defaultSource?.retriever || 'classic']),
+            );
+          } else {
+            setSelectedSourceIds(new Set([data.source]));
+          }
+        } else if (data.retriever) {
           setSelectedSourceIds(new Set([data.retriever]));
+        }
+
         if (data.tools) setSelectedToolIds(new Set(data.tools));
         if (data.status === 'draft') setEffectiveMode('draft');
         if (data.json_schema) {
@@ -311,25 +404,57 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
   }, [agentId, mode, token]);
 
   useEffect(() => {
-    const selectedSource = Array.from(selectedSourceIds).map((id) =>
-      sourceDocs?.find(
-        (source) =>
-          source.id === id || source.retriever === id || source.name === id,
-      ),
-    );
-    if (selectedSource[0]?.model === embeddingsName) {
-      if (selectedSource[0] && 'id' in selectedSource[0]) {
+    const selectedSources = Array.from(selectedSourceIds)
+      .map((id) =>
+        sourceDocs?.find(
+          (source) =>
+            source.id === id || source.retriever === id || source.name === id,
+        ),
+      )
+      .filter(Boolean);
+
+    if (selectedSources.length > 0) {
+      // Handle multiple sources
+      if (selectedSources.length > 1) {
+        // Multiple sources selected - store in sources array
+        const sourceIds = selectedSources
+          .map((source) => source?.id)
+          .filter((id): id is string => Boolean(id));
         setAgent((prev) => ({
           ...prev,
-          source: selectedSource[0]?.id || 'default',
+          sources: sourceIds,
+          source: '', // Clear single source for multiple sources
           retriever: '',
         }));
-      } else
-        setAgent((prev) => ({
-          ...prev,
-          source: '',
-          retriever: selectedSource[0]?.retriever || 'classic',
-        }));
+      } else {
+        // Single source selected - maintain backward compatibility
+        const selectedSource = selectedSources[0];
+        if (selectedSource?.model === embeddingsName) {
+          if (selectedSource && 'id' in selectedSource) {
+            setAgent((prev) => ({
+              ...prev,
+              source: selectedSource?.id || 'default',
+              sources: [], // Clear sources array for single source
+              retriever: '',
+            }));
+          } else {
+            setAgent((prev) => ({
+              ...prev,
+              source: '',
+              sources: [], // Clear sources array
+              retriever: selectedSource?.retriever || 'classic',
+            }));
+          }
+        }
+      }
+    } else {
+      // No sources selected
+      setAgent((prev) => ({
+        ...prev,
+        source: '',
+        sources: [],
+        retriever: '',
+      }));
     }
   }, [selectedSourceIds]);
 
@@ -510,7 +635,7 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
                         )
                         .filter(Boolean)
                         .join(', ')
-                    : 'Select source'}
+                    : 'Select sources'}
                 </button>
                 <MultiSelectPopup
                   isOpen={isSourcePopupOpen}
@@ -526,12 +651,10 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
                   selectedIds={selectedSourceIds}
                   onSelectionChange={(newSelectedIds: Set<string | number>) => {
                     setSelectedSourceIds(newSelectedIds);
-                    setIsSourcePopupOpen(false);
                   }}
-                  title="Select Source"
+                  title="Select Sources"
                   searchPlaceholder="Search sources..."
-                  noOptionsMessage="No source available"
-                  singleSelect={true}
+                  noOptionsMessage="No sources available"
                 />
               </div>
               <div className="mt-3">
diff --git a/frontend/src/agents/types/index.ts b/frontend/src/agents/types/index.ts
index e841cb0a..442097a1 100644
--- a/frontend/src/agents/types/index.ts
+++ b/frontend/src/agents/types/index.ts
@@ -10,6 +10,7 @@ export type Agent = {
   description: string;
   image: string;
   source: string;
+  sources?: string[];
   chunks: string;
   retriever: string;
   prompt_id: string;

From adcdce8d764ca4de31009af52487fe40581156fd Mon Sep 17 00:00:00 2001
From: Siddhant Rai <siddhant.rai.5686@gmail.com>
Date: Wed, 10 Sep 2025 22:10:11 +0530
Subject: [PATCH 5/6] fix: handle invalid chunks value in StreamProcessor and
 ClassicRAG

---
 .../api/answer/services/stream_processor.py      | 16 ++++++++++++++--
 application/retriever/classic_rag.py             | 11 ++++++++++-
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/application/api/answer/services/stream_processor.py b/application/api/answer/services/stream_processor.py
index a04020cb..f6e639ef 100644
--- a/application/api/answer/services/stream_processor.py
+++ b/application/api/answer/services/stream_processor.py
@@ -266,7 +266,13 @@ class StreamProcessor:
             if data_key.get("retriever"):
                 self.retriever_config["retriever_name"] = data_key["retriever"]
             if data_key.get("chunks") is not None:
-                self.retriever_config["chunks"] = data_key["chunks"]
+                try:
+                    self.retriever_config["chunks"] = int(data_key["chunks"])
+                except (ValueError, TypeError):
+                    logger.warning(
+                        f"Invalid chunks value: {data_key['chunks']}, using default value 2"
+                    )
+                    self.retriever_config["chunks"] = 2
         elif self.agent_key:
             data_key = self._get_data_from_api_key(self.agent_key)
             self.agent_config.update(
@@ -287,7 +293,13 @@ class StreamProcessor:
             if data_key.get("retriever"):
                 self.retriever_config["retriever_name"] = data_key["retriever"]
             if data_key.get("chunks") is not None:
-                self.retriever_config["chunks"] = data_key["chunks"]
+                try:
+                    self.retriever_config["chunks"] = int(data_key["chunks"])
+                except (ValueError, TypeError):
+                    logger.warning(
+                        f"Invalid chunks value: {data_key['chunks']}, using default value 2"
+                    )
+                    self.retriever_config["chunks"] = 2
         else:
             self.agent_config.update(
                 {
diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index ce1b937b..2ce863c2 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -25,7 +25,16 @@ class ClassicRAG(BaseRetriever):
         self.original_question = source.get("question", "")
         self.chat_history = chat_history if chat_history is not None else []
         self.prompt = prompt
-        self.chunks = chunks
+        if isinstance(chunks, str):
+            try:
+                self.chunks = int(chunks)
+            except ValueError:
+                logging.warning(
+                    f"Invalid chunks value '{chunks}', using default value 2"
+                )
+                self.chunks = 2
+        else:
+            self.chunks = chunks
         self.gpt_model = gpt_model
         self.token_limit = (
             token_limit

From 188d118fc0c689870b0c89ff3d32d5721f24c757 Mon Sep 17 00:00:00 2001
From: Siddhant Rai <siddhant.rai.5686@gmail.com>
Date: Wed, 10 Sep 2025 22:14:31 +0530
Subject: [PATCH 6/6] refactor: remove unused logging import from routes.py

---
 application/api/connector/routes.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/application/api/connector/routes.py b/application/api/connector/routes.py
index f203a703..1647aa78 100644
--- a/application/api/connector/routes.py
+++ b/application/api/connector/routes.py
@@ -1,6 +1,5 @@
 import datetime
 import json
-import logging
 
 
 from bson.objectid import ObjectId