diff --git a/application/agents/base.py b/application/agents/base.py index 77729fe6..134de1c3 100644 --- a/application/agents/base.py +++ b/application/agents/base.py @@ -264,7 +264,15 @@ class BaseAgent(ABC): query: str, retrieved_data: List[Dict], ) -> List[Dict]: - docs_together = "\n".join([doc["text"] for doc in retrieved_data]) + docs_with_filenames = [] + for doc in retrieved_data: + filename = doc.get("filename") or doc.get("title") or doc.get("source") + if filename: + chunk_header = str(filename) + docs_with_filenames.append(f"{chunk_header}\n{doc['text']}") + else: + docs_with_filenames.append(doc["text"]) + docs_together = "\n\n".join(docs_with_filenames) p_chat_combine = system_prompt.replace("{summaries}", docs_together) messages_combine = [{"role": "system", "content": p_chat_combine}] diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py index 2ce863c2..f90a751c 100644 --- a/application/retriever/classic_rag.py +++ b/application/retriever/classic_rag.py @@ -1,4 +1,5 @@ import logging +import os from application.core.settings import settings from application.llm.llm_creator import LLMCreator @@ -141,15 +142,28 @@ class ClassicRAG(BaseRetriever): title = metadata.get( "title", metadata.get("post_title", page_content) ) - if isinstance(title, str): - title = title.split("/")[-1] + if not isinstance(title, str): + title = str(title) + title = title.split("/")[-1] + + filename = ( + metadata.get("filename") + or metadata.get("file_name") + or metadata.get("source") + ) + if isinstance(filename, str): + filename = os.path.basename(filename) or filename else: - title = str(title).split("/")[-1] + filename = title + if not filename: + filename = title + source_path = metadata.get("source") or vectorstore_id all_docs.append( { "title": title, "text": page_content, - "source": metadata.get("source") or vectorstore_id, + "source": source_path, + "filename": filename, } ) except Exception as e: diff --git a/frontend/src/conversation/ConversationMessages.tsx b/frontend/src/conversation/ConversationMessages.tsx index 4bc2bb08..717023a4 100644 --- a/frontend/src/conversation/ConversationMessages.tsx +++ b/frontend/src/conversation/ConversationMessages.tsx @@ -210,7 +210,7 @@ export default function ConversationMessages({ )}