diff --git a/application/agents/base.py b/application/agents/base.py
index 2361ef9a..14ecad49 100644
--- a/application/agents/base.py
+++ b/application/agents/base.py
@@ -23,6 +23,7 @@ class BaseAgent(ABC):
prompt: str = "",
chat_history: Optional[List[Dict]] = None,
decoded_token: Optional[Dict] = None,
+ attachments: Optional[List[Dict]]=None,
):
self.endpoint = endpoint
self.llm_name = llm_name
@@ -43,6 +44,7 @@ class BaseAgent(ABC):
decoded_token=decoded_token,
)
self.llm_handler = get_llm_handler(llm_name)
+ self.attachments = attachments or []
@log_activity()
def gen(
@@ -241,8 +243,9 @@ class BaseAgent(ABC):
tools_dict: Dict,
messages: List[Dict],
log_context: Optional[LogContext] = None,
+ attachments: Optional[List[Dict]] = None
):
- resp = self.llm_handler.handle_response(self, resp, tools_dict, messages)
+ resp = self.llm_handler.handle_response(self, resp, tools_dict, messages, attachments)
if log_context:
data = build_stack_data(self.llm_handler)
log_context.stacks.append({"component": "llm_handler", "data": data})
diff --git a/application/agents/classic_agent.py b/application/agents/classic_agent.py
index ce01e2e9..f0915747 100644
--- a/application/agents/classic_agent.py
+++ b/application/agents/classic_agent.py
@@ -4,7 +4,8 @@ from application.agents.base import BaseAgent
from application.logging import LogContext
from application.retriever.base import BaseRetriever
-
+import logging
+logger = logging.getLogger(__name__)
class ClassicAgent(BaseAgent):
def _gen_inner(
@@ -18,6 +19,8 @@ class ClassicAgent(BaseAgent):
messages = self._build_messages(self.prompt, query, retrieved_data)
resp = self._llm_gen(messages, log_context)
+
+ attachments = self.attachments
if isinstance(resp, str):
yield {"answer": resp}
@@ -30,7 +33,7 @@ class ClassicAgent(BaseAgent):
yield {"answer": resp.message.content}
return
- resp = self._llm_handler(resp, tools_dict, messages, log_context)
+ resp = self._llm_handler(resp, tools_dict, messages, log_context,attachments)
if isinstance(resp, str):
yield {"answer": resp}
diff --git a/application/agents/llm_handler.py b/application/agents/llm_handler.py
index a70357f8..21d972d9 100644
--- a/application/agents/llm_handler.py
+++ b/application/agents/llm_handler.py
@@ -1,8 +1,11 @@
import json
+import logging
from abc import ABC, abstractmethod
from application.logging import build_stack_data
+logger = logging.getLogger(__name__)
+
class LLMHandler(ABC):
def __init__(self):
@@ -10,12 +13,61 @@ class LLMHandler(ABC):
self.tool_calls = []
@abstractmethod
- def handle_response(self, agent, resp, tools_dict, messages, **kwargs):
+ def handle_response(self, agent, resp, tools_dict, messages, attachments=None, **kwargs):
pass
-
+
+ def prepare_messages_with_attachments(self, agent, messages, attachments=None):
+ """
+ Prepare messages with attachment content if available.
+
+ Args:
+ agent: The current agent instance.
+ messages (list): List of message dictionaries.
+ attachments (list): List of attachment dictionaries with content.
+
+ Returns:
+ list: Messages with attachment context added to the system prompt.
+ """
+ if not attachments:
+ return messages
+
+ logger.info(f"Preparing messages with {len(attachments)} attachments")
+
+ # Check if the LLM has its own custom attachment handling implementation
+ if hasattr(agent.llm, "prepare_messages_with_attachments") and agent.llm.__class__.__name__ != "BaseLLM":
+ logger.info(f"Using {agent.llm.__class__.__name__}'s own prepare_messages_with_attachments method")
+ return agent.llm.prepare_messages_with_attachments(messages, attachments)
+
+ # Otherwise, append attachment content to the system prompt
+ prepared_messages = messages.copy()
+
+ # Build attachment content string
+ attachment_texts = []
+ for attachment in attachments:
+ logger.info(f"Adding attachment {attachment.get('id')} to context")
+ if 'content' in attachment:
+ attachment_texts.append(f"Attached file content:\n\n{attachment['content']}")
+
+ if attachment_texts:
+ combined_attachment_text = "\n\n".join(attachment_texts)
+
+ system_found = False
+ for i in range(len(prepared_messages)):
+ if prepared_messages[i].get("role") == "system":
+ prepared_messages[i]["content"] += f"\n\n{combined_attachment_text}"
+ system_found = True
+ break
+
+ if not system_found:
+ prepared_messages.insert(0, {"role": "system", "content": combined_attachment_text})
+
+ return prepared_messages
class OpenAILLMHandler(LLMHandler):
- def handle_response(self, agent, resp, tools_dict, messages, stream: bool = True):
+ def handle_response(self, agent, resp, tools_dict, messages, attachments=None, stream: bool = True):
+
+ messages = self.prepare_messages_with_attachments(agent, messages, attachments)
+ logger.info(f"Messages with attachments: {messages}")
if not stream:
while hasattr(resp, "finish_reason") and resp.finish_reason == "tool_calls":
message = json.loads(resp.model_dump_json())["message"]
@@ -54,6 +106,7 @@ class OpenAILLMHandler(LLMHandler):
{"role": "tool", "content": [function_response_dict]}
)
+ messages = self.prepare_messages_with_attachments(agent, messages, attachments)
except Exception as e:
messages.append(
{
@@ -69,6 +122,7 @@ class OpenAILLMHandler(LLMHandler):
return resp
else:
+
while True:
tool_calls = {}
for chunk in resp:
@@ -160,7 +214,8 @@ class OpenAILLMHandler(LLMHandler):
return
elif isinstance(chunk, str) and len(chunk) == 0:
continue
-
+
+ logger.info(f"Regenerating with messages: {messages}")
resp = agent.llm.gen_stream(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
@@ -168,8 +223,10 @@ class OpenAILLMHandler(LLMHandler):
class GoogleLLMHandler(LLMHandler):
- def handle_response(self, agent, resp, tools_dict, messages, stream: bool = True):
+ def handle_response(self, agent, resp, tools_dict, messages, attachments=None, stream: bool = True):
from google.genai import types
+
+ messages = self.prepare_messages_with_attachments(agent, messages, attachments)
while True:
if not stream:
diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index 8ecd218f..7f61880d 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -29,6 +29,7 @@ sources_collection = db["sources"]
prompts_collection = db["prompts"]
api_key_collection = db["api_keys"]
user_logs_collection = db["user_logs"]
+attachments_collection = db["attachments"]
answer = Blueprint("answer", __name__)
answer_ns = Namespace("answer", description="Answer related operations", path="/")
@@ -127,7 +128,7 @@ def save_conversation(
llm,
decoded_token,
index=None,
- api_key=None,
+ api_key=None
):
current_time = datetime.datetime.now(datetime.timezone.utc)
if conversation_id is not None and index is not None:
@@ -232,9 +233,15 @@ def complete_stream(
isNoneDoc=False,
index=None,
should_save_conversation=True,
+ attachments=None,
):
try:
response_full, thought, source_log_docs, tool_calls = "", "", [], []
+ attachment_ids = []
+
+ if attachments:
+ attachment_ids = [attachment["id"] for attachment in attachments]
+ logger.info(f"Processing request with {len(attachments)} attachments: {attachment_ids}")
answer = agent.gen(query=question, retriever=retriever)
@@ -287,7 +294,7 @@ def complete_stream(
llm,
decoded_token,
index,
- api_key=user_api_key,
+ api_key=user_api_key
)
else:
conversation_id = None
@@ -307,6 +314,7 @@ def complete_stream(
"response": response_full,
"sources": source_log_docs,
"retriever_params": retriever_params,
+ "attachments": attachment_ids,
"timestamp": datetime.datetime.now(datetime.timezone.utc),
}
)
@@ -355,10 +363,13 @@ class Stream(Resource):
required=False, description="Flag indicating if no document is used"
),
"index": fields.Integer(
- required=False, description="The position where query is to be updated"
+ required=False, description="Index of the query to update"
),
"save_conversation": fields.Boolean(
- required=False, default=True, description="Flag to save conversation"
+ required=False, default=True, description="Whether to save the conversation"
+ ),
+ "attachments": fields.List(
+ fields.String, required=False, description="List of attachment IDs"
),
},
)
@@ -383,6 +394,7 @@ class Stream(Resource):
)
conversation_id = data.get("conversation_id")
prompt_id = data.get("prompt_id", "default")
+ attachment_ids = data.get("attachments", [])
index = data.get("index", None)
chunks = int(data.get("chunks", 2))
@@ -411,9 +423,11 @@ class Stream(Resource):
if not decoded_token:
return make_response({"error": "Unauthorized"}, 401)
+
+ attachments = get_attachments_content(attachment_ids, decoded_token.get("sub"))
logger.info(
- f"/stream - request_data: {data}, source: {source}",
+ f"/stream - request_data: {data}, source: {source}, attachments: {len(attachments)}",
extra={"data": json.dumps({"request_data": data, "source": source})},
)
@@ -431,6 +445,7 @@ class Stream(Resource):
prompt=prompt,
chat_history=history,
decoded_token=decoded_token,
+ attachments=attachments,
)
retriever = RetrieverCreator.create_retriever(
@@ -791,3 +806,38 @@ class Search(Resource):
return bad_request(500, str(e))
return make_response(docs, 200)
+
+
def get_attachments_content(attachment_ids, user):
    """
    Retrieve content from attachment documents based on their IDs.

    Args:
        attachment_ids (list): List of attachment document IDs
        user (str): User identifier to verify ownership

    Returns:
        list: List of dictionaries containing attachment content and metadata
    """
    results = []
    for attachment_id in attachment_ids or []:
        try:
            # Ownership is enforced in the query itself: only documents
            # belonging to this user are returned.
            doc = attachments_collection.find_one(
                {"_id": ObjectId(attachment_id), "user": user}
            )
            if doc:
                results.append(
                    {
                        "id": str(doc["_id"]),
                        "content": doc["content"],
                        "token_count": doc.get("token_count", 0),
                        "path": doc.get("path", ""),
                    }
                )
        except Exception as e:
            # Best-effort: a bad/missing attachment is logged and skipped,
            # never fatal to the request.
            logger.error(f"Error retrieving attachment {attachment_id}: {e}")
    return results
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index f3599c7e..8f374aa7 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -14,7 +14,7 @@ from werkzeug.utils import secure_filename
from application.agents.tools.tool_manager import ToolManager
-from application.api.user.tasks import ingest, ingest_remote
+from application.api.user.tasks import ingest, ingest_remote, store_attachment
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.extensions import api
@@ -2476,3 +2476,71 @@ class UpdateChunk(Resource):
except Exception as e:
current_app.logger.error(f"Error updating chunk: {e}")
return make_response(jsonify({"success": False}), 500)
+
+
@user_ns.route("/api/store_attachment")
class StoreAttachment(Resource):
    @api.expect(
        api.model(
            "AttachmentModel",
            {
                "file": fields.Raw(required=True, description="File to upload"),
            },
        )
    )
    @api.doc(description="Stores a single attachment without vectorization or training")
    def post(self):
        """Save one uploaded file to disk and start async attachment processing."""
        decoded_token = request.decoded_token
        if not decoded_token:
            return make_response(jsonify({"success": False}), 401)

        file = request.files.get("file")
        if not file or file.filename == "":
            return make_response(
                jsonify({"status": "error", "message": "Missing file"}),
                400,
            )

        user = secure_filename(decoded_token.get("sub"))

        try:
            original_filename = secure_filename(file.filename)
            folder_name = original_filename
            # Directory layout: <uploads>/<user>/attachments/<filename>/
            save_dir = os.path.join(
                current_dir, settings.UPLOAD_FOLDER, user, "attachments", folder_name
            )
            os.makedirs(save_dir, exist_ok=True)

            file_path = os.path.join(save_dir, original_filename)
            if os.path.exists(file_path):
                # Avoid clobbering a previous upload of the same name:
                # suffix a timestamp before the extension.
                name_parts = os.path.splitext(original_filename)
                timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
                original_filename = f"{name_parts[0]}_{timestamp}{name_parts[1]}"
                file_path = os.path.join(save_dir, original_filename)

            file.save(file_path)
            current_app.logger.info(f"Saved file: {file_path}")

            # Hand the saved file off to the async worker.
            file_info = {"folder": folder_name, "filename": original_filename}
            task = store_attachment.delay(save_dir, file_info, user)

            return make_response(
                jsonify(
                    {
                        "success": True,
                        "task_id": task.id,
                        "message": "File uploaded successfully. Processing started.",
                    }
                ),
                200,
            )
        except Exception as err:
            current_app.logger.error(f"Error storing attachment: {err}")
            return make_response(jsonify({"success": False, "error": str(err)}), 400)
diff --git a/application/api/user/tasks.py b/application/api/user/tasks.py
index 73ad716e..24cff3c6 100644
--- a/application/api/user/tasks.py
+++ b/application/api/user/tasks.py
@@ -1,7 +1,7 @@
from datetime import timedelta
from application.celery_init import celery
-from application.worker import ingest_worker, remote_worker, sync_worker
+from application.worker import ingest_worker, remote_worker, sync_worker, attachment_worker
@celery.task(bind=True)
@@ -22,6 +22,12 @@ def schedule_syncs(self, frequency):
return resp
@celery.task(bind=True)
def store_attachment(self, directory, saved_files, user):
    """Celery entry point delegating attachment processing to attachment_worker."""
    return attachment_worker(self, directory, saved_files, user)
+
+
@celery.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
sender.add_periodic_task(
diff --git a/application/worker.py b/application/worker.py
index df0bbe7d..23ff0422 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -312,3 +312,85 @@ def sync_worker(self, frequency):
key: sync_counts[key]
for key in ["total_sync_count", "sync_success", "sync_failure"]
}
+
def attachment_worker(self, directory, file_info, user):
    """
    Process and store a single attachment without vectorization.

    Loads the file with SimpleDirectoryReader, counts its tokens, and stores
    the extracted text in the ``attachments`` Mongo collection so it can be
    injected into chat context later.

    Args:
        self: Reference to the instance of the task.
        directory (str): Base directory for storing files.
        file_info (dict): Dictionary with folder and filename info.
        user (str): User identifier.

    Returns:
        dict: Information about the processed attachment, or a dict with an
        ``error`` key describing why processing failed.
    """
    import datetime
    import os

    from application.utils import num_tokens_from_string

    mongo = MongoDB.get_client()
    db = mongo["docsgpt"]
    attachments_collection = db["attachments"]

    folder_name = file_info["folder"]
    filename = file_info["filename"]
    job_name = folder_name

    logging.info(f"Processing attachment: {job_name}", extra={"user": user, "job": job_name})
    self.update_state(state="PROGRESS", meta={"current": 10})

    file_path = os.path.join(directory, filename)
    logging.info(f"Processing file: {file_path}", extra={"user": user, "job": job_name})

    if not os.path.exists(file_path):
        logging.warning(f"File not found: {file_path}", extra={"user": user, "job": job_name})
        return {"error": "File not found"}

    try:
        reader = SimpleDirectoryReader(input_files=[file_path])
        documents = reader.load_data()

        self.update_state(state="PROGRESS", meta={"current": 50})

        if not documents:
            logging.warning("No content was extracted from the file",
                            extra={"user": user, "job": job_name})
            return {"error": "No content was extracted from the file"}

        # Join every extracted document: the reader may split a file into
        # several documents (e.g. per page) — previously only documents[0]
        # was stored, silently dropping the rest.
        content = "\n".join(doc.text for doc in documents)
        token_count = num_tokens_from_string(content)

        # Relative path includes the actual filename (was left as a
        # placeholder), so the stored record points at the real file.
        file_path_relative = f"{user}/attachments/{folder_name}/{filename}"

        attachment_id = attachments_collection.insert_one({
            "user": user,
            "path": file_path_relative,
            "content": content,
            "token_count": token_count,
            # Timezone-aware UTC timestamp, consistent with the rest of the app.
            "date": datetime.datetime.now(datetime.timezone.utc),
        }).inserted_id

        logging.info(f"Stored attachment with ID: {attachment_id}",
                     extra={"user": user, "job": job_name})

        self.update_state(state="PROGRESS", meta={"current": 100})

        return {
            "attachment_id": str(attachment_id),
            "filename": filename,
            "folder": folder_name,
            "path": file_path_relative,
            "token_count": token_count
        }
    except Exception as e:
        logging.error(f"Error processing file {filename}: {e}",
                      extra={"user": user, "job": job_name}, exc_info=True)
        return {"error": f"Error processing file: {str(e)}"}
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index 64c4c486..33c66bd1 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -63,7 +63,7 @@ export default function App() {
>
} />
} />
- } />
+ } />
} />
} />
diff --git a/frontend/src/Navigation.tsx b/frontend/src/Navigation.tsx
index 9e1889aa..0068c3d3 100644
--- a/frontend/src/Navigation.tsx
+++ b/frontend/src/Navigation.tsx
@@ -18,7 +18,6 @@ import Spinner from './assets/spinner.svg';
import Twitter from './assets/TwitterX.svg';
import UploadIcon from './assets/upload.svg';
import Help from './components/Help';
-import SourceDropdown from './components/SourceDropdown';
import {
handleAbort,
selectQueries,
@@ -31,22 +30,16 @@ import useDefaultDocument from './hooks/useDefaultDocument';
import useTokenAuth from './hooks/useTokenAuth';
import DeleteConvModal from './modals/DeleteConvModal';
import JWTModal from './modals/JWTModal';
-import { ActiveState, Doc } from './models/misc';
-import { getConversations, getDocs } from './preferences/preferenceApi';
+import { ActiveState } from './models/misc';
+import { getConversations } from './preferences/preferenceApi';
import {
selectApiKeyStatus,
selectConversationId,
selectConversations,
selectModalStateDeleteConv,
- selectPaginatedDocuments,
- selectSelectedDocs,
- selectSourceDocs,
selectToken,
setConversations,
setModalStateDeleteConv,
- setPaginatedDocuments,
- setSelectedDocs,
- setSourceDocs,
} from './preferences/preferenceSlice';
import Upload from './upload/Upload';
@@ -59,17 +52,13 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
const dispatch = useDispatch();
const token = useSelector(selectToken);
const queries = useSelector(selectQueries);
- const docs = useSelector(selectSourceDocs);
- const selectedDocs = useSelector(selectSelectedDocs);
const conversations = useSelector(selectConversations);
const modalStateDeleteConv = useSelector(selectModalStateDeleteConv);
const conversationId = useSelector(selectConversationId);
- const paginatedDocuments = useSelector(selectPaginatedDocuments);
const [isDeletingConversation, setIsDeletingConversation] = useState(false);
const { isMobile } = useMediaQuery();
const [isDarkTheme] = useDarkTheme();
- const [isDocsListOpen, setIsDocsListOpen] = useState(false);
const { t } = useTranslation();
const isApiKeySet = useSelector(selectApiKeyStatus);
@@ -124,32 +113,6 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
.catch((error) => console.error(error));
};
- const handleDeleteClick = (doc: Doc) => {
- userService
- .deletePath(doc.id ?? '', token)
- .then(() => {
- return getDocs(token);
- })
- .then((updatedDocs) => {
- dispatch(setSourceDocs(updatedDocs));
- const updatedPaginatedDocs = paginatedDocuments?.filter(
- (document) => document.id !== doc.id,
- );
- dispatch(
- setPaginatedDocuments(updatedPaginatedDocs || paginatedDocuments),
- );
- dispatch(
- setSelectedDocs(
- Array.isArray(updatedDocs) &&
- updatedDocs?.find(
- (doc: Doc) => doc.name.toLowerCase() === 'default',
- ),
- ),
- );
- })
- .catch((error) => console.error(error));
- };
-
const handleConversationClick = (index: string) => {
conversationService
.getConversation(index, token)
@@ -174,11 +137,13 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
}),
);
};
+
const newChat = () => {
if (queries && queries?.length > 0) {
resetConversation();
}
};
+
async function updateConversationName(updatedConversation: {
name: string;
id: string;
@@ -197,10 +162,6 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
});
}
- /*
- Needed to fix bug where if mobile nav was closed and then window was resized to desktop, nav would still be closed but the button to open would be gone, as per #1 on issue #146
- */
-
useEffect(() => {
setNavOpen(!isMobile);
}, [isMobile]);
@@ -209,7 +170,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
return (
<>
{!navOpen && (
-
+