Merge branch 'main' into feat/enhance-agents

This commit is contained in:
Siddhant Rai
2025-05-12 13:45:18 +05:30
49 changed files with 2518 additions and 642 deletions

View File

@@ -57,4 +57,8 @@ class ClassicAgent(BaseAgent):
)
yield {"sources": retrieved_data}
# Trim tool-call data before yielding it: keep only the first 50 characters of each tool_call['result'] (the entries in self.tool_calls are modified in place)
for tool_call in self.tool_calls:
if len(str(tool_call["result"])) > 50:
tool_call["result"] = str(tool_call["result"])[:50] + "..."
yield {"tool_calls": self.tool_calls.copy()}

View File

@@ -87,6 +87,10 @@ class ReActAgent(BaseAgent):
)
yield {"sources": retrieved_data}
# Trim tool-call data before yielding it: keep only the first 50 characters of each tool_call['result'] (the entries in self.tool_calls are modified in place)
for tool_call in self.tool_calls:
if len(str(tool_call["result"])) > 50:
tool_call["result"] = str(tool_call["result"])[:50] + "..."
yield {"tool_calls": self.tool_calls.copy()}
final_answer = self._create_final_answer(query, self.observations, log_context)

View File

@@ -28,6 +28,9 @@ from application.extensions import api
from application.tts.google_tts import GoogleTTS
from application.utils import check_required_fields, validate_function_name
from application.vectorstore.vector_creator import VectorCreator
from application.storage.storage_creator import StorageCreator
storage = StorageCreator.get_storage()
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
@@ -461,59 +464,93 @@ class UploadFile(Resource):
zip_filename = f"{job_name}.zip"
zip_path = f"{base_path}/{zip_filename}"
zip_temp_path = None
# NOTE(review): this span contains two variants of the nested helper
# `create_zip_archive`, interleaved line by line (it comes from a diff hunk
# whose +/- markers and indentation were lost). Judging by the call sites
# that follow, the `(temp_paths, **kwargs)` signature pairs with
# `create_zip_archive(temp_files)` and the `(temp_paths, job_name, storage)`
# signature pairs with `create_zip_archive(temp_files, job_name, storage)`.
# Which variant is current must be confirmed against the repository.
#
# Purpose (both variants): read every path in `temp_paths` through
# `storage.get_file`, write each one into a local temporary directory under
# its basename, zip that directory with `shutil.make_archive`, and return the
# local path of the zip file.
def create_zip_archive(temp_paths, **kwargs):
def create_zip_archive(temp_paths, job_name, storage):
import tempfile
# Variant staging into `temp_dir`: it reads `storage` and `job_name` without
# receiving them as named parameters, so they presumably come from the
# enclosing scope -- TODO confirm.
with tempfile.TemporaryDirectory() as temp_dir:
# Variant staging into `stage_dir`: a named temporary ".zip" file is created
# with delete=False, so it is not removed when the `with` block closes it.
# The code following this helper removes `zip_temp_path` with os.remove.
with tempfile.NamedTemporaryFile(
delete=False, suffix=".zip"
) as temp_zip_file:
zip_output_path = temp_zip_file.name
with tempfile.TemporaryDirectory() as stage_dir:
for path in temp_paths:
# `temp_dir` variant: copy the stored file with no error handling.
file_data = storage.get_file(path)
with open(
os.path.join(temp_dir, os.path.basename(path)), "wb"
) as f:
f.write(file_data.read())
# `stage_dir` variant: same copy, wrapped in a try block.
try:
file_data = storage.get_file(path)
with open(
os.path.join(stage_dir, os.path.basename(path)),
"wb",
) as f:
f.write(file_data.read())
# On any failure: log it, delete the temporary zip file if it exists, and
# re-raise the original exception to the caller.
except Exception as e:
current_app.logger.error(
f"Error processing file {path} for zipping: {e}",
exc_info=True,
)
if os.path.exists(zip_output_path):
os.remove(zip_output_path)
raise
# `shutil.make_archive` appends the ".zip" extension itself, so the suffix is
# removed from `base_name` here.
# NOTE(review): `str.replace` removes every ".zip" occurrence in the path,
# not only the trailing suffix.
try:
shutil.make_archive(
base_name=zip_output_path.replace(".zip", ""),
format="zip",
root_dir=stage_dir,
)
# Same failure handling as above: log, delete the temporary zip, re-raise.
except Exception as e:
current_app.logger.error(
f"Error creating zip archive: {e}", exc_info=True
)
if os.path.exists(zip_output_path):
os.remove(zip_output_path)
raise
# Create zip archive
# (`temp_dir` variant: the archive is written inside `temp_dir` itself and
# named after `job_name`.)
zip_temp = shutil.make_archive(
base_name=os.path.join(temp_dir, job_name),
format="zip",
root_dir=temp_dir,
)
# One return per variant: `zip_output_path` belongs to the `stage_dir`
# variant, `zip_temp` to the `temp_dir` variant.
return zip_output_path
return zip_temp
try:
zip_temp_path = create_zip_archive(temp_files, job_name, storage)
with open(zip_temp_path, "rb") as zip_file:
storage.save_file(zip_file, zip_path)
zip_temp_path = create_zip_archive(temp_files)
with open(zip_temp_path, "rb") as zip_file:
storage.save_file(zip_file, zip_path)
task = ingest.delay(
settings.UPLOAD_FOLDER,
[
".rst",
".md",
".pdf",
".txt",
".docx",
".csv",
".epub",
".html",
".mdx",
".json",
".xlsx",
".pptx",
".png",
".jpg",
".jpeg",
],
job_name,
zip_filename,
user,
)
finally:
# Clean up temporary files
for temp_path in temp_files:
try:
storage.delete_file(temp_path)
except Exception as e:
current_app.logger.error(
f"Error deleting temporary file {temp_path}: {e}",
exc_info=True,
)
# Clean up temp files
for temp_path in temp_files:
storage.delete_file(temp_path)
# Clean up the zip file if it was created
if zip_temp_path and os.path.exists(zip_temp_path):
os.remove(zip_temp_path)
task = ingest.delay(
settings.UPLOAD_FOLDER,
[
".rst",
".md",
".pdf",
".txt",
".docx",
".csv",
".epub",
".html",
".mdx",
".json",
".xlsx",
".pptx",
".png",
".jpg",
".jpeg",
],
job_name,
zip_filename,
user,
)
else:
else: # Keep this else block for single file upload
# For single file
file = files[0]
filename = secure_filename(file.filename)
@@ -541,7 +578,7 @@ class UploadFile(Resource):
".jpeg",
],
job_name,
filename,
filename, # Corrected variable for single-file case
user,
)
@@ -1022,14 +1059,15 @@ class GetAgent(Resource):
return make_response(jsonify({"status": "Not found"}), 404)
data = {
"id": str(agent["_id"]),
"name": agent.get("name", ""),
"name": agent["name"],
"description": agent.get("description", ""),
"source": (
str(db.dereference(agent["source"])["_id"])
if "source" in agent and isinstance(agent["source"], DBRef)
str(source_doc["_id"])
if isinstance(agent.get("source"), DBRef)
and (source_doc := db.dereference(agent.get("source")))
else ""
),
"chunks": agent.get("chunks", ""),
"chunks": agent["chunks"],
"retriever": agent.get("retriever", ""),
"prompt_id": agent.get("prompt_id", ""),
"tools": agent.get("tools", []),
@@ -1068,14 +1106,15 @@ class GetAgents(Resource):
list_agents = [
{
"id": str(agent["_id"]),
"name": agent.get("name", ""),
"name": agent["name"],
"description": agent.get("description", ""),
"source": (
str(db.dereference(agent["source"])["_id"])
if "source" in agent and isinstance(agent["source"], DBRef)
str(source_doc["_id"])
if isinstance(agent.get("source"), DBRef)
and (source_doc := db.dereference(agent.get("source")))
else ""
),
"chunks": agent.get("chunks", ""),
"chunks": agent["chunks"],
"retriever": agent.get("retriever", ""),
"prompt_id": agent.get("prompt_id", ""),
"tools": agent.get("tools", []),
@@ -3300,16 +3339,16 @@ class StoreAttachment(Resource):
try:
attachment_id = ObjectId()
original_filename = secure_filename(file.filename)
original_filename = secure_filename(os.path.basename(file.filename))
relative_path = f"{settings.UPLOAD_FOLDER}/{user}/attachments/{str(attachment_id)}/{original_filename}"
file_content = file.read()
metadata = storage.save_file(file, relative_path)
file_info = {
"filename": original_filename,
"attachment_id": str(attachment_id),
"path": relative_path,
"file_content": file_content,
"metadata": metadata,
}
task = store_attachment.delay(file_info, user)

View File

@@ -158,7 +158,7 @@ class SimpleDirectoryReader(BaseReader):
data = f.read()
# Prepare metadata for this file
if self.file_metadata is not None:
file_metadata = self.file_metadata(str(input_file))
file_metadata = self.file_metadata(input_file.name)
else:
# Provide a default empty metadata
file_metadata = {'title': '', 'store': ''}

View File

@@ -1,5 +1,4 @@
import datetime
import io
import json
import logging
import mimetypes
@@ -445,76 +444,61 @@ def attachment_worker(self, file_info, user):
filename = file_info["filename"]
attachment_id = file_info["attachment_id"]
relative_path = file_info["path"]
file_content = file_info["file_content"]
metadata = file_info.get("metadata", {})
try:
self.update_state(state="PROGRESS", meta={"current": 10})
storage_type = getattr(settings, "STORAGE_TYPE", "local")
storage = StorageCreator.create_storage(storage_type)
storage = StorageCreator.get_storage()
self.update_state(
state="PROGRESS", meta={"current": 30, "status": "Processing content"}
)
with tempfile.NamedTemporaryFile(
suffix=os.path.splitext(filename)[1]
) as temp_file:
temp_file.write(file_content)
temp_file.flush()
reader = SimpleDirectoryReader(
input_files=[temp_file.name], exclude_hidden=True, errors="ignore"
)
documents = reader.load_data()
content = storage.process_file(
relative_path,
lambda local_path, **kwargs: SimpleDirectoryReader(
input_files=[local_path], exclude_hidden=True, errors="ignore"
).load_data()[0].text
)
token_count = num_tokens_from_string(content)
if not documents:
logging.warning(f"No content extracted from file: {filename}")
raise ValueError(f"Failed to extract content from file: {filename}")
self.update_state(
state="PROGRESS", meta={"current": 80, "status": "Storing in database"}
)
content = documents[0].text
token_count = num_tokens_from_string(content)
mime_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"
self.update_state(
state="PROGRESS", meta={"current": 60, "status": "Saving file"}
)
file_obj = io.BytesIO(file_content)
metadata = storage.save_file(file_obj, relative_path)
mime_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"
self.update_state(
state="PROGRESS", meta={"current": 80, "status": "Storing in database"}
)
doc_id = ObjectId(attachment_id)
attachments_collection.insert_one(
{
"_id": doc_id,
"user": user,
"path": relative_path,
"content": content,
"token_count": token_count,
"mime_type": mime_type,
"date": datetime.datetime.now(),
"metadata": metadata,
}
)
logging.info(
f"Stored attachment with ID: {attachment_id}", extra={"user": user}
)
self.update_state(
state="PROGRESS", meta={"current": 100, "status": "Complete"}
)
return {
"filename": filename,
doc_id = ObjectId(attachment_id)
attachments_collection.insert_one(
{
"_id": doc_id,
"user": user,
"path": relative_path,
"content": content,
"token_count": token_count,
"attachment_id": attachment_id,
"mime_type": mime_type,
"date": datetime.datetime.now(),
"metadata": metadata,
}
)
logging.info(
f"Stored attachment with ID: {attachment_id}", extra={"user": user}
)
self.update_state(
state="PROGRESS", meta={"current": 100, "status": "Complete"}
)
return {
"filename": filename,
"path": relative_path,
"token_count": token_count,
"attachment_id": attachment_id,
"mime_type": mime_type,
"metadata": metadata,
}
except Exception as e:
logging.error(