mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 16:43:16 +00:00
Merge branch 'main' into feat/enhance-agents
This commit is contained in:
@@ -57,4 +57,8 @@ class ClassicAgent(BaseAgent):
|
||||
)
|
||||
|
||||
yield {"sources": retrieved_data}
|
||||
# Clean tool_call data: truncate each tool_call['result'] longer than 50 characters to its first 50 characters plus "..."
|
||||
for tool_call in self.tool_calls:
|
||||
if len(str(tool_call["result"])) > 50:
|
||||
tool_call["result"] = str(tool_call["result"])[:50] + "..."
|
||||
yield {"tool_calls": self.tool_calls.copy()}
|
||||
|
||||
@@ -87,6 +87,10 @@ class ReActAgent(BaseAgent):
|
||||
)
|
||||
|
||||
yield {"sources": retrieved_data}
|
||||
# Clean tool_call data: truncate each tool_call['result'] longer than 50 characters to its first 50 characters plus "..."
|
||||
for tool_call in self.tool_calls:
|
||||
if len(str(tool_call["result"])) > 50:
|
||||
tool_call["result"] = str(tool_call["result"])[:50] + "..."
|
||||
yield {"tool_calls": self.tool_calls.copy()}
|
||||
|
||||
final_answer = self._create_final_answer(query, self.observations, log_context)
|
||||
|
||||
@@ -28,6 +28,9 @@ from application.extensions import api
|
||||
from application.tts.google_tts import GoogleTTS
|
||||
from application.utils import check_required_fields, validate_function_name
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
from application.storage.storage_creator import StorageCreator
|
||||
|
||||
storage = StorageCreator.get_storage()
|
||||
|
||||
mongo = MongoDB.get_client()
|
||||
db = mongo[settings.MONGO_DB_NAME]
|
||||
@@ -461,59 +464,93 @@ class UploadFile(Resource):
|
||||
|
||||
zip_filename = f"{job_name}.zip"
|
||||
zip_path = f"{base_path}/{zip_filename}"
|
||||
zip_temp_path = None
|
||||
|
||||
def create_zip_archive(temp_paths, **kwargs):
|
||||
def create_zip_archive(temp_paths, job_name, storage):
|
||||
import tempfile
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
with tempfile.NamedTemporaryFile(
|
||||
delete=False, suffix=".zip"
|
||||
) as temp_zip_file:
|
||||
zip_output_path = temp_zip_file.name
|
||||
|
||||
with tempfile.TemporaryDirectory() as stage_dir:
|
||||
for path in temp_paths:
|
||||
file_data = storage.get_file(path)
|
||||
with open(
|
||||
os.path.join(temp_dir, os.path.basename(path)), "wb"
|
||||
) as f:
|
||||
f.write(file_data.read())
|
||||
try:
|
||||
file_data = storage.get_file(path)
|
||||
with open(
|
||||
os.path.join(stage_dir, os.path.basename(path)),
|
||||
"wb",
|
||||
) as f:
|
||||
f.write(file_data.read())
|
||||
except Exception as e:
|
||||
current_app.logger.error(
|
||||
f"Error processing file {path} for zipping: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
if os.path.exists(zip_output_path):
|
||||
os.remove(zip_output_path)
|
||||
raise
|
||||
try:
|
||||
shutil.make_archive(
|
||||
base_name=zip_output_path.replace(".zip", ""),
|
||||
format="zip",
|
||||
root_dir=stage_dir,
|
||||
)
|
||||
except Exception as e:
|
||||
current_app.logger.error(
|
||||
f"Error creating zip archive: {e}", exc_info=True
|
||||
)
|
||||
if os.path.exists(zip_output_path):
|
||||
os.remove(zip_output_path)
|
||||
raise
|
||||
|
||||
# Create zip archive
|
||||
zip_temp = shutil.make_archive(
|
||||
base_name=os.path.join(temp_dir, job_name),
|
||||
format="zip",
|
||||
root_dir=temp_dir,
|
||||
)
|
||||
return zip_output_path
|
||||
|
||||
return zip_temp
|
||||
try:
|
||||
zip_temp_path = create_zip_archive(temp_files, job_name, storage)
|
||||
with open(zip_temp_path, "rb") as zip_file:
|
||||
storage.save_file(zip_file, zip_path)
|
||||
|
||||
zip_temp_path = create_zip_archive(temp_files)
|
||||
with open(zip_temp_path, "rb") as zip_file:
|
||||
storage.save_file(zip_file, zip_path)
|
||||
task = ingest.delay(
|
||||
settings.UPLOAD_FOLDER,
|
||||
[
|
||||
".rst",
|
||||
".md",
|
||||
".pdf",
|
||||
".txt",
|
||||
".docx",
|
||||
".csv",
|
||||
".epub",
|
||||
".html",
|
||||
".mdx",
|
||||
".json",
|
||||
".xlsx",
|
||||
".pptx",
|
||||
".png",
|
||||
".jpg",
|
||||
".jpeg",
|
||||
],
|
||||
job_name,
|
||||
zip_filename,
|
||||
user,
|
||||
)
|
||||
finally:
|
||||
# Clean up temporary files
|
||||
for temp_path in temp_files:
|
||||
try:
|
||||
storage.delete_file(temp_path)
|
||||
except Exception as e:
|
||||
current_app.logger.error(
|
||||
f"Error deleting temporary file {temp_path}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Clean up temp files
|
||||
for temp_path in temp_files:
|
||||
storage.delete_file(temp_path)
|
||||
# Clean up the zip file if it was created
|
||||
if zip_temp_path and os.path.exists(zip_temp_path):
|
||||
os.remove(zip_temp_path)
|
||||
|
||||
task = ingest.delay(
|
||||
settings.UPLOAD_FOLDER,
|
||||
[
|
||||
".rst",
|
||||
".md",
|
||||
".pdf",
|
||||
".txt",
|
||||
".docx",
|
||||
".csv",
|
||||
".epub",
|
||||
".html",
|
||||
".mdx",
|
||||
".json",
|
||||
".xlsx",
|
||||
".pptx",
|
||||
".png",
|
||||
".jpg",
|
||||
".jpeg",
|
||||
],
|
||||
job_name,
|
||||
zip_filename,
|
||||
user,
|
||||
)
|
||||
else:
|
||||
else: # Keep this else block for single file upload
|
||||
# For single file
|
||||
file = files[0]
|
||||
filename = secure_filename(file.filename)
|
||||
@@ -541,7 +578,7 @@ class UploadFile(Resource):
|
||||
".jpeg",
|
||||
],
|
||||
job_name,
|
||||
filename,
|
||||
filename, # Corrected variable for single-file case
|
||||
user,
|
||||
)
|
||||
|
||||
@@ -1022,14 +1059,15 @@ class GetAgent(Resource):
|
||||
return make_response(jsonify({"status": "Not found"}), 404)
|
||||
data = {
|
||||
"id": str(agent["_id"]),
|
||||
"name": agent.get("name", ""),
|
||||
"name": agent["name"],
|
||||
"description": agent.get("description", ""),
|
||||
"source": (
|
||||
str(db.dereference(agent["source"])["_id"])
|
||||
if "source" in agent and isinstance(agent["source"], DBRef)
|
||||
str(source_doc["_id"])
|
||||
if isinstance(agent.get("source"), DBRef)
|
||||
and (source_doc := db.dereference(agent.get("source")))
|
||||
else ""
|
||||
),
|
||||
"chunks": agent.get("chunks", ""),
|
||||
"chunks": agent["chunks"],
|
||||
"retriever": agent.get("retriever", ""),
|
||||
"prompt_id": agent.get("prompt_id", ""),
|
||||
"tools": agent.get("tools", []),
|
||||
@@ -1068,14 +1106,15 @@ class GetAgents(Resource):
|
||||
list_agents = [
|
||||
{
|
||||
"id": str(agent["_id"]),
|
||||
"name": agent.get("name", ""),
|
||||
"name": agent["name"],
|
||||
"description": agent.get("description", ""),
|
||||
"source": (
|
||||
str(db.dereference(agent["source"])["_id"])
|
||||
if "source" in agent and isinstance(agent["source"], DBRef)
|
||||
str(source_doc["_id"])
|
||||
if isinstance(agent.get("source"), DBRef)
|
||||
and (source_doc := db.dereference(agent.get("source")))
|
||||
else ""
|
||||
),
|
||||
"chunks": agent.get("chunks", ""),
|
||||
"chunks": agent["chunks"],
|
||||
"retriever": agent.get("retriever", ""),
|
||||
"prompt_id": agent.get("prompt_id", ""),
|
||||
"tools": agent.get("tools", []),
|
||||
@@ -3300,16 +3339,16 @@ class StoreAttachment(Resource):
|
||||
|
||||
try:
|
||||
attachment_id = ObjectId()
|
||||
original_filename = secure_filename(file.filename)
|
||||
original_filename = secure_filename(os.path.basename(file.filename))
|
||||
relative_path = f"{settings.UPLOAD_FOLDER}/{user}/attachments/{str(attachment_id)}/{original_filename}"
|
||||
|
||||
file_content = file.read()
|
||||
metadata = storage.save_file(file, relative_path)
|
||||
|
||||
file_info = {
|
||||
"filename": original_filename,
|
||||
"attachment_id": str(attachment_id),
|
||||
"path": relative_path,
|
||||
"file_content": file_content,
|
||||
"metadata": metadata,
|
||||
}
|
||||
|
||||
task = store_attachment.delay(file_info, user)
|
||||
|
||||
@@ -158,7 +158,7 @@ class SimpleDirectoryReader(BaseReader):
|
||||
data = f.read()
|
||||
# Prepare metadata for this file
|
||||
if self.file_metadata is not None:
|
||||
file_metadata = self.file_metadata(str(input_file))
|
||||
file_metadata = self.file_metadata(input_file.name)
|
||||
else:
|
||||
# Provide a default empty metadata
|
||||
file_metadata = {'title': '', 'store': ''}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import datetime
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
@@ -445,76 +444,61 @@ def attachment_worker(self, file_info, user):
|
||||
filename = file_info["filename"]
|
||||
attachment_id = file_info["attachment_id"]
|
||||
relative_path = file_info["path"]
|
||||
file_content = file_info["file_content"]
|
||||
metadata = file_info.get("metadata", {})
|
||||
|
||||
try:
|
||||
self.update_state(state="PROGRESS", meta={"current": 10})
|
||||
storage_type = getattr(settings, "STORAGE_TYPE", "local")
|
||||
storage = StorageCreator.create_storage(storage_type)
|
||||
storage = StorageCreator.get_storage()
|
||||
|
||||
self.update_state(
|
||||
state="PROGRESS", meta={"current": 30, "status": "Processing content"}
|
||||
)
|
||||
|
||||
with tempfile.NamedTemporaryFile(
|
||||
suffix=os.path.splitext(filename)[1]
|
||||
) as temp_file:
|
||||
temp_file.write(file_content)
|
||||
temp_file.flush()
|
||||
reader = SimpleDirectoryReader(
|
||||
input_files=[temp_file.name], exclude_hidden=True, errors="ignore"
|
||||
)
|
||||
documents = reader.load_data()
|
||||
content = storage.process_file(
|
||||
relative_path,
|
||||
lambda local_path, **kwargs: SimpleDirectoryReader(
|
||||
input_files=[local_path], exclude_hidden=True, errors="ignore"
|
||||
).load_data()[0].text
|
||||
)
|
||||
|
||||
token_count = num_tokens_from_string(content)
|
||||
|
||||
if not documents:
|
||||
logging.warning(f"No content extracted from file: {filename}")
|
||||
raise ValueError(f"Failed to extract content from file: {filename}")
|
||||
self.update_state(
|
||||
state="PROGRESS", meta={"current": 80, "status": "Storing in database"}
|
||||
)
|
||||
|
||||
content = documents[0].text
|
||||
token_count = num_tokens_from_string(content)
|
||||
mime_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"
|
||||
|
||||
self.update_state(
|
||||
state="PROGRESS", meta={"current": 60, "status": "Saving file"}
|
||||
)
|
||||
file_obj = io.BytesIO(file_content)
|
||||
|
||||
metadata = storage.save_file(file_obj, relative_path)
|
||||
|
||||
mime_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"
|
||||
|
||||
self.update_state(
|
||||
state="PROGRESS", meta={"current": 80, "status": "Storing in database"}
|
||||
)
|
||||
|
||||
doc_id = ObjectId(attachment_id)
|
||||
attachments_collection.insert_one(
|
||||
{
|
||||
"_id": doc_id,
|
||||
"user": user,
|
||||
"path": relative_path,
|
||||
"content": content,
|
||||
"token_count": token_count,
|
||||
"mime_type": mime_type,
|
||||
"date": datetime.datetime.now(),
|
||||
"metadata": metadata,
|
||||
}
|
||||
)
|
||||
|
||||
logging.info(
|
||||
f"Stored attachment with ID: {attachment_id}", extra={"user": user}
|
||||
)
|
||||
|
||||
self.update_state(
|
||||
state="PROGRESS", meta={"current": 100, "status": "Complete"}
|
||||
)
|
||||
|
||||
return {
|
||||
"filename": filename,
|
||||
doc_id = ObjectId(attachment_id)
|
||||
attachments_collection.insert_one(
|
||||
{
|
||||
"_id": doc_id,
|
||||
"user": user,
|
||||
"path": relative_path,
|
||||
"content": content,
|
||||
"token_count": token_count,
|
||||
"attachment_id": attachment_id,
|
||||
"mime_type": mime_type,
|
||||
"date": datetime.datetime.now(),
|
||||
"metadata": metadata,
|
||||
}
|
||||
)
|
||||
|
||||
logging.info(
|
||||
f"Stored attachment with ID: {attachment_id}", extra={"user": user}
|
||||
)
|
||||
|
||||
self.update_state(
|
||||
state="PROGRESS", meta={"current": 100, "status": "Complete"}
|
||||
)
|
||||
|
||||
return {
|
||||
"filename": filename,
|
||||
"path": relative_path,
|
||||
"token_count": token_count,
|
||||
"attachment_id": attachment_id,
|
||||
"mime_type": mime_type,
|
||||
"metadata": metadata,
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logging.error(
|
||||
|
||||
Reference in New Issue
Block a user