diff --git a/application/api/user/routes.py b/application/api/user/routes.py index 4efc4cb7..13cab96c 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -19,7 +19,7 @@ from application.core.settings import settings from application.extensions import api from application.tools.tool_manager import ToolManager from application.tts.google_tts import GoogleTTS -from application.utils import check_required_fields +from application.utils import check_required_fields, validate_function_name from application.vectorstore.vector_creator import VectorCreator mongo = MongoDB.get_client() @@ -1511,7 +1511,7 @@ class GetFeedbackAnalytics(Resource): except Exception as err: current_app.logger.error(f"Error getting API key: {err}") return make_response(jsonify({"success": False}), 400) - + end_date = datetime.datetime.now(datetime.timezone.utc) if filter_option == "last_hour": @@ -1558,11 +1558,8 @@ class GetFeedbackAnalytics(Resource): {"$match": {"queries.feedback": {"$exists": True}}}, { "$group": { - "_id": { - "time": date_field, - "feedback": "$queries.feedback" - }, - "count": {"$sum": 1} + "_id": {"time": date_field, "feedback": "$queries.feedback"}, + "count": {"$sum": 1}, } }, { @@ -1573,7 +1570,7 @@ class GetFeedbackAnalytics(Resource): "$cond": [ {"$eq": ["$_id.feedback", "LIKE"]}, "$count", - 0 + 0, ] } }, @@ -1582,13 +1579,13 @@ class GetFeedbackAnalytics(Resource): "$cond": [ {"$eq": ["$_id.feedback", "DISLIKE"]}, "$count", - 0 + 0, ] } - } + }, } }, - {"$sort": {"_id": 1}} + {"$sort": {"_id": 1}}, ] feedback_data = conversations_collection.aggregate(pipeline) @@ -1607,7 +1604,7 @@ class GetFeedbackAnalytics(Resource): for entry in feedback_data: daily_feedback[entry["_id"]] = { "positive": entry["positive"], - "negative": entry["negative"] + "negative": entry["negative"], } except Exception as err: @@ -1935,6 +1932,16 @@ class UpdateTool(Resource): if "actions" in data: update_data["actions"] = data["actions"] if "config" in data: + if "actions" in data["config"]: + for action_name in list(data["config"]["actions"].keys()): + if not validate_function_name(action_name): + return make_response( + jsonify({ + "success": False, + "message": f"Invalid function name '{action_name}'. Function names must match pattern '^[a-zA-Z0-9_-]+$'.", + "param": "tools[].function.name" + }), 400 + ) update_data["config"] = data["config"] if "status" in data: update_data["status"] = data["status"] @@ -2076,3 +2083,199 @@ class DeleteTool(Resource): return {"success": False}, 400 return {"success": True}, 200 + + +def get_vector_store(source_id): + """ + Get the Vector Store + Args: + source_id (str): source id of the document + """ + + store = VectorCreator.create_vectorstore( + settings.VECTOR_STORE, + source_id=source_id, + embeddings_key=os.getenv("EMBEDDINGS_KEY"), + ) + return store + + +@user_ns.route("/api/get_chunks") +class GetChunks(Resource): + @api.doc( + description="Retrieves all chunks associated with a document", + params={"id": "The document ID"}, + ) + def get(self): + doc_id = request.args.get("id") + page = int(request.args.get("page", 1)) + per_page = int(request.args.get("per_page", 10)) + + if not ObjectId.is_valid(doc_id): + return make_response(jsonify({"error": "Invalid doc_id"}), 400) + + try: + store = get_vector_store(doc_id) + chunks = store.get_chunks() + total_chunks = len(chunks) + start = (page - 1) * per_page + end = start + per_page + paginated_chunks = chunks[start:end] + + return make_response( + jsonify( + { + "page": page, + "per_page": per_page, + "total": total_chunks, + "chunks": paginated_chunks, + } + ), + 200, + ) + + except Exception as e: + return make_response(jsonify({"error": str(e)}), 500) + + +@user_ns.route("/api/add_chunk") +class AddChunk(Resource): + @api.expect( + api.model( + "AddChunkModel", + { + "id": fields.String(required=True, description="Document ID"), + "text": fields.String(required=True, description="Text of the chunk"), + "metadata": fields.Raw( + required=False, + description="Metadata associated with the chunk", + ), + }, + ) + ) + @api.doc( + description="Adds a new chunk to the document", + ) + def post(self): + data = request.get_json() + required_fields = ["id", "text"] + missing_fields = check_required_fields(data, required_fields) + if missing_fields: + return missing_fields + + doc_id = data.get("id") + text = data.get("text") + metadata = data.get("metadata", {}) + + if not ObjectId.is_valid(doc_id): + return make_response(jsonify({"error": "Invalid doc_id"}), 400) + + try: + store = get_vector_store(doc_id) + chunk_id = store.add_chunk(text, metadata) + return make_response( + jsonify({"message": "Chunk added successfully", "chunk_id": chunk_id}), + 201, + ) + except Exception as e: + return make_response(jsonify({"error": str(e)}), 500) + + +@user_ns.route("/api/delete_chunk") +class DeleteChunk(Resource): + @api.doc( + description="Deletes a specific chunk from the document.", + params={"id": "The document ID", "chunk_id": "The ID of the chunk to delete"}, + ) + def delete(self): + doc_id = request.args.get("id") + chunk_id = request.args.get("chunk_id") + + if not ObjectId.is_valid(doc_id): + return make_response(jsonify({"error": "Invalid doc_id"}), 400) + + try: + store = get_vector_store(doc_id) + deleted = store.delete_chunk(chunk_id) + if deleted: + return make_response( + jsonify({"message": "Chunk deleted successfully"}), 200 + ) + else: + return make_response( + jsonify({"message": "Chunk not found or could not be deleted"}), + 404, + ) + except Exception as e: + return make_response(jsonify({"error": str(e)}), 500) + + +@user_ns.route("/api/update_chunk") +class UpdateChunk(Resource): + @api.expect( + api.model( + "UpdateChunkModel", + { + "id": fields.String(required=True, description="Document ID"), + "chunk_id": fields.String( + required=True, description="Chunk ID to update" + ), + "text": fields.String( + required=False, description="New text of the chunk" + ), + "metadata": fields.Raw( + required=False, + description="Updated metadata associated with the chunk", + ), + }, + ) + ) + @api.doc( + description="Updates an existing chunk in the document.", + ) + def put(self): + data = request.get_json() + required_fields = ["id", "chunk_id"] + missing_fields = check_required_fields(data, required_fields) + if missing_fields: + return missing_fields + + doc_id = data.get("id") + chunk_id = data.get("chunk_id") + text = data.get("text") + metadata = data.get("metadata") + + if not ObjectId.is_valid(doc_id): + return make_response(jsonify({"error": "Invalid doc_id"}), 400) + + try: + store = get_vector_store(doc_id) + chunks = store.get_chunks() + existing_chunk = next((c for c in chunks if c["doc_id"] == chunk_id), None) + if not existing_chunk: + return make_response(jsonify({"error": "Chunk not found"}), 404) + + deleted = store.delete_chunk(chunk_id) + if not deleted: + return make_response( + jsonify({"error": "Failed to delete existing chunk"}), 500 + ) + + new_text = text if text is not None else existing_chunk["text"] + new_metadata = ( + metadata if metadata is not None else existing_chunk["metadata"] + ) + + new_chunk_id = store.add_chunk(new_text, new_metadata) + + return make_response( + jsonify( + { + "message": "Chunk updated successfully", + "new_chunk_id": new_chunk_id, + } + ), + 200, + ) + except Exception as e: + return make_response(jsonify({"error": str(e)}), 500) diff --git a/application/requirements.txt b/application/requirements.txt index 5732809b..fa4e9178 100644 --- a/application/requirements.txt +++ b/application/requirements.txt @@ -39,7 +39,7 @@ langsmith==0.2.10 lazy-object-proxy==1.10.0 lxml==5.3.0 markupsafe==3.0.2 -marshmallow==3.24.1 +marshmallow==3.26.1 mpmath==1.3.0 multidict==6.1.0 mypy-extensions==1.0.0 diff --git a/application/tools/agent.py b/application/tools/agent.py index de8ad725..d0743cd9 100644 --- a/application/tools/agent.py +++ b/application/tools/agent.py @@ -52,6 +52,10 @@ class Agent: }, } for tool_id, tool in tools_dict.items() + if ( + (tool["name"] == "api_tool" and "actions" in tool.get("config", {})) + or (tool["name"] != "api_tool" and "actions" in tool) + ) for action in ( tool["config"]["actions"].values() if tool["name"] == "api_tool" diff --git a/application/utils.py b/application/utils.py index 690eac5e..54d2086f 100644 --- a/application/utils.py +++ b/application/utils.py @@ -1,6 +1,7 @@ import tiktoken import hashlib from flask import jsonify, make_response +import re _encoding = None @@ -95,3 +96,9 @@ def limit_chat_history(history, max_token_limit=None, gpt_model="docsgpt"): break return trimmed_history + +def validate_function_name(function_name): + """Validates if a function name matches the allowed pattern.""" + if not re.match(r"^[a-zA-Z0-9_-]+$", function_name): + return False + return True \ No newline at end of file diff --git a/application/vectorstore/faiss.py b/application/vectorstore/faiss.py index afa55db9..87ffcccb 100644 --- a/application/vectorstore/faiss.py +++ b/application/vectorstore/faiss.py @@ -1,8 +1,12 @@ -from langchain_community.vectorstores import FAISS -from application.vectorstore.base import BaseVectorStore -from application.core.settings import settings import os +from langchain_community.vectorstores import FAISS + +from application.core.settings import settings +from application.parser.schema.base import Document +from application.vectorstore.base import BaseVectorStore + + def get_vectorstore(path: str) -> str: if path: vectorstore = os.path.join("application", "indexes", path) @@ -10,21 +14,25 @@ def get_vectorstore(path: str) -> str: vectorstore = os.path.join("application") return vectorstore + class FaissStore(BaseVectorStore): def __init__(self, source_id: str, embeddings_key: str, docs_init=None): super().__init__() + self.source_id = source_id self.path = get_vectorstore(source_id) - embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key) + self.embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key) try: if docs_init: - self.docsearch = FAISS.from_documents(docs_init, embeddings) + self.docsearch = FAISS.from_documents(docs_init, self.embeddings) else: - self.docsearch = FAISS.load_local(self.path, embeddings, allow_dangerous_deserialization=True) + self.docsearch = FAISS.load_local( + self.path, self.embeddings, allow_dangerous_deserialization=True + ) except Exception: raise - self.assert_embedding_dimensions(embeddings) + self.assert_embedding_dimensions(self.embeddings) def search(self, *args, **kwargs): return self.docsearch.similarity_search(*args, **kwargs) @@ -40,11 +48,42 @@ class FaissStore(BaseVectorStore): def assert_embedding_dimensions(self, embeddings): """Check that the word embedding dimension of the docsearch index matches the dimension of the word embeddings used.""" - if settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2": - word_embedding_dimension = getattr(embeddings, 'dimension', None) + if ( + settings.EMBEDDINGS_NAME + == "huggingface_sentence-transformers/all-mpnet-base-v2" + ): + word_embedding_dimension = getattr(embeddings, "dimension", None) if word_embedding_dimension is None: - raise AttributeError("'dimension' attribute not found in embeddings instance.") - + raise AttributeError( + "'dimension' attribute not found in embeddings instance." + ) + docsearch_index_dimension = self.docsearch.index.d if word_embedding_dimension != docsearch_index_dimension: - raise ValueError(f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) != docsearch index dimension ({docsearch_index_dimension})") + raise ValueError( + f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) != docsearch index dimension ({docsearch_index_dimension})" + ) + + def get_chunks(self): + chunks = [] + if self.docsearch: + for doc_id, doc in self.docsearch.docstore._dict.items(): + chunk_data = { + "doc_id": doc_id, + "text": doc.page_content, + "metadata": doc.metadata, + } + chunks.append(chunk_data) + return chunks + + def add_chunk(self, text, metadata=None): + metadata = metadata or {} + doc = Document(text=text, extra_info=metadata).to_langchain_format() + doc_id = self.docsearch.add_documents([doc]) + self.save_local(self.path) + return doc_id + + def delete_chunk(self, chunk_id): + self.delete_index([chunk_id]) + self.save_local(self.path) + return True diff --git a/application/vectorstore/mongodb.py b/application/vectorstore/mongodb.py index c577a5d5..94b757e0 100644 --- a/application/vectorstore/mongodb.py +++ b/application/vectorstore/mongodb.py @@ -124,3 +124,53 @@ class MongoDBVectorStore(BaseVectorStore): def delete_index(self, *args, **kwargs): self._collection.delete_many({"source_id": self._source_id}) + + def get_chunks(self): + try: + chunks = [] + cursor = self._collection.find({"source_id": self._source_id}) + for doc in cursor: + doc_id = str(doc.get("_id")) + text = doc.get(self._text_key) + metadata = { + k: v + for k, v in doc.items() + if k + not in ["_id", self._text_key, self._embedding_key, "source_id"] + } + + if text: + chunks.append( + {"doc_id": doc_id, "text": text, "metadata": metadata} + ) + + return chunks + except Exception as e: + print(f"Error getting chunks: {e}") + return [] + + def add_chunk(self, text, metadata=None): + metadata = metadata or {} + embeddings = self._embedding.embed_documents([text]) + if not embeddings: + raise ValueError("Could not generate embedding for chunk") + + chunk_data = { + self._text_key: text, + self._embedding_key: embeddings[0], + "source_id": self._source_id, + **metadata, + } + result = self._collection.insert_one(chunk_data) + return str(result.inserted_id) + + def delete_chunk(self, chunk_id): + try: + from bson.objectid import ObjectId + + object_id = ObjectId(chunk_id) + result = self._collection.delete_one({"_id": object_id}) + return result.deleted_count > 0 + except Exception as e: + print(f"Error deleting chunk: {e}") + return False diff --git a/frontend/src/api/endpoints.ts b/frontend/src/api/endpoints.ts index 66c334c1..9bf659de 100644 --- a/frontend/src/api/endpoints.ts +++ b/frontend/src/api/endpoints.ts @@ -24,6 +24,12 @@ const endpoints = { UPDATE_TOOL_STATUS: '/api/update_tool_status', UPDATE_TOOL: '/api/update_tool', DELETE_TOOL: '/api/delete_tool', + GET_CHUNKS: (docId: string, page: number, per_page: number) => + `/api/get_chunks?id=${docId}&page=${page}&per_page=${per_page}`, + ADD_CHUNK: '/api/add_chunk', + DELETE_CHUNK: (docId: string, chunkId: string) => + `/api/delete_chunk?id=${docId}&chunk_id=${chunkId}`, + UPDATE_CHUNK: '/api/update_chunk', }, CONVERSATION: { ANSWER: '/api/answer', diff --git a/frontend/src/api/services/userService.ts b/frontend/src/api/services/userService.ts index 8a9b5858..e7f367f1 100644 --- a/frontend/src/api/services/userService.ts +++ b/frontend/src/api/services/userService.ts @@ -47,6 +47,18 @@ const userService = { apiClient.post(endpoints.USER.UPDATE_TOOL, data), deleteTool: (data: any): Promise => apiClient.post(endpoints.USER.DELETE_TOOL, data), + getDocumentChunks: ( + docId: string, + page: number, + perPage: number, + ): Promise => + apiClient.get(endpoints.USER.GET_CHUNKS(docId, page, perPage)), + addChunk: (data: any): Promise => + apiClient.post(endpoints.USER.ADD_CHUNK, data), + deleteChunk: (docId: string, chunkId: string): Promise => + apiClient.delete(endpoints.USER.DELETE_CHUNK(docId, chunkId)), + updateChunk: (data: any): Promise => + apiClient.put(endpoints.USER.UPDATE_CHUNK, data), }; export default userService; diff --git a/frontend/src/components/Spinner.tsx b/frontend/src/components/Spinner.tsx new file mode 100644 index 00000000..d34a5665 --- /dev/null +++ b/frontend/src/components/Spinner.tsx @@ -0,0 +1,43 @@ +import React from 'react'; + +type SpinnerProps = { + size?: 'small' | 'medium' | 'large'; + color?: string; +}; + +export default function Spinner({ + size = 'medium', + color = 'grey', +}: SpinnerProps) { + const sizeMap = { + small: '20px', + medium: '30px', + large: '40px', + }; + const spinnerSize = sizeMap[size]; + + const spinnerStyle = { + width: spinnerSize, + height: spinnerSize, + aspectRatio: '1', + borderRadius: '50%', + background: ` + radial-gradient(farthest-side, ${color} 94%, #0000) top/8px 8px no-repeat, + conic-gradient(#0000 30%, ${color}) + `, + WebkitMask: + 'radial-gradient(farthest-side, #0000 calc(100% - 8px), #000 0)', + animation: 'l13 1s infinite linear', + } as React.CSSProperties; + + const keyframesStyle = `@keyframes l13 { + 100% { transform: rotate(1turn) } + }`; + + return ( + <> + +
+ + ); +} diff --git a/frontend/src/modals/AddActionModal.tsx b/frontend/src/modals/AddActionModal.tsx index c52d89f8..6ff88ae6 100644 --- a/frontend/src/modals/AddActionModal.tsx +++ b/frontend/src/modals/AddActionModal.tsx @@ -1,21 +1,40 @@ -import React from 'react'; +import React, { useState } from 'react'; import { useTranslation } from 'react-i18next'; import Exit from '../assets/exit.svg'; import Input from '../components/Input'; import { ActiveState } from '../models/misc'; +const isValidFunctionName = (name: string): boolean => { + const pattern = /^[a-zA-Z0-9_-]+$/; + return pattern.test(name); +}; + +interface AddActionModalProps { + modalState: ActiveState; + setModalState: (state: ActiveState) => void; + handleSubmit: (actionName: string) => void; +} + export default function AddActionModal({ modalState, setModalState, handleSubmit, -}: { - modalState: ActiveState; - setModalState: (state: ActiveState) => void; - handleSubmit: (actionName: string) => void; -}) { +}: AddActionModalProps) { const { t } = useTranslation(); const [actionName, setActionName] = React.useState(''); + const [functionNameError, setFunctionNameError] = useState(false); // New error state + + const handleAddAction = () => { + if (!isValidFunctionName(actionName)) { + setFunctionNameError(true); // Set error state if invalid + return; + } + setFunctionNameError(false); // Clear error state if valid + handleSubmit(actionName); + setModalState('INACTIVE'); + }; + return (
- + Action Name setActionName(e.target.value)} borderVariant="thin" placeholder={'Enter name'} - > + /> +

+ Use only letters, numbers, underscores, and hyphens (e.g., + `get_user_data`, `send-report`). +

+ {functionNameError && ( +

+ Invalid function name format. Use only letters, numbers, + underscores, and hyphens. +

+ )}
+
+

+ Add Chunk +

+
+ + Title + + setTitle(e.target.value)} + borderVariant="thin" + placeholder={'Enter title'} + > +
+
+
+ + Body text + + +
+
+
+ + +
+
+
+ +
+ ); + } else { + return ( +
+
+
+ +
+

+ Edit Chunk +

+
+ + Title + + setTitle(e.target.value)} + borderVariant="thin" + placeholder={'Enter title'} + > +
+
+
+ + Body text + + +
+
+
+ +
+ + +
+
+
+
+
+ { + /* no-op */ + } + } + submitLabel="Delete" + /> +
+ ); + } +} diff --git a/frontend/src/settings/ToolConfig.tsx b/frontend/src/settings/ToolConfig.tsx index ec422819..0be3a776 100644 --- a/frontend/src/settings/ToolConfig.tsx +++ b/frontend/src/settings/ToolConfig.tsx @@ -134,7 +134,7 @@ export default function ToolConfig({ {Object.keys(tool?.config).length !== 0 && tool.name !== 'api_tool' && (
- + API Key / Oauth
- + URL
- + Description { + userService + .getUserTools() + .then((res) => res.json()) + .then((data) => { + const newTool = data.tools.find( + (tool: UserToolType) => tool.id === toolId, + ); + if (newTool) { + setSelectedTool(newTool); + } else { + console.error('Newly added tool not found'); + } + }) + .catch((error) => console.error('Error fetching tools:', error)); + }; + React.useEffect(() => { getUserTools(); }, []); + return (
{selectedTool ? ( @@ -85,6 +103,7 @@ export default function Tools() { id="tool-search-input" value={searchTerm} onChange={(e) => setSearchTerm(e.target.value)} + borderVariant="thin" />
)} diff --git a/frontend/src/settings/types/index.ts b/frontend/src/settings/types/index.ts index 2e7079c7..0795fbb0 100644 --- a/frontend/src/settings/types/index.ts +++ b/frontend/src/settings/types/index.ts @@ -1,3 +1,9 @@ +export type ChunkType = { + doc_id: string; + text: string; + metadata: { [key: string]: string }; +}; + export type APIKeyData = { id: string; name: string;