# Chunk-management endpoints added to application/api/user/routes.py.


def get_vector_store(source_id):
    """Build the configured vector store for one document source.

    Args:
        source_id (str): source id of the document

    Returns:
        The store created by ``VectorCreator.create_vectorstore`` for the
        ``settings.VECTOR_STORE`` backend, keyed with the ``EMBEDDINGS_KEY``
        environment variable.
    """
    return VectorCreator.create_vectorstore(
        settings.VECTOR_STORE,
        source_id=source_id,
        embeddings_key=os.getenv("EMBEDDINGS_KEY"),
    )


@user_ns.route("/api/get_chunks")
class GetChunks(Resource):
    @api.doc(
        description="Retrieves all chunks associated with a document",
        params={
            "id": "The document ID",
            "page": "1-based page number (default 1)",
            "per_page": "Number of chunks per page (default 10)",
        },
    )
    def get(self):
        """Return one page of the document's chunks.

        Responds 400 for an invalid document id or non-positive /
        non-integer paging values, 500 when the vector store fails.
        """
        doc_id = request.args.get("id")

        # Parse paging inside a guard: a non-numeric query value used to
        # raise ValueError outside any handler.
        try:
            page = int(request.args.get("page", 1))
            per_page = int(request.args.get("per_page", 10))
        except (TypeError, ValueError):
            return make_response(
                jsonify({"error": "page and per_page must be integers"}), 400
            )

        # Zero or negative values would turn into negative slice bounds and
        # silently return the wrong chunks.
        if page < 1 or per_page < 1:
            return make_response(
                jsonify({"error": "page and per_page must be positive"}), 400
            )

        if not ObjectId.is_valid(doc_id):
            return make_response(jsonify({"error": "Invalid doc_id"}), 400)

        try:
            store = get_vector_store(doc_id)
            chunks = store.get_chunks()
            start = (page - 1) * per_page
            return make_response(
                jsonify(
                    {
                        "page": page,
                        "per_page": per_page,
                        "total": len(chunks),
                        "chunks": chunks[start : start + per_page],
                    }
                ),
                200,
            )
        except Exception as e:
            return make_response(jsonify({"error": str(e)}), 500)


@user_ns.route("/api/add_chunk")
class AddChunk(Resource):
    @api.expect(
        api.model(
            "AddChunkModel",
            {
                "id": fields.String(required=True, description="Document ID"),
                "text": fields.String(required=True, description="Text of the chunk"),
                "metadata": fields.Raw(
                    required=False,
                    description="Metadata associated with the chunk",
                ),
            },
        )
    )
    @api.doc(
        description="Adds a new chunk to the document",
    )
    def post(self):
        """Add a chunk (text plus optional metadata object) to a document.

        Responds 201 with the new chunk id, 400 for missing fields, an
        invalid document id or non-object metadata, 500 on store failure.
        """
        data = request.get_json()
        missing_fields = check_required_fields(data, ["id", "text"])
        if missing_fields:
            return missing_fields

        doc_id = data.get("id")
        text = data.get("text")
        # An explicit JSON null is treated the same as an absent field.
        metadata = data.get("metadata") or {}

        if not ObjectId.is_valid(doc_id):
            return make_response(jsonify({"error": "Invalid doc_id"}), 400)

        # The stores unpack metadata as a mapping; reject anything else here
        # instead of failing with a 500 further down.
        if not isinstance(metadata, dict):
            return make_response(
                jsonify({"error": "metadata must be an object"}), 400
            )

        try:
            store = get_vector_store(doc_id)
            chunk_id = store.add_chunk(text, metadata)
            return make_response(
                jsonify({"message": "Chunk added successfully", "chunk_id": chunk_id}),
                201,
            )
        except Exception as e:
            return make_response(jsonify({"error": str(e)}), 500)


@user_ns.route("/api/delete_chunk")
class DeleteChunk(Resource):
    @api.doc(
        description="Deletes a specific chunk from the document.",
        params={"id": "The document ID", "chunk_id": "The ID of the chunk to delete"},
    )
    def delete(self):
        """Delete one chunk of a document.

        Responds 200 when the store reports a deletion, 404 when it does
        not, 400 for an invalid document id or a missing chunk id, 500 on
        store failure.
        """
        doc_id = request.args.get("id")
        chunk_id = request.args.get("chunk_id")

        if not ObjectId.is_valid(doc_id):
            return make_response(jsonify({"error": "Invalid doc_id"}), 400)

        # Without this guard a missing parameter reached the store as None.
        if not chunk_id:
            return make_response(jsonify({"error": "Missing chunk_id"}), 400)

        try:
            store = get_vector_store(doc_id)
            if store.delete_chunk(chunk_id):
                return make_response(
                    jsonify({"message": "Chunk deleted successfully"}), 200
                )
            return make_response(
                jsonify({"message": "Chunk not found or could not be deleted"}),
                404,
            )
        except Exception as e:
            return make_response(jsonify({"error": str(e)}), 500)
# Methods of FaissStore (application/vectorstore/faiss.py).


def assert_embedding_dimensions(self, embeddings):
    """Check that the embedding dimension matches the loaded FAISS index.

    The check is only performed when ``settings.EMBEDDINGS_NAME`` is
    ``huggingface_sentence-transformers/all-mpnet-base-v2``.

    Args:
        embeddings: Embeddings instance; must expose a ``dimension`` attribute.

    Raises:
        AttributeError: If ``embeddings`` has no ``dimension`` attribute.
        ValueError: If the dimension differs from ``self.docsearch.index.d``.
    """
    if (
        settings.EMBEDDINGS_NAME
        != "huggingface_sentence-transformers/all-mpnet-base-v2"
    ):
        return

    word_embedding_dimension = getattr(embeddings, "dimension", None)
    if word_embedding_dimension is None:
        raise AttributeError(
            "'dimension' attribute not found in embeddings instance."
        )

    docsearch_index_dimension = self.docsearch.index.d
    if word_embedding_dimension != docsearch_index_dimension:
        raise ValueError(
            f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) != docsearch index dimension ({docsearch_index_dimension})"
        )


def get_chunks(self):
    """Return every stored chunk as a ``doc_id`` / ``text`` / ``metadata`` dict.

    Returns:
        list[dict]: One entry per document in the docstore; empty when no
        index is loaded.
    """
    if not self.docsearch:
        return []
    return [
        {"doc_id": doc_id, "text": doc.page_content, "metadata": doc.metadata}
        for doc_id, doc in self.docsearch.docstore._dict.items()
    ]


def add_chunk(self, text, metadata=None):
    """Embed ``text`` as a new chunk, persist the index and return its id.

    Args:
        text: Chunk body.
        metadata: Optional metadata mapping stored with the chunk.

    Returns:
        The id of the new chunk, or ``None`` if the store reported no id.
    """
    metadata = metadata or {}
    doc = Document(text=text, extra_info=metadata).to_langchain_format()
    # add_documents returns a list of ids; hand back the single id so the
    # result has the same shape as the MongoDB store's add_chunk.
    ids = self.docsearch.add_documents([doc])
    self.save_local(self.path)
    return ids[0] if ids else None


def delete_chunk(self, chunk_id):
    """Remove one chunk from the index and persist the change.

    The chunk is removed in place through the store's own ``delete`` call.
    Rebuilding the index from the remaining documents would re-embed every
    chunk, give all of them new ids, and need an embeddings object on
    ``self``.

    Args:
        chunk_id: Docstore id of the chunk to remove.

    Returns:
        bool: ``True`` if the chunk existed and was removed, else ``False``.
    """
    if chunk_id not in self.docsearch.docstore._dict:
        return False

    self.docsearch.delete([chunk_id])
    # Persist to the same location add_chunk writes to.
    self.save_local(self.path)
    return True
# Chunk-management methods of MongoDBVectorStore
# (application/vectorstore/mongodb.py).


def get_chunks(self):
    """Return the chunks stored for this source.

    Documents whose text field is empty or missing are skipped. Every field
    other than ``_id``, the text field, the embedding field and ``source_id``
    is reported as metadata.

    Returns:
        list[dict]: ``doc_id`` / ``text`` / ``metadata`` dicts; an empty list
        if the query fails (the failure is logged).
    """
    reserved = {"_id", self._text_key, self._embedding_key, "source_id"}
    try:
        chunks = []
        for doc in self._collection.find({"source_id": self._source_id}):
            text = doc.get(self._text_key)
            if not text:
                continue
            chunks.append(
                {
                    "doc_id": str(doc.get("_id")),
                    "text": text,
                    "metadata": {
                        key: value
                        for key, value in doc.items()
                        if key not in reserved
                    },
                }
            )
        return chunks
    except Exception:
        # Best-effort contract kept (callers get an empty list), but the
        # failure is no longer discarded without a trace.
        import logging

        logging.getLogger(__name__).exception(
            "Error fetching chunks for source %s", self._source_id
        )
        return []


def add_chunk(self, text, metadata=None):
    """Embed ``text`` and insert it as a new chunk of this source.

    Args:
        text: Chunk body.
        metadata: Optional extra fields stored alongside the chunk. Keys
            that collide with the store's own fields (``_id``, the text
            field, the embedding field, ``source_id``) are ignored.

    Returns:
        str: Id of the inserted document.

    Raises:
        ValueError: If no embedding could be generated for ``text``.
    """
    metadata = metadata or {}
    embeddings = self._embedding.embed_documents([text])
    if not embeddings:
        raise ValueError("Could not generate embedding for chunk")

    # Metadata comes from the request body; it must not be able to overwrite
    # the fields that bind the chunk to its source, text and vector.
    reserved = {"_id", self._text_key, self._embedding_key, "source_id"}
    chunk_data = {
        key: value for key, value in metadata.items() if key not in reserved
    }
    chunk_data[self._text_key] = text
    chunk_data[self._embedding_key] = embeddings[0]
    chunk_data["source_id"] = self._source_id

    result = self._collection.insert_one(chunk_data)
    return str(result.inserted_id)


def delete_chunk(self, chunk_id):
    """Delete one chunk of this source by its document id.

    Args:
        chunk_id: String form of the chunk's ``_id``.

    Returns:
        bool: ``True`` if a document was deleted; ``False`` if nothing
        matched or the operation failed (the failure is logged).
    """
    try:
        from bson.objectid import ObjectId

        result = self._collection.delete_one(
            # Scoped to this source so an id belonging to another document
            # source cannot be removed through this store.
            {"_id": ObjectId(chunk_id), "source_id": self._source_id}
        )
        return result.deleted_count > 0
    except Exception:
        import logging

        logging.getLogger(__name__).exception("Error deleting chunk %s", chunk_id)
        return False
DELETE_TOOL: '/api/delete_tool', + GET_CHUNKS: (docId: string, page: number, per_page: number) => + `/api/get_chunks?id=${docId}&page=${page}&per_page=${per_page}`, + ADD_CHUNK: '/api/add_chunk', }, CONVERSATION: { ANSWER: '/api/answer', diff --git a/frontend/src/api/services/userService.ts b/frontend/src/api/services/userService.ts index 8a9b5858..1a7befbd 100644 --- a/frontend/src/api/services/userService.ts +++ b/frontend/src/api/services/userService.ts @@ -47,6 +47,14 @@ const userService = { apiClient.post(endpoints.USER.UPDATE_TOOL, data), deleteTool: (data: any): Promise => apiClient.post(endpoints.USER.DELETE_TOOL, data), + getDocumentChunks: ( + docId: string, + page: number, + perPage: number, + ): Promise => + apiClient.get(endpoints.USER.GET_CHUNKS(docId, page, perPage)), + addChunk: (data: any): Promise => + apiClient.post(endpoints.USER.ADD_CHUNK, data), }; export default userService; diff --git a/frontend/src/modals/AddActionModal.tsx b/frontend/src/modals/AddActionModal.tsx index c52d89f8..f0ee797b 100644 --- a/frontend/src/modals/AddActionModal.tsx +++ b/frontend/src/modals/AddActionModal.tsx @@ -37,7 +37,7 @@ export default function AddActionModal({ New Action
- + Action Name void; + handleSubmit: (title: string, text: string) => void; +}) { + const [title, setTitle] = React.useState(''); + const [chunkText, setChunkText] = React.useState(''); + return ( +
+
+
+ +
+

+ Add Chunk +

+
+ + Title + + setTitle(e.target.value)} + borderVariant="thin" + placeholder={'Enter title'} + > +
+
+ + Body text + + +
+
+ + +
+
+
+
+
+ ); +} diff --git a/frontend/src/settings/Documents.tsx b/frontend/src/settings/Documents.tsx index 88590165..b5e874d5 100644 --- a/frontend/src/settings/Documents.tsx +++ b/frontend/src/settings/Documents.tsx @@ -1,24 +1,31 @@ -import React, { useState, useEffect, useCallback } from 'react'; -import PropTypes from 'prop-types'; -import userService from '../api/services/userService'; -import SyncIcon from '../assets/sync.svg'; -import Trash from '../assets/trash.svg'; -import caretSort from '../assets/caret-sort.svg'; -import DropdownMenu from '../components/DropdownMenu'; -import SkeletonLoader from '../components/SkeletonLoader'; -import Input from '../components/Input'; -import Upload from '../upload/Upload'; // Import the Upload component -import Pagination from '../components/DocumentPagination'; +import React, { useCallback, useEffect, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { useDispatch } from 'react-redux'; -import { Doc, DocumentsProps, ActiveState } from '../models/misc'; // Ensure ActiveState type is imported -import { getDocs, getDocsWithPagination } from '../preferences/preferenceApi'; -import { setSourceDocs } from '../preferences/preferenceSlice'; -import { setPaginatedDocuments } from '../preferences/preferenceSlice'; -import { formatDate } from '../utils/dateTimeUtils'; -import ConfirmationModal from '../modals/ConfirmationModal'; -// Utility function to format numbers +import userService from '../api/services/userService'; +import ArrowLeft from '../assets/arrow-left.svg'; +import caretSort from '../assets/caret-sort.svg'; +import NoFilesDarkIcon from '../assets/no-files-dark.svg'; +import NoFilesIcon from '../assets/no-files.svg'; +import SyncIcon from '../assets/sync.svg'; +import Trash from '../assets/trash.svg'; +import Pagination from '../components/DocumentPagination'; +import DropdownMenu from '../components/DropdownMenu'; +import Input from '../components/Input'; +import SkeletonLoader from 
'../components/SkeletonLoader'; +import { useDarkTheme } from '../hooks'; +import AddChunkModal from '../modals/AddChunkModal'; +import ConfirmationModal from '../modals/ConfirmationModal'; +import { ActiveState, Doc, DocumentsProps } from '../models/misc'; +import { getDocs, getDocsWithPagination } from '../preferences/preferenceApi'; +import { + setPaginatedDocuments, + setSourceDocs, +} from '../preferences/preferenceSlice'; +import Upload from '../upload/Upload'; +import { formatDate } from '../utils/dateTimeUtils'; +import { ChunkType } from './types'; + const formatTokens = (tokens: number): string => { const roundToTwoDecimals = (num: number): string => { return (Math.round((num + Number.EPSILON) * 100) / 100).toString(); @@ -35,17 +42,16 @@ const formatTokens = (tokens: number): string => { } }; -const Documents: React.FC = ({ +export default function Documents({ paginatedDocuments, handleDeleteDocument, -}) => { +}: DocumentsProps) { const { t } = useTranslation(); const dispatch = useDispatch(); - // State for search input + const [searchTerm, setSearchTerm] = useState(''); - // State for modal: active/inactive - const [modalState, setModalState] = useState('INACTIVE'); // Initialize with inactive state - const [isOnboarding, setIsOnboarding] = useState(false); // State for onboarding flag + const [modalState, setModalState] = useState('INACTIVE'); + const [isOnboarding, setIsOnboarding] = useState(false); const [loading, setLoading] = useState(false); const [sortField, setSortField] = useState<'date' | 'tokens'>('date'); const [sortOrder, setSortOrder] = useState<'asc' | 'desc'>('desc'); @@ -60,6 +66,7 @@ const Documents: React.FC = ({ { label: t('settings.documents.syncFrequency.weekly'), value: 'weekly' }, { label: t('settings.documents.syncFrequency.monthly'), value: 'monthly' }, ]; + const [showDocumentChunks, setShowDocumentChunks] = useState(); const refreshDocs = useCallback( ( @@ -159,7 +166,14 @@ const Documents: React.FC = ({ 
refreshDocs(undefined, 1, rowsPerPage); }, [searchTerm]); - return ( + return showDocumentChunks ? ( + { + setShowDocumentChunks(undefined); + }} + /> + ) : (
@@ -183,6 +197,7 @@ const Documents: React.FC = ({ setSearchTerm(e.target.value); setCurrentPage(1); }} + borderVariant="thin" />
); -}; +} -Documents.propTypes = { - //documents: PropTypes.array.isRequired, - handleDeleteDocument: PropTypes.func.isRequired, -}; +function DocumentChunks({ + document, + handleGoBack, +}: { + document: Doc; + handleGoBack: () => void; +}) { + const { t } = useTranslation(); + const [isDarkTheme] = useDarkTheme(); + const [paginatedChunks, setPaginatedChunks] = useState([]); + const [page, setPage] = useState(1); + const [perPage, setPerPage] = useState(5); + const [totalChunks, setTotalChunks] = useState(0); + const [loading, setLoading] = useState(false); + const [searchTerm, setSearchTerm] = useState(''); + const [modalState, setModalState] = useState('INACTIVE'); -export default Documents; + const fetchChunks = () => { + setLoading(true); + try { + userService + .getDocumentChunks(document.id ?? '', page, perPage) + .then((response) => { + if (!response.ok) { + throw new Error('Failed to fetch chunks data'); + } + return response.json(); + }) + .then((data) => { + setPage(data.page); + setPerPage(data.per_page); + setTotalChunks(data.total); + setPaginatedChunks(data.chunks); + }); + } catch (e) { + console.log(e); + } finally { + setLoading(false); + } + }; + + const handleAddChunk = (title: string, text: string) => { + try { + userService + .addChunk({ + id: document.id ?? '', + text: text, + metadata: { + title: title, + }, + }) + .then((response) => { + if (!response.ok) { + throw new Error('Failed to add chunk'); + } + fetchChunks(); + }); + } catch (e) { + console.log(e); + } + }; + + React.useEffect(() => { + fetchChunks(); + }, [page, perPage]); + return ( +
+
+ +

Back to all documents

+
+
+
+ + { + setSearchTerm(e.target.value); + }} + borderVariant="thin" + /> +
+ +
+
+ {paginatedChunks.filter((chunk) => + chunk.metadata?.title + .toLowerCase() + .includes(searchTerm.toLowerCase()), + ).length === 0 ? ( +
+ No tools found + No chunks found +
+ ) : ( + paginatedChunks + .filter((chunk) => + chunk.metadata?.title + .toLowerCase() + .includes(searchTerm.toLowerCase()), + ) + .map((chunk, index) => ( +
+
+
+

+ {chunk.metadata?.title} +

+

+ {chunk.text} +

+
+
+
+ )) + )} +
+
+ { + setPage(page); + }} + onRowsPerPageChange={(rows) => { + setPerPage(rows); + setPage(1); + }} + /> +
+ +
+ ); +} diff --git a/frontend/src/settings/ToolConfig.tsx b/frontend/src/settings/ToolConfig.tsx index ec422819..0be3a776 100644 --- a/frontend/src/settings/ToolConfig.tsx +++ b/frontend/src/settings/ToolConfig.tsx @@ -134,7 +134,7 @@ export default function ToolConfig({ {Object.keys(tool?.config).length !== 0 && tool.name !== 'api_tool' && (
- + API Key / Oauth
- + URL
- + Description setSearchTerm(e.target.value)} + borderVariant="thin" />