mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
(feat:chunks) server-side filter on search
This commit is contained in:
@@ -3265,12 +3265,13 @@ class DeleteTool(Resource):
|
||||
@user_ns.route("/api/get_chunks")
|
||||
class GetChunks(Resource):
|
||||
@api.doc(
|
||||
description="Retrieves chunks from a document, optionally filtered by file path",
|
||||
description="Retrieves chunks from a document, optionally filtered by file path and search term",
|
||||
params={
|
||||
"id": "The document ID",
|
||||
"page": "Page number for pagination",
|
||||
"per_page": "Number of chunks per page",
|
||||
"path": "Optional: Filter chunks by relative file path"
|
||||
"path": "Optional: Filter chunks by relative file path",
|
||||
"search": "Optional: Search term to filter chunks by title or content"
|
||||
},
|
||||
)
|
||||
def get(self):
|
||||
@@ -3282,6 +3283,7 @@ class GetChunks(Resource):
|
||||
page = int(request.args.get("page", 1))
|
||||
per_page = int(request.args.get("per_page", 10))
|
||||
path = request.args.get("path")
|
||||
search_term = request.args.get("search", "").strip().lower()
|
||||
|
||||
if not ObjectId.is_valid(doc_id):
|
||||
return make_response(jsonify({"error": "Invalid doc_id"}), 400)
|
||||
@@ -3294,20 +3296,35 @@ class GetChunks(Resource):
|
||||
store = get_vector_store(doc_id)
|
||||
chunks = store.get_chunks()
|
||||
|
||||
if path:
|
||||
filtered_chunks = []
|
||||
for chunk in chunks:
|
||||
metadata = chunk.get("metadata", {})
|
||||
filtered_chunks = []
|
||||
for chunk in chunks:
|
||||
metadata = chunk.get("metadata", {})
|
||||
|
||||
if path:
|
||||
source = metadata.get("source", "")
|
||||
path_match = False
|
||||
|
||||
if isinstance(source, str) and source.endswith(path):
|
||||
filtered_chunks.append(chunk)
|
||||
path_match = True
|
||||
elif isinstance(source, list):
|
||||
for src in source:
|
||||
if isinstance(src, str) and src.endswith(path):
|
||||
filtered_chunks.append(chunk)
|
||||
path_match = True
|
||||
break
|
||||
chunks = filtered_chunks
|
||||
|
||||
if not path_match:
|
||||
continue
|
||||
|
||||
if search_term:
|
||||
text_match = search_term in chunk.get("text", "").lower()
|
||||
title_match = search_term in metadata.get("title", "").lower()
|
||||
|
||||
if not (text_match or title_match):
|
||||
continue
|
||||
|
||||
filtered_chunks.append(chunk)
|
||||
|
||||
chunks = filtered_chunks
|
||||
|
||||
total_chunks = len(chunks)
|
||||
start = (page - 1) * per_page
|
||||
@@ -3321,7 +3338,8 @@ class GetChunks(Resource):
|
||||
"per_page": per_page,
|
||||
"total": total_chunks,
|
||||
"chunks": paginated_chunks,
|
||||
"path": path if path else None
|
||||
"path": path if path else None,
|
||||
"search": search_term if search_term else None
|
||||
}
|
||||
),
|
||||
200,
|
||||
@@ -3330,7 +3348,6 @@ class GetChunks(Resource):
|
||||
current_app.logger.error(f"Error getting chunks: {e}", exc_info=True)
|
||||
return make_response(jsonify({"success": False}), 500)
|
||||
|
||||
|
||||
@user_ns.route("/api/add_chunk")
|
||||
class AddChunk(Resource):
|
||||
@api.expect(
|
||||
|
||||
@@ -43,8 +43,11 @@ const endpoints = {
|
||||
page: number,
|
||||
per_page: number,
|
||||
path?: string,
|
||||
search?: string,
|
||||
) =>
|
||||
`/api/get_chunks?id=${docId}&page=${page}&per_page=${per_page}${path ? `&path=${encodeURIComponent(path)}` : ''}`,
|
||||
`/api/get_chunks?id=${docId}&page=${page}&per_page=${per_page}${
|
||||
path ? `&path=${encodeURIComponent(path)}` : ''
|
||||
}${search ? `&search=${encodeURIComponent(search)}` : ''}`,
|
||||
ADD_CHUNK: '/api/add_chunk',
|
||||
DELETE_CHUNK: (docId: string, chunkId: string) =>
|
||||
`/api/delete_chunk?id=${docId}&chunk_id=${chunkId}`,
|
||||
|
||||
@@ -87,8 +87,9 @@ const userService = {
|
||||
perPage: number,
|
||||
token: string | null,
|
||||
path?: string,
|
||||
search?: string,
|
||||
): Promise<any> =>
|
||||
apiClient.get(endpoints.USER.GET_CHUNKS(docId, page, perPage, path), token),
|
||||
apiClient.get(endpoints.USER.GET_CHUNKS(docId, page, perPage, path, search), token),
|
||||
addChunk: (data: any, token: string | null): Promise<any> =>
|
||||
apiClient.post(endpoints.USER.ADD_CHUNK, data, token),
|
||||
deleteChunk: (
|
||||
|
||||
@@ -116,7 +116,7 @@ const DocumentChunks: React.FC<DocumentChunksProps> = ({
|
||||
setLoading(true);
|
||||
try {
|
||||
userService
|
||||
.getDocumentChunks(documentId, page, perPage, token, path)
|
||||
.getDocumentChunks(documentId, page, perPage, token, path, searchTerm)
|
||||
.then((response) => {
|
||||
if (!response.ok) {
|
||||
setLoading(false);
|
||||
@@ -131,10 +131,14 @@ const DocumentChunks: React.FC<DocumentChunksProps> = ({
|
||||
setTotalChunks(data.total);
|
||||
setPaginatedChunks(data.chunks);
|
||||
setLoading(false);
|
||||
})
|
||||
.catch((error) => {
|
||||
setLoading(false);
|
||||
setPaginatedChunks([]);
|
||||
});
|
||||
} catch (e) {
|
||||
console.log(e);
|
||||
setLoading(false);
|
||||
setPaginatedChunks([]);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -221,16 +225,34 @@ const DocumentChunks: React.FC<DocumentChunksProps> = ({
|
||||
setChunkToDelete(null);
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
const delayDebounceFn = setTimeout(() => {
|
||||
if (page !== 1) {
|
||||
setPage(1);
|
||||
} else {
|
||||
fetchChunks();
|
||||
}
|
||||
}, 300);
|
||||
|
||||
return () => clearTimeout(delayDebounceFn);
|
||||
}, [searchTerm]);
|
||||
useEffect(() => {
|
||||
fetchChunks();
|
||||
}, [page, perPage, path]);
|
||||
useEffect(() => {
|
||||
setSearchTerm('');
|
||||
setPage(1);
|
||||
}, [path]);
|
||||
// Remove the client-side filtering
|
||||
// const filteredChunks = paginatedChunks.filter((chunk) => {
|
||||
// if (!chunk.metadata?.title) return true;
|
||||
// return chunk.metadata.title
|
||||
// .toLowerCase()
|
||||
// .includes(searchTerm.toLowerCase());
|
||||
// });
|
||||
|
||||
const filteredChunks = paginatedChunks.filter((chunk) => {
|
||||
if (!chunk.metadata?.title) return true;
|
||||
return chunk.metadata.title
|
||||
.toLowerCase()
|
||||
.includes(searchTerm.toLowerCase());
|
||||
});
|
||||
// Use the server-filtered chunks directly
|
||||
const filteredChunks = paginatedChunks;
|
||||
|
||||
const renderPathNavigation = () => {
|
||||
return (
|
||||
@@ -367,7 +389,7 @@ const DocumentChunks: React.FC<DocumentChunksProps> = ({
|
||||
<Spinner />
|
||||
</div>
|
||||
) : (
|
||||
<div className="w-full grid grid-cols-1 md:grid-cols-2 gap-4">
|
||||
<div className="w-full grid grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-4">
|
||||
{filteredChunks.length === 0 ? (
|
||||
<div className="col-span-full flex flex-col items-center justify-center mt-24 text-center text-gray-500 dark:text-gray-400">
|
||||
<img
|
||||
@@ -442,7 +464,7 @@ const DocumentChunks: React.FC<DocumentChunksProps> = ({
|
||||
</div>
|
||||
)}
|
||||
|
||||
{!loading && filteredChunks.length > 0 && !editingChunk && !isAddingChunk && (
|
||||
{!loading && totalChunks > perPage && !editingChunk && !isAddingChunk && (
|
||||
<Pagination
|
||||
currentPage={page}
|
||||
totalPages={Math.ceil(totalChunks / perPage)}
|
||||
|
||||
Reference in New Issue
Block a user