(feat: chunks) server-side filter on search

This commit is contained in:
ManishMadan2882
2025-07-25 01:43:50 +05:30
parent 8ede3a0173
commit 58465ece65
4 changed files with 66 additions and 23 deletions

View File

@@ -3265,12 +3265,13 @@ class DeleteTool(Resource):
@user_ns.route("/api/get_chunks")
class GetChunks(Resource):
@api.doc(
description="Retrieves chunks from a document, optionally filtered by file path",
description="Retrieves chunks from a document, optionally filtered by file path and search term",
params={
"id": "The document ID",
"page": "Page number for pagination",
"per_page": "Number of chunks per page",
"path": "Optional: Filter chunks by relative file path"
"path": "Optional: Filter chunks by relative file path",
"search": "Optional: Search term to filter chunks by title or content"
},
)
def get(self):
@@ -3282,6 +3283,7 @@ class GetChunks(Resource):
page = int(request.args.get("page", 1))
per_page = int(request.args.get("per_page", 10))
path = request.args.get("path")
search_term = request.args.get("search", "").strip().lower()
if not ObjectId.is_valid(doc_id):
return make_response(jsonify({"error": "Invalid doc_id"}), 400)
@@ -3294,20 +3296,35 @@ class GetChunks(Resource):
store = get_vector_store(doc_id)
chunks = store.get_chunks()
if path:
filtered_chunks = []
for chunk in chunks:
metadata = chunk.get("metadata", {})
filtered_chunks = []
for chunk in chunks:
metadata = chunk.get("metadata", {})
if path:
source = metadata.get("source", "")
path_match = False
if isinstance(source, str) and source.endswith(path):
filtered_chunks.append(chunk)
path_match = True
elif isinstance(source, list):
for src in source:
if isinstance(src, str) and src.endswith(path):
filtered_chunks.append(chunk)
path_match = True
break
chunks = filtered_chunks
if not path_match:
continue
if search_term:
text_match = search_term in chunk.get("text", "").lower()
title_match = search_term in metadata.get("title", "").lower()
if not (text_match or title_match):
continue
filtered_chunks.append(chunk)
chunks = filtered_chunks
total_chunks = len(chunks)
start = (page - 1) * per_page
@@ -3321,7 +3338,8 @@ class GetChunks(Resource):
"per_page": per_page,
"total": total_chunks,
"chunks": paginated_chunks,
"path": path if path else None
"path": path if path else None,
"search": search_term if search_term else None
}
),
200,
@@ -3330,7 +3348,6 @@ class GetChunks(Resource):
current_app.logger.error(f"Error getting chunks: {e}", exc_info=True)
return make_response(jsonify({"success": False}), 500)
@user_ns.route("/api/add_chunk")
class AddChunk(Resource):
@api.expect(

View File

@@ -43,8 +43,11 @@ const endpoints = {
page: number,
per_page: number,
path?: string,
search?: string,
) =>
`/api/get_chunks?id=${docId}&page=${page}&per_page=${per_page}${path ? `&path=${encodeURIComponent(path)}` : ''}`,
`/api/get_chunks?id=${docId}&page=${page}&per_page=${per_page}${
path ? `&path=${encodeURIComponent(path)}` : ''
}${search ? `&search=${encodeURIComponent(search)}` : ''}`,
ADD_CHUNK: '/api/add_chunk',
DELETE_CHUNK: (docId: string, chunkId: string) =>
`/api/delete_chunk?id=${docId}&chunk_id=${chunkId}`,

View File

@@ -87,8 +87,9 @@ const userService = {
perPage: number,
token: string | null,
path?: string,
search?: string,
): Promise<any> =>
apiClient.get(endpoints.USER.GET_CHUNKS(docId, page, perPage, path), token),
apiClient.get(endpoints.USER.GET_CHUNKS(docId, page, perPage, path, search), token),
addChunk: (data: any, token: string | null): Promise<any> =>
apiClient.post(endpoints.USER.ADD_CHUNK, data, token),
deleteChunk: (

View File

@@ -116,7 +116,7 @@ const DocumentChunks: React.FC<DocumentChunksProps> = ({
setLoading(true);
try {
userService
.getDocumentChunks(documentId, page, perPage, token, path)
.getDocumentChunks(documentId, page, perPage, token, path, searchTerm)
.then((response) => {
if (!response.ok) {
setLoading(false);
@@ -131,10 +131,14 @@ const DocumentChunks: React.FC<DocumentChunksProps> = ({
setTotalChunks(data.total);
setPaginatedChunks(data.chunks);
setLoading(false);
})
.catch((error) => {
setLoading(false);
setPaginatedChunks([]);
});
} catch (e) {
console.log(e);
setLoading(false);
setPaginatedChunks([]);
}
};
@@ -221,16 +225,34 @@ const DocumentChunks: React.FC<DocumentChunksProps> = ({
setChunkToDelete(null);
};
useEffect(() => {
const delayDebounceFn = setTimeout(() => {
if (page !== 1) {
setPage(1);
} else {
fetchChunks();
}
}, 300);
return () => clearTimeout(delayDebounceFn);
}, [searchTerm]);
useEffect(() => {
fetchChunks();
}, [page, perPage, path]);
useEffect(() => {
setSearchTerm('');
setPage(1);
}, [path]);
// Remove the client-side filtering
// const filteredChunks = paginatedChunks.filter((chunk) => {
// if (!chunk.metadata?.title) return true;
// return chunk.metadata.title
// .toLowerCase()
// .includes(searchTerm.toLowerCase());
// });
const filteredChunks = paginatedChunks.filter((chunk) => {
if (!chunk.metadata?.title) return true;
return chunk.metadata.title
.toLowerCase()
.includes(searchTerm.toLowerCase());
});
// Use the server-filtered chunks directly
const filteredChunks = paginatedChunks;
const renderPathNavigation = () => {
return (
@@ -367,7 +389,7 @@ const DocumentChunks: React.FC<DocumentChunksProps> = ({
<Spinner />
</div>
) : (
<div className="w-full grid grid-cols-1 md:grid-cols-2 gap-4">
<div className="w-full grid grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-4">
{filteredChunks.length === 0 ? (
<div className="col-span-full flex flex-col items-center justify-center mt-24 text-center text-gray-500 dark:text-gray-400">
<img
@@ -442,7 +464,7 @@ const DocumentChunks: React.FC<DocumentChunksProps> = ({
</div>
)}
{!loading && filteredChunks.length > 0 && !editingChunk && !isAddingChunk && (
{!loading && totalChunks > perPage && !editingChunk && !isAddingChunk && (
<Pagination
currentPage={page}
totalPages={Math.ceil(totalChunks / perPage)}