(feat:dir-reader) save tokens with filenames

This commit is contained in:
ManishMadan2882
2025-07-02 16:30:29 +05:30
parent ade704d065
commit fd905b1a06
3 changed files with 33 additions and 0 deletions

View File

@@ -258,6 +258,10 @@ def ingest_worker(
file_metadata=metadata_from_filename,
)
raw_docs = reader.load_data()
file_token_counts = getattr(reader, 'file_token_counts', {})
logging.info(f"File token counts from reader: {file_token_counts}")
chunker = Chunker(
chunking_strategy="classic_chunk",
@@ -292,6 +296,7 @@ def ingest_worker(
"id": str(id),
"type": "local",
"file_path": file_path,
"file_token_counts": json.dumps(file_token_counts),
}
upload_index(vector_store_path, file_data)