metadata on ingestion

2026-02-14 10:11:19 +00:00 · 2023-05-17 21:41:24 +01:00
parent 27c45ae24a
commit e49dd0cc6a
2 changed files with 7 additions and 2 deletions
--- a/scripts/ingest.py
+++ b/scripts/ingest.py
@@ -24,6 +24,9 @@ nltk.download('punkt', quiet=True)
 nltk.download('averaged_perceptron_tagger', quiet=True)


+def metadata_from_filename(title):  # Callback handed to SimpleDirectoryReader as file_metadata; `title` is presumably the file name/path the reader passes in (TODO confirm).
+    return {'title': title}  # The argument is stored unchanged under the 'title' key.
+
 # Splits all files in specified folder to documents
@app.command()
 def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
@@ -55,7 +58,7 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
    def process_one_docs(directory, folder_name):
        raw_docs = SimpleDirectoryReader(input_dir=directory, input_files=file, recursive=recursive,
                                         required_exts=formats, num_files_limit=limit,
-                                         exclude_hidden=exclude).load_data()
+                                         exclude_hidden=exclude, file_metadata=metadata_from_filename).load_data()

        # Here we split the documents, as needed, into smaller chunks.
        # We do this due to the context limits of the LLMs.