"""Combine the text files of a directory tree into one Markdown document."""

# Provenance: reconstructed from a whitespace-collapsed patch,
#   commit 39b36b6857e48ad9d352f374b957f604d11caa20
#   From: Alex, Date: Tue, 13 May 2025 14:03:05 +0100
#   Subject: Feat: Add MD gen script, enable Qdrant lazy loading
# which added this file as ``md-gen.py``. The same patch also:
#   * removed the ``qdrant-client==1.13.2`` line from
#     ``application/requirements.txt``;
#   * moved ``from qdrant_client import models`` and
#     ``from langchain_community.vectorstores.qdrant import Qdrant`` from
#     module level of ``application/vectorstore/qdrant.py`` into
#     ``QdrantStore.__init__``.

import os

# File and directory names that are skipped unless the caller supplies
# its own ``ignore_list``.
DEFAULT_IGNORE_LIST = (
    "node_modules", "__pycache__", ".git", ".DS_Store", "inputs", "indexes",
    "model", "models", ".venv", "temp", ".pytest_cache", ".ruff_cache",
    "extensions", "dir_tree.py", "map.txt", "signal-desktop-keyring.gpg",
    ".husky", ".next", "docs", "index.pkl", "index.faiss", "assets", "fonts",
    "public", "yarn.lock", "package-lock.json",
)


def create_markdown_from_directory(directory=".", output_file="combined.md",
                                   ignore_list=None):
    """Write every readable text file under *directory* into one Markdown file.

    The tree is walked recursively. Each file is emitted as a
    ``## File: <relative path>`` heading, followed by the file's content and
    a ``---`` separator. Directories and files are visited in sorted order so
    that repeated runs over the same tree produce the same document.

    Args:
        directory (str): The directory to traverse. Defaults to the current
            directory.
        output_file (str): Path of the Markdown file to create (overwritten
            if it exists). Defaults to 'combined.md'.
        ignore_list (iterable of str, optional): Exact file or directory
            names to skip. A matching directory is not descended into.
            Defaults to ``DEFAULT_IGNORE_LIST``.

    Returns:
        None. Progress and per-file errors are reported with ``print``.

    Raises:
        OSError: If *output_file* cannot be opened for writing.
    """
    ignored = frozenset(
        DEFAULT_IGNORE_LIST if ignore_list is None else ignore_list
    )
    # Resolved once so the output file can be recognised inside the walk
    # even when it is reached through a relative path or a symlink.
    output_real = os.path.realpath(output_file)

    with open(output_file, "w", encoding="utf-8") as outfile:
        for root, dirs, files in os.walk(directory):
            # Assigning to dirs[:] prunes the walk in place; sorting it also
            # fixes the order in which sub-directories are visited.
            dirs[:] = sorted(d for d in dirs if d not in ignored)

            for filename in sorted(files):
                if filename in ignored:
                    continue
                filepath = os.path.join(root, filename)

                # The output file already exists at this point (it was just
                # opened for writing); reading it back would copy the
                # partially written document into itself.
                if os.path.realpath(filepath) == output_real:
                    continue

                try:
                    with open(filepath, "r", encoding="utf-8") as infile:
                        content = infile.read()

                    # A path relative to the walked root shows where the
                    # file lives without leaking the absolute location.
                    rel_path = os.path.relpath(filepath, directory)
                    outfile.write(f"## File: {rel_path}\n\n")
                    outfile.write(content)
                    outfile.write("\n\n---\n\n")  # Separator between files
                except (OSError, UnicodeError) as e:
                    # Best effort: unreadable or non-UTF-8 (binary) files are
                    # reported and skipped rather than aborting the run.
                    print(f"Error processing file {filepath}: {e}")

    print(f"Successfully created {output_file}")


if __name__ == "__main__":
    create_markdown_from_directory()