import os

# Directory and file names that are skipped when the caller does not supply
# an explicit ignore_list. Matching is on the bare name, not on the full path.
_DEFAULT_IGNORE = (
    "node_modules", "__pycache__", ".git", ".DS_Store", "inputs", "indexes",
    "model", "models", ".venv", "temp", ".pytest_cache", ".ruff_cache",
    "extensions", "dir_tree.py", "map.txt", "signal-desktop-keyring.gpg",
    ".husky", ".next", "docs", "index.pkl", "index.faiss", "assets", "fonts", "public",
    "yarn.lock", "package-lock.json",
)


def create_markdown_from_directory(directory=".", output_file="combined.md", ignore_list=None):
    """
    Combine every readable text file under a directory into one markdown file.

    Recursively traverses the given directory, reads each file as UTF-8 and
    appends its contents to the output file under a ``## File: <relative path>``
    heading, followed by a ``---`` separator. Directories and files whose name
    appears in the ignore list are skipped, and so is the output file itself
    when it lives inside the traversed tree. Files that cannot be read or
    decoded as UTF-8 are reported on stdout and skipped.

    Directories and files are visited in sorted name order so that repeated
    runs over the same tree produce the same output.

    Args:
        directory (str): The directory to traverse. Defaults to the current directory.
        output_file (str): The name of the output markdown file. Defaults to 'combined.md'.
        ignore_list (iterable of str, optional): Directory and file names to skip.
            Defaults to a built-in list of common build, cache and asset names.

    Raises:
        OSError: If the output file cannot be opened or written.
    """
    ignored = frozenset(_DEFAULT_IGNORE if ignore_list is None else ignore_list)

    with open(output_file, "w", encoding="utf-8") as outfile:
        # Resolve once, after the file exists, so the walk can recognise the
        # output file and avoid appending its half-written content to itself.
        output_real = os.path.realpath(output_file)

        for root, dirs, files in os.walk(directory):
            # Assigning through dirs[:] prunes the walk in place, so ignored
            # directories are never descended into.
            dirs[:] = sorted(d for d in dirs if d not in ignored)

            for filename in sorted(files):
                if filename in ignored:
                    continue
                filepath = os.path.join(root, filename)
                if os.path.realpath(filepath) == output_real:
                    continue

                # Only the read is best-effort: binary or unreadable files are
                # reported and skipped, while failures writing the output
                # propagate to the caller.
                try:
                    with open(filepath, "r", encoding="utf-8") as infile:
                        content = infile.read()
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error processing file {filepath}: {e}")
                    continue

                # Get a relative path to better indicate file location
                rel_path = os.path.relpath(filepath, directory)
                outfile.write(f"## File: {rel_path}\n\n")
                outfile.write(content)
                outfile.write("\n\n---\n\n")  # Separator between files

    print(f"Successfully created {output_file}")


if __name__ == "__main__":
    create_markdown_from_directory()