Feat: Add MD gen script, enable Qdrant lazy loading

This commit is contained in:
Alex
2025-05-13 14:03:05 +01:00
parent 44e98748c5
commit 39b36b6857
3 changed files with 50 additions and 3 deletions

View File

@@ -71,7 +71,6 @@ python-dateutil==2.9.0.post0
python-dotenv==1.0.1 python-dotenv==1.0.1
python-jose==3.4.0 python-jose==3.4.0
python-pptx==1.0.2 python-pptx==1.0.2
qdrant-client==1.13.2
redis==5.2.1 redis==5.2.1
referencing==0.30.2 referencing==0.30.2
regex==2024.11.6 regex==2024.11.6

View File

@@ -1,11 +1,12 @@
from langchain_community.vectorstores.qdrant import Qdrant
from application.vectorstore.base import BaseVectorStore from application.vectorstore.base import BaseVectorStore
from application.core.settings import settings from application.core.settings import settings
from qdrant_client import models
class QdrantStore(BaseVectorStore): class QdrantStore(BaseVectorStore):
def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"): def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"):
from qdrant_client import models
from langchain_community.vectorstores.qdrant import Qdrant
self._filter = models.Filter( self._filter = models.Filter(
must=[ must=[
models.FieldCondition( models.FieldCondition(

47
md-gen.py Normal file
View File

@@ -0,0 +1,47 @@
import os
def create_markdown_from_directory(directory=".", output_file="combined.md"):
    """
    Combine the text files under a directory into a single markdown file.

    Recursively walks ``directory`` and, for every file that can be read as
    UTF-8, appends a ``## File: <relative path>`` heading, the file's
    contents, and a ``---`` separator to ``output_file``.

    Files and folders whose name appears in the built-in ignore list are
    skipped, and ignored folders are never descended into. The output file
    itself is also skipped when it lives inside the traversed tree, so it is
    never embedded in its own output. Files that cannot be opened or decoded
    as UTF-8 are reported on stdout and left out.

    Directories and files are visited in sorted order so that repeated runs
    over the same tree produce the same output.

    Args:
        directory (str): The directory to traverse. Defaults to the current directory.
        output_file (str): The name of the output markdown file. Defaults to 'combined.md'.
    """
    # Names (not paths) of files and folders that are excluded at any depth.
    ignored_names = frozenset({
        "node_modules", "__pycache__", ".git", ".DS_Store", "inputs", "indexes",
        "model", "models", ".venv", "temp", ".pytest_cache", ".ruff_cache",
        "extensions", "dir_tree.py", "map.txt", "signal-desktop-keyring.gpg",
        ".husky", ".next", "docs", "index.pkl", "index.faiss", "assets", "fonts", "public",
        "yarn.lock", "package-lock.json",
    })

    # Resolved once so the output file can be recognised during the walk,
    # regardless of how its path was spelled by the caller.
    output_path = os.path.normcase(os.path.realpath(output_file))

    with open(output_file, "w", encoding="utf-8") as outfile:
        for root, dirs, files in os.walk(directory):
            # Prune in place so os.walk never descends into ignored folders;
            # sorting makes the traversal order deterministic.
            dirs[:] = sorted(d for d in dirs if d not in ignored_names)

            for filename in sorted(files):
                if filename in ignored_names:
                    continue

                filepath = os.path.join(root, filename)

                # Never read the file we are currently writing: it would embed
                # a partial copy of the output inside itself.
                if os.path.normcase(os.path.realpath(filepath)) == output_path:
                    continue

                # Only the read is guarded: unreadable or non-UTF-8 (binary)
                # inputs are skipped, while failures writing the output
                # propagate to the caller instead of being misreported.
                try:
                    with open(filepath, "r", encoding="utf-8") as infile:
                        content = infile.read()
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error processing file {filepath}: {e}")
                    continue

                # Get a relative path to better indicate file location
                rel_path = os.path.relpath(filepath, directory)
                outfile.write(f"## File: {rel_path}\n\n")
                outfile.write(content)
                outfile.write("\n\n---\n\n")  # Separator between files

    print(f"Successfully created {output_file}")
# Script entry point: when executed directly (not imported), bundle the
# current working directory into 'combined.md' using the function defaults.
if __name__ == "__main__":
    create_markdown_from_directory()