mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 00:23:17 +00:00
Feat: Add MD gen script, enable Qdrant lazy loading
This commit is contained in:
@@ -71,7 +71,6 @@ python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.0.1
|
||||
python-jose==3.4.0
|
||||
python-pptx==1.0.2
|
||||
qdrant-client==1.13.2
|
||||
redis==5.2.1
|
||||
referencing==0.30.2
|
||||
regex==2024.11.6
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
from langchain_community.vectorstores.qdrant import Qdrant
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from application.core.settings import settings
|
||||
from qdrant_client import models
|
||||
|
||||
|
||||
class QdrantStore(BaseVectorStore):
|
||||
def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"):
|
||||
from qdrant_client import models
|
||||
from langchain_community.vectorstores.qdrant import Qdrant
|
||||
|
||||
self._filter = models.Filter(
|
||||
must=[
|
||||
models.FieldCondition(
|
||||
|
||||
47
md-gen.py
Normal file
47
md-gen.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import os
|
||||
|
||||
def create_markdown_from_directory(directory=".", output_file="combined.md"):
    """
    Combine every readable text file under ``directory`` into one markdown file.

    The directory tree is walked recursively. Each file's content is written
    to ``output_file`` under a ``## File: <relative path>`` heading and
    followed by a ``---`` separator. Files and folders whose names appear in
    the built-in ignore list below are skipped, as is ``output_file`` itself
    when it lives inside ``directory`` (otherwise the script would read its
    own half-written output back in, and every rerun would grow the file).

    Files that cannot be read as UTF-8 text (binary files, permission errors,
    and similar) are reported on stdout and skipped; they do not abort the run.

    Args:
        directory (str): The directory to traverse. Defaults to the current directory.
        output_file (str): The name of the output markdown file. Defaults to 'combined.md'.
    """
    # A frozenset gives O(1) membership tests; names are matched exactly
    # against a single path component (directory name or file name).
    ignore_list = frozenset({
        "node_modules", "__pycache__", ".git", ".DS_Store", "inputs", "indexes",
        "model", "models", ".venv", "temp", ".pytest_cache", ".ruff_cache",
        "extensions", "dir_tree.py", "map.txt", "signal-desktop-keyring.gpg",
        ".husky", ".next", "docs", "index.pkl", "index.faiss", "assets", "fonts", "public",
        "yarn.lock", "package-lock.json",
    })

    # Resolve once so the output file can be recognised during the walk even
    # if it is reached through a different relative path or a symlink.
    output_real = os.path.realpath(output_file)

    with open(output_file, "w", encoding="utf-8") as outfile:
        for root, dirs, files in os.walk(directory):
            # Prune ignored directories in place so os.walk never descends
            # into them; sorting makes the traversal order reproducible.
            dirs[:] = sorted(d for d in dirs if d not in ignore_list)

            for filename in sorted(files):
                if filename in ignore_list:
                    continue
                filepath = os.path.join(root, filename)

                # Never feed the file we are currently writing back into itself.
                if os.path.realpath(filepath) == output_real:
                    continue

                try:
                    with open(filepath, "r", encoding="utf-8") as infile:
                        content = infile.read()

                    # Get a relative path to better indicate file location
                    rel_path = os.path.relpath(filepath, directory)
                    # Heading, content and separator are written in one call
                    # so a failure cannot leave a heading without its body.
                    outfile.write(f"## File: {rel_path}\n\n{content}\n\n---\n\n")

                except (OSError, UnicodeError) as e:
                    # Best effort: unreadable or non-UTF-8 files are reported
                    # and skipped rather than aborting the whole run.
                    print(f"Error processing file {filepath}: {e}")

    print(f"Successfully created {output_file}")
|
||||
|
||||
# Script entry point: when executed directly (not imported), bundle the
# current working directory into "combined.md".
if __name__ == "__main__":
    create_markdown_from_directory(directory=".", output_file="combined.md")
|
||||
Reference in New Issue
Block a user