mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 00:23:17 +00:00
Feat: Add MD gen script, enable Qdrant lazy loading
This commit is contained in:
@@ -71,7 +71,6 @@ python-dateutil==2.9.0.post0
|
|||||||
python-dotenv==1.0.1
|
python-dotenv==1.0.1
|
||||||
python-jose==3.4.0
|
python-jose==3.4.0
|
||||||
python-pptx==1.0.2
|
python-pptx==1.0.2
|
||||||
qdrant-client==1.13.2
|
|
||||||
redis==5.2.1
|
redis==5.2.1
|
||||||
referencing==0.30.2
|
referencing==0.30.2
|
||||||
regex==2024.11.6
|
regex==2024.11.6
|
||||||
|
|||||||
@@ -1,11 +1,12 @@
|
|||||||
from langchain_community.vectorstores.qdrant import Qdrant
|
|
||||||
from application.vectorstore.base import BaseVectorStore
|
from application.vectorstore.base import BaseVectorStore
|
||||||
from application.core.settings import settings
|
from application.core.settings import settings
|
||||||
from qdrant_client import models
|
|
||||||
|
|
||||||
|
|
||||||
class QdrantStore(BaseVectorStore):
|
class QdrantStore(BaseVectorStore):
|
||||||
def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"):
|
def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"):
|
||||||
|
from qdrant_client import models
|
||||||
|
from langchain_community.vectorstores.qdrant import Qdrant
|
||||||
|
|
||||||
self._filter = models.Filter(
|
self._filter = models.Filter(
|
||||||
must=[
|
must=[
|
||||||
models.FieldCondition(
|
models.FieldCondition(
|
||||||
|
|||||||
47
md-gen.py
Normal file
47
md-gen.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
def create_markdown_from_directory(directory=".", output_file="combined.md"):
    """
    Combine every readable UTF-8 text file under ``directory`` into one markdown file.

    The tree is walked recursively. Each file is written as a section made of a
    ``## File: <relative path>`` heading, the file's content, and a ``---``
    separator. Directories and files whose name appears in the built-in ignore
    set are skipped, as is the output file itself when it lives inside
    ``directory``.

    Files that cannot be opened or decoded as UTF-8 (e.g. binaries) are reported
    on stdout and skipped; they never abort the run.

    Args:
        directory (str): The directory to traverse. Defaults to the current directory.
        output_file (str): Path of the markdown file to create (overwritten if it
            exists). Defaults to 'combined.md'.

    Raises:
        OSError: If the output file cannot be opened or written.
    """
    # A frozenset gives O(1) membership tests for the per-entry checks below.
    ignored_names = frozenset((
        "node_modules", "__pycache__", ".git", ".DS_Store", "inputs", "indexes",
        "model", "models", ".venv", "temp", ".pytest_cache", ".ruff_cache",
        "extensions", "dir_tree.py", "map.txt", "signal-desktop-keyring.gpg",
        ".husky", ".next", "docs", "index.pkl", "index.faiss", "assets", "fonts", "public",
        "yarn.lock", "package-lock.json",
    ))

    # Resolved once so the walk can recognise the output file under any spelling
    # of its path (relative, absolute, via symlink).
    output_path = os.path.normcase(os.path.realpath(output_file))

    with open(output_file, "w", encoding="utf-8") as outfile:
        for root, dirs, files in os.walk(directory):
            # Assigning to dirs[:] prunes ignored directories from the traversal;
            # sorting makes the visiting order (and so the output) reproducible.
            dirs[:] = sorted(d for d in dirs if d not in ignored_names)

            for filename in sorted(files):
                if filename in ignored_names:
                    continue
                filepath = os.path.join(root, filename)

                # The output file is open for writing right now; reading it back
                # would embed a truncated copy of the result inside itself.
                if os.path.normcase(os.path.realpath(filepath)) == output_path:
                    continue

                # Only the read is best-effort: unreadable or non-UTF-8 files are
                # reported and skipped. Write failures are left to propagate.
                try:
                    with open(filepath, "r", encoding="utf-8") as infile:
                        content = infile.read()
                except (OSError, UnicodeError) as e:
                    print(f"Error processing file {filepath}: {e}")
                    continue

                # Get a relative path to better indicate file location
                rel_path = os.path.relpath(filepath, directory)
                outfile.write(f"## File: {rel_path}\n\n")
                outfile.write(content)
                outfile.write("\n\n---\n\n")  # Separator between files

    print(f"Successfully created {output_file}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: bundle the current working directory into
    # combined.md, spelling out the function's default arguments.
    create_markdown_from_directory(directory=".", output_file="combined.md")
|
||||||
Reference in New Issue
Block a user