mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
lint: ruff
This commit is contained in:
@@ -8,12 +8,10 @@ import traceback
|
||||
|
||||
from pymongo import MongoClient
|
||||
from bson.objectid import ObjectId
|
||||
from application.utils import count_tokens
|
||||
|
||||
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.retriever.retriever_creator import RetrieverCreator
|
||||
from application.error import bad_request
|
||||
|
||||
@@ -283,10 +283,12 @@ def check_docs():
|
||||
else:
|
||||
file_url = urlparse(base_path + vectorstore + "index.faiss")
|
||||
|
||||
if file_url.scheme in ['https'] and file_url.netloc == 'raw.githubusercontent.com' and file_url.path.startswith('/arc53/DocsHUB/main/'):
|
||||
|
||||
if (
|
||||
file_url.scheme in ['https'] and
|
||||
file_url.netloc == 'raw.githubusercontent.com' and
|
||||
file_url.path.startswith('/arc53/DocsHUB/main/')
|
||||
):
|
||||
r = requests.get(file_url.geturl())
|
||||
|
||||
if r.status_code != 200:
|
||||
return {"status": "null"}
|
||||
else:
|
||||
@@ -295,7 +297,6 @@ def check_docs():
|
||||
with open(vectorstore + "index.faiss", "wb") as f:
|
||||
f.write(r.content)
|
||||
|
||||
# download the store
|
||||
r = requests.get(base_path + vectorstore + "index.pkl")
|
||||
with open(vectorstore + "index.pkl", "wb") as f:
|
||||
f.write(r.content)
|
||||
|
||||
@@ -9,7 +9,7 @@ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__
|
||||
|
||||
class Settings(BaseSettings):
|
||||
LLM_NAME: str = "docsgpt"
|
||||
MODEL_NAME: Optional[str] = None # when LLM_NAME is openai, MODEL_NAME can be e.g. gpt-4-turbo-preview or gpt-3.5-turbo
|
||||
MODEL_NAME: Optional[str] = None # if LLM_NAME is openai, MODEL_NAME can be gpt-4 or gpt-3.5-turbo
|
||||
EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
|
||||
CELERY_BROKER_URL: str = "redis://localhost:6379/0"
|
||||
CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
|
||||
|
||||
@@ -22,7 +22,10 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
|
||||
doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
|
||||
|
||||
# Check if current group is empty or if the document can be added based on token count and matching metadata
|
||||
if current_group is None or (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len < min_tokens and current_group.extra_info == doc.extra_info):
|
||||
if (current_group is None or
|
||||
(len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and
|
||||
doc_len < min_tokens and
|
||||
current_group.extra_info == doc.extra_info)):
|
||||
if current_group is None:
|
||||
current_group = doc # Use the document directly to retain its metadata
|
||||
else:
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import os
|
||||
import json
|
||||
from application.retriever.base import BaseRetriever
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
@@ -39,9 +38,19 @@ class ClassicRAG(BaseRetriever):
|
||||
if self.chunks == 0:
|
||||
docs = []
|
||||
else:
|
||||
docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, self.vectorstore, settings.EMBEDDINGS_KEY)
|
||||
docsearch = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE,
|
||||
self.vectorstore,
|
||||
settings.EMBEDDINGS_KEY
|
||||
)
|
||||
docs_temp = docsearch.search(self.question, k=self.chunks)
|
||||
docs = [{"title": i.metadata['title'].split('/')[-1] if i.metadata else i.page_content, "text": i.page_content} for i in docs_temp]
|
||||
docs = [
|
||||
{
|
||||
"title": i.metadata['title'].split('/')[-1] if i.metadata else i.page_content,
|
||||
"text": i.page_content
|
||||
}
|
||||
for i in docs_temp
|
||||
]
|
||||
if settings.LLM_NAME == "llama.cpp":
|
||||
docs = [docs[0]]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user