mirror of
https://github.com/arc53/DocsGPT.git
synced 2026-03-02 08:12:06 +00:00
chore: Update Docker build platforms for application and frontend and optimised embedding import
This commit is contained in:
@@ -15,7 +15,6 @@ html2text==2020.1.16
|
||||
javalang==0.13.0
|
||||
langchain==0.1.4
|
||||
langchain-openai==0.0.5
|
||||
nltk==3.8.1
|
||||
openapi3_parser==1.1.16
|
||||
pandas==2.2.0
|
||||
pydantic_settings==2.1.0
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from transformers import GPT2TokenizerFast
|
||||
|
||||
tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
|
||||
tokenizer.model_max_length = 100000
|
||||
def count_tokens(string):
|
||||
|
||||
return len(tokenizer(string)['input_ids'])
|
||||
@@ -45,10 +45,15 @@ class BaseVectorStore(ABC):
|
||||
cohere_api_key=embeddings_key
|
||||
)
|
||||
elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
embedding_instance = embeddings_factory[embeddings_name](
|
||||
#model_name="./model/all-mpnet-base-v2",
|
||||
model_kwargs={"device": "cpu"},
|
||||
)
|
||||
if os.path.exists("./model/all-mpnet-base-v2"):
|
||||
embedding_instance = embeddings_factory[embeddings_name](
|
||||
model_name="./model/all-mpnet-base-v2",
|
||||
model_kwargs={"device": "cpu"},
|
||||
)
|
||||
else:
|
||||
embedding_instance = embeddings_factory[embeddings_name](
|
||||
model_kwargs={"device": "cpu"},
|
||||
)
|
||||
else:
|
||||
embedding_instance = embeddings_factory[embeddings_name]()
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ import string
|
||||
import zipfile
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import nltk
|
||||
import requests
|
||||
|
||||
from application.core.settings import settings
|
||||
@@ -14,13 +13,6 @@ from application.parser.open_ai_func import call_openai_api
|
||||
from application.parser.schema.base import Document
|
||||
from application.parser.token_func import group_split
|
||||
|
||||
try:
|
||||
nltk.download("punkt", quiet=True)
|
||||
nltk.download("averaged_perceptron_tagger", quiet=True)
|
||||
except FileExistsError:
|
||||
pass
|
||||
|
||||
|
||||
# Define a function to extract metadata from a given filename.
|
||||
def metadata_from_filename(title):
|
||||
store = "/".join(title.split("/")[1:3])
|
||||
|
||||
Reference in New Issue
Block a user