mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
Fixing ingestion metadata grouping
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -172,3 +172,4 @@ application/vectors/
|
|||||||
node_modules/
|
node_modules/
|
||||||
.vscode/settings.json
|
.vscode/settings.json
|
||||||
models/
|
models/
|
||||||
|
model/
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
"""Base reader class."""
|
"""Base reader class."""
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from typing import Any, List, Iterator
|
from typing import Any, List
|
||||||
|
|
||||||
from langchain.docstore.document import Document as LCDocument
|
from langchain.docstore.document import Document as LCDocument
|
||||||
from application.parser.schema.base import Document
|
from application.parser.schema.base import Document
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from langchain.document_loader import TelegramChatApiLoader, TelegramChatFileLoader
|
from langchain.document_loader import TelegramChatApiLoader
|
||||||
from application.parser.remote.base import BaseRemote
|
from application.parser.remote.base import BaseRemote
|
||||||
|
|
||||||
class TelegramChatApiRemote(BaseRemote):
|
class TelegramChatApiRemote(BaseRemote):
|
||||||
@@ -8,4 +8,4 @@ class TelegramChatApiRemote(BaseRemote):
|
|||||||
|
|
||||||
def parse_file(self, *args, **load_kwargs):
|
def parse_file(self, *args, **load_kwargs):
|
||||||
|
|
||||||
return text
|
return
|
||||||
@@ -124,7 +124,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def remote_worker(self, source_data, name_job, user, directory = 'temp', loader = 'url'):
|
def remote_worker(self, source_data, name_job, user, directory = 'temp', loader = 'url'):
|
||||||
sample = False
|
# sample = False
|
||||||
token_check = True
|
token_check = True
|
||||||
min_tokens = 150
|
min_tokens = 150
|
||||||
max_tokens = 1250
|
max_tokens = 1250
|
||||||
@@ -155,10 +155,10 @@ def remote_worker(self, source_data, name_job, user, directory = 'temp', loader
|
|||||||
if settings.VECTOR_STORE == "faiss":
|
if settings.VECTOR_STORE == "faiss":
|
||||||
files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
|
files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
|
||||||
'file_pkl': open(full_path + '/index.pkl', 'rb')}
|
'file_pkl': open(full_path + '/index.pkl', 'rb')}
|
||||||
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
|
requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
|
||||||
response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
|
requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
|
||||||
else:
|
else:
|
||||||
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
|
requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
|
||||||
|
|
||||||
shutil.rmtree(full_path)
|
shutil.rmtree(full_path)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user