mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 00:23:17 +00:00
Fixing ingestion metadata grouping
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -172,3 +172,4 @@ application/vectors/
|
||||
node_modules/
|
||||
.vscode/settings.json
|
||||
models/
|
||||
model/
|
||||
@@ -1,6 +1,6 @@
|
||||
"""Base reader class."""
|
||||
from abc import abstractmethod
|
||||
from typing import Any, List, Iterator
|
||||
from typing import Any, List
|
||||
|
||||
from langchain.docstore.document import Document as LCDocument
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from langchain.document_loader import TelegramChatApiLoader, TelegramChatFileLoader
|
||||
from langchain.document_loader import TelegramChatApiLoader
|
||||
from application.parser.remote.base import BaseRemote
|
||||
|
||||
class TelegramChatApiRemote(BaseRemote):
|
||||
@@ -8,4 +8,4 @@ class TelegramChatApiRemote(BaseRemote):
|
||||
|
||||
def parse_file(self, *args, **load_kwargs):
|
||||
|
||||
return text
|
||||
return
|
||||
@@ -124,7 +124,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
}
|
||||
|
||||
def remote_worker(self, source_data, name_job, user, directory = 'temp', loader = 'url'):
|
||||
sample = False
|
||||
# sample = False
|
||||
token_check = True
|
||||
min_tokens = 150
|
||||
max_tokens = 1250
|
||||
@@ -155,10 +155,10 @@ def remote_worker(self, source_data, name_job, user, directory = 'temp', loader
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
|
||||
'file_pkl': open(full_path + '/index.pkl', 'rb')}
|
||||
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
|
||||
response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
|
||||
requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
|
||||
requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
|
||||
else:
|
||||
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
|
||||
requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
|
||||
|
||||
shutil.rmtree(full_path)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user