Fixing ingestion metadata grouping

This commit is contained in:
Pavel
2024-02-28 19:52:58 +03:00
parent 325a8889ab
commit 54d187a0ad
4 changed files with 8 additions and 7 deletions

1
.gitignore vendored
View File

@@ -172,3 +172,4 @@ application/vectors/
 node_modules/
 .vscode/settings.json
 models/
+model/

View File

@@ -1,6 +1,6 @@
 """Base reader class."""
 from abc import abstractmethod
-from typing import Any, List, Iterator
+from typing import Any, List
 from langchain.docstore.document import Document as LCDocument
 from application.parser.schema.base import Document

View File

@@ -1,4 +1,4 @@
-from langchain.document_loader import TelegramChatApiLoader, TelegramChatFileLoader
+from langchain.document_loader import TelegramChatApiLoader
 from application.parser.remote.base import BaseRemote
 class TelegramChatApiRemote(BaseRemote):
@@ -8,4 +8,4 @@ class TelegramChatApiRemote(BaseRemote):
 def parse_file(self, *args, **load_kwargs):
-return text
+return

View File

@@ -124,7 +124,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
 }
 def remote_worker(self, source_data, name_job, user, directory = 'temp', loader = 'url'):
-sample = False
+# sample = False
 token_check = True
 min_tokens = 150
 max_tokens = 1250
@@ -155,10 +155,10 @@ def remote_worker(self, source_data, name_job, user, directory = 'temp', loader
 if settings.VECTOR_STORE == "faiss":
 files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
 'file_pkl': open(full_path + '/index.pkl', 'rb')}
-response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
-response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
+requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
+requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
 else:
-response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
+requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
 shutil.rmtree(full_path)