From 54d187a0ade1f2f82d66067b86107114db8eaee8 Mon Sep 17 00:00:00 2001 From: Pavel Date: Wed, 28 Feb 2024 19:52:58 +0300 Subject: [PATCH] Fixing ingestion metadata grouping --- .gitignore | 1 + application/parser/remote/base.py | 2 +- application/parser/remote/telegram.py | 4 ++-- application/worker.py | 8 ++++---- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 053e5793..1a5f0419 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,4 @@ application/vectors/ node_modules/ .vscode/settings.json models/ +model/ \ No newline at end of file diff --git a/application/parser/remote/base.py b/application/parser/remote/base.py index 75ae34d5..91313f22 100644 --- a/application/parser/remote/base.py +++ b/application/parser/remote/base.py @@ -1,6 +1,6 @@ """Base reader class.""" from abc import abstractmethod -from typing import Any, List, Iterator +from typing import Any, List from langchain.docstore.document import Document as LCDocument from application.parser.schema.base import Document diff --git a/application/parser/remote/telegram.py b/application/parser/remote/telegram.py index 895d5cb3..0e691be4 100644 --- a/application/parser/remote/telegram.py +++ b/application/parser/remote/telegram.py @@ -1,4 +1,4 @@ -from langchain.document_loader import TelegramChatApiLoader, TelegramChatFileLoader +from langchain.document_loader import TelegramChatApiLoader from application.parser.remote.base import BaseRemote class TelegramChatApiRemote(BaseRemote): @@ -8,4 +8,4 @@ class TelegramChatApiRemote(BaseRemote): def parse_file(self, *args, **load_kwargs): - return text \ No newline at end of file + return \ No newline at end of file diff --git a/application/worker.py b/application/worker.py index 875611bf..21bb319f 100644 --- a/application/worker.py +++ b/application/worker.py @@ -124,7 +124,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user): } def remote_worker(self, source_data, name_job, user, directory = 'temp', loader = 'url'): - sample = False + # sample = False token_check = True min_tokens = 150 max_tokens = 1250 @@ -155,10 +155,10 @@ def remote_worker(self, source_data, name_job, user, directory = 'temp', loader if settings.VECTOR_STORE == "faiss": files = {'file_faiss': open(full_path + '/index.faiss', 'rb'), 'file_pkl': open(full_path + '/index.pkl', 'rb')} - response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data) - response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path)) + requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data) + requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path)) else: - response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data) + requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data) shutil.rmtree(full_path)