diff --git a/application/parser/remote/base.py b/application/parser/remote/base.py
new file mode 100644
index 00000000..91313f22
--- /dev/null
+++ b/application/parser/remote/base.py
@@ -0,0 +1,42 @@
+"""Base reader class."""
+from abc import ABC, abstractmethod
+from typing import Any, List
+
+from langchain.docstore.document import Document as LCDocument
+from application.parser.schema.base import Document
+
+
+class BaseRemote(ABC):
+    """Common interface for loaders that fetch documents from a remote source.
+
+    Subclasses implement ``load_data``; ``load_langchain_documents`` is built
+    on top of it. Deriving from ``ABC`` is what makes ``@abstractmethod``
+    enforced at instantiation time.
+    """
+
+    @abstractmethod
+    def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
+        """Load documents from the remote source.
+
+        Args:
+            *args: Positional options defined by the concrete loader.
+            **load_kwargs: Keyword options defined by the concrete loader.
+
+        Returns:
+            The loaded documents.
+        """
+
+    def load_langchain_documents(
+        self, *args: Any, **load_kwargs: Any
+    ) -> List[LCDocument]:
+        """Load data in LangChain document format.
+
+        Args:
+            *args: Forwarded unchanged to ``load_data``.
+            **load_kwargs: Forwarded unchanged to ``load_data``.
+
+        Returns:
+            One LangChain document per document returned by ``load_data``.
+        """
+        docs = self.load_data(*args, **load_kwargs)
+        return [d.to_langchain_format() for d in docs]
diff --git a/application/parser/remote/telegram.py b/application/parser/remote/telegram.py
new file mode 100644
index 00000000..895d5cb3
--- /dev/null
+++ b/application/parser/remote/telegram.py
@@ -0,0 +1,63 @@
+"""Telegram chat loader built on LangChain's ``TelegramChatApiLoader``."""
+from typing import Any, Dict, List
+
+from langchain.document_loaders import TelegramChatApiLoader, TelegramChatFileLoader
+from application.parser.remote.base import BaseRemote
+from application.parser.schema.base import Document
+
+
+class TelegramChatApiRemote(BaseRemote):
+    """Load Telegram chat content through ``TelegramChatApiLoader``."""
+
+    def _init_parser(self, *args: Any, **load_kwargs: Any) -> Dict:
+        """Create the underlying LangChain loader and store it on ``self.loader``.
+
+        Args:
+            *args: Accepted for interface compatibility; not used.
+            **load_kwargs: Passed straight to ``TelegramChatApiLoader``.
+
+        Returns:
+            An empty dict.
+        """
+        self.loader = TelegramChatApiLoader(**load_kwargs)
+        return {}
+
+    def _load_chat(self, **load_kwargs: Any) -> list:
+        """Return the LangChain documents produced by the loader.
+
+        The loader is (re)built when options are supplied or when
+        ``_init_parser`` has not been called yet; otherwise it is reused.
+        """
+        if load_kwargs or not hasattr(self, "loader"):
+            self._init_parser(**load_kwargs)
+        return self.loader.load()
+
+    def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
+        """Load the chat as project ``Document`` objects.
+
+        Args:
+            *args: Accepted for interface compatibility; not used.
+            **load_kwargs: Options for ``TelegramChatApiLoader``.
+
+        Returns:
+            One ``Document`` per LangChain document the loader yields.
+        """
+        # NOTE(review): assumes Document.from_langchain_format exists as the
+        # inverse of to_langchain_format -- confirm against schema/base.py.
+        return [
+            Document.from_langchain_format(lc_doc)
+            for lc_doc in self._load_chat(**load_kwargs)
+        ]
+
+    def parse_file(self, *args: Any, **load_kwargs: Any) -> str:
+        """Return the chat content as one newline-joined string.
+
+        Args:
+            *args: Accepted for interface compatibility; not used.
+            **load_kwargs: Options for ``TelegramChatApiLoader``.
+
+        Returns:
+            The ``page_content`` of every loaded document, joined by newlines.
+        """
+        lc_docs = self._load_chat(**load_kwargs)
+        return "\n".join(lc_doc.page_content for lc_doc in lc_docs)