Bulk ingest

Added a method based on the indexGPT folder ingester. An additional reStructuredText (.rst) reader is also included.
This commit is contained in:
Pavel
2023-02-10 19:44:42 +04:00
parent 5038de06bb
commit 79b5ef9c14
13 changed files with 962 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
"""Base reader class."""
from abc import abstractmethod
from typing import Any, List
from langchain.docstore.document import Document as LCDocument
from parser.schema.base import Document
class BaseReader:
    """Utilities for loading data from a directory.

    Subclasses provide the actual loading logic by overriding
    :meth:`load_data`; :meth:`load_langchain_documents` is built on top of it.
    """

    @abstractmethod
    def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
        """Load data from the input directory.

        Args:
            *args: Reader-specific positional arguments.
            **load_kwargs: Reader-specific keyword arguments.

        Returns:
            The loaded documents.

        Raises:
            NotImplementedError: Always, in this base implementation;
                subclasses must override this method.
        """
        # ``@abstractmethod`` is only enforced for classes whose metaclass is
        # ``ABCMeta``. This class is a plain class, so without an explicit
        # error the base method would silently return ``None``.
        raise NotImplementedError(
            f"{type(self).__name__} must implement load_data()"
        )

    def load_langchain_documents(
        self, *args: Any, **load_kwargs: Any
    ) -> List[LCDocument]:
        """Load data in LangChain document format.

        All arguments are forwarded unchanged to :meth:`load_data`, and each
        returned document is converted with its ``to_langchain_format()``
        method.

        Args:
            *args: Positional arguments passed through to :meth:`load_data`.
            **load_kwargs: Keyword arguments passed through to
                :meth:`load_data`.

        Returns:
            The converted documents, in the order :meth:`load_data` returned
            them.
        """
        documents = self.load_data(*args, **load_kwargs)
        return [document.to_langchain_format() for document in documents]