mirror of
https://github.com/coleam00/ai-agents-masterclass.git
synced 2025-12-02 10:03:13 +00:00
AI Agents Masterclass #6 - RAG AI Agent
This commit is contained in:
35
6-rag-task-agent/rag-document-loader.py
Normal file
35
6-rag-task-agent/rag-document-loader.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
|
||||
from langchain_community.document_loaders import DirectoryLoader
|
||||
from langchain_text_splitters import CharacterTextSplitter
|
||||
from langchain_chroma import Chroma
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
|
||||
rag_directory = os.getenv('DIRECTORY', 'meeting_notes')
|
||||
|
||||
def load_documents(directory):
|
||||
# Load the PDF or txt documents from the directory
|
||||
loader = DirectoryLoader(directory)
|
||||
documents = loader.load()
|
||||
|
||||
# Split the documents into chunks
|
||||
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
||||
docs = text_splitter.split_documents(documents)
|
||||
|
||||
return docs
|
||||
|
||||
def main():
|
||||
# Get the documents split into chunks
|
||||
docs = load_documents(rag_directory)
|
||||
|
||||
# Create the open-source embedding function
|
||||
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
|
||||
|
||||
# Load the documents into Chroma and save it to the disk
|
||||
Chroma.from_documents(docs, embedding_function, persist_directory="./chroma_db")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user