AI Agents Masterclass #6 - RAG AI Agent

2025-12-02 10:03:13 +00:00 · 2024-08-02 14:47:55 -05:00
parent 2914135e96
commit bf8d518c82
10 changed files with 639 additions and 1 deletions
--- a/6-rag-task-agent/rag-document-loader.py
+++ b/6-rag-task-agent/rag-document-loader.py
@@ -0,0 +1,35 @@
+from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
+from langchain_community.document_loaders import DirectoryLoader
+from langchain_text_splitters import CharacterTextSplitter
+from langchain_chroma import Chroma
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+# Folder holding the source documents; DIRECTORY overrides the default name
+rag_directory = os.environ.get('DIRECTORY', 'meeting_notes')
+
+def load_documents(directory):
+    """Load the documents found in ``directory`` and return them as chunks."""
+    # DirectoryLoader is used with its default settings
+    raw_documents = DirectoryLoader(directory).load()
+
+    # Target chunk size is 1000 characters, with no overlap between chunks
+    splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
+    chunks = splitter.split_documents(raw_documents)
+
+    return chunks
+
+def main():
+    """Chunk the configured directory, embed it, and persist it with Chroma."""
+    chunks = load_documents(rag_directory)
+
+    # Open-source sentence-transformer model used to embed each chunk
+    embedder = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+
+    # Passing persist_directory saves the resulting store to disk
+    Chroma.from_documents(chunks, embedder, persist_directory="./chroma_db")
+
+
+if __name__ == "__main__":  # run only as a script, not on import
+    main()