mirror of
https://github.com/coleam00/ai-agents-masterclass.git
synced 2026-01-19 21:40:32 +00:00
Korvus RAG testing... cool example of using a tool for easy RAG with Python
This commit is contained in:
4
korvus-simple-rag/.env.example
Normal file
4
korvus-simple-rag/.env.example
Normal file
@@ -0,0 +1,4 @@
|
||||
# Rename this file to .env once you have filled in the environment variables below.
|
||||
|
||||
# PostgresML database connection string. See Korvus documentation for getting this set up.
|
||||
KORVUS_DATABASE_URL=postgres://user:password@sql.cloud.postgresml.org:6432/korvus_database
|
||||
61
korvus-simple-rag/korvus_rag.py
Normal file
61
korvus-simple-rag/korvus_rag.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from korvus import Collection, Pipeline
|
||||
from datasets import load_dataset
|
||||
from time import time
|
||||
from dotenv import load_dotenv
|
||||
from rich.console import Console
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
|
||||
async def main(*, query="Who won more than 20 grammy awards?", max_documents=200):
    """Run a small end-to-end Korvus RAG demo on the SQuAD training split.

    The function loads environment variables from ``.env``, registers a
    splitter + semantic-search pipeline on the ``squad`` collection, upserts
    the first ``max_documents`` unique SQuAD contexts, performs one vector
    search for ``query``, prints the hits together with the query latency,
    and finally archives the collection.

    Args:
        query: Natural-language question sent to the vector search.
        max_documents: How many de-duplicated contexts (taken from the start
            of the split) are upserted into the collection.
    """
    # Function-scope import keeps this block self-contained. perf_counter is a
    # monotonic clock, so the measured latency cannot be skewed by system
    # clock adjustments the way wall-clock time() can.
    from time import perf_counter

    # Pull settings from .env (the bundled .env.example defines
    # KORVUS_DATABASE_URL, the PostgresML connection string).
    load_dotenv()
    # NOTE(review): presumably enables backtraces from Korvus' native layer
    # when something fails -- confirm against the Korvus docs.
    os.environ["RUST_BACKTRACE"] = "1"
    console = Console()

    # Initialize collection
    collection = Collection("squad")

    # Create and add pipeline: split the "text" field into chunks and embed
    # them for semantic search.
    pipeline = Pipeline(
        "squadv1",
        {
            "text": {
                "splitter": {"model": "recursive_character"},
                "semantic_search": {"model": "intfloat/e5-small-v2"},
            }
        },
    )
    await collection.add_pipeline(pipeline)

    # Prep documents for upserting: one document per unique context passage.
    frame = load_dataset("squad", split="train").to_pandas()
    frame = frame.drop_duplicates(subset=["context"])

    # Report how many unique contexts exist in total (not just the ones sent).
    print(len(frame))

    # Only materialize the rows and columns that are actually upserted instead
    # of converting the entire dataset to dict records first.
    selected = frame.iloc[:max_documents][["id", "context", "title"]]
    documents = [
        {"id": row["id"], "text": row["context"], "title": row["title"]}
        for row in selected.to_dict(orient="records")
    ]

    # Upsert documents
    await collection.upsert_documents(documents)

    # Query for answer
    console.print("Querying for context ...")
    started = perf_counter()
    results = await collection.vector_search(
        {"query": {"fields": {"text": {"query": query}}}, "limit": 5},
        pipeline,
    )
    elapsed = perf_counter() - started

    console.print(f"\n Results for '{query}' ", style="bold")
    console.print(results)
    console.print(f"Query time = {elapsed:0.3f}")

    # Archive collection so the demo does not leave it active after the run.
    await collection.archive()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: build the demo coroutine and drive it to completion
    # on a fresh event loop.
    demo = main()
    asyncio.run(demo)
|
||||
4
korvus-simple-rag/requirements.txt
Normal file
4
korvus-simple-rag/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
korvus==1.1.2
|
||||
asyncio==3.4.3
|
||||
python-dotenv==0.13.0
|
||||
datasets==3.0.0
|
||||
Reference in New Issue
Block a user