"""Manually inspect a saved FAISS index.

Loads the vector store stored at the relative path ``outputs/inputs``, runs a
single similarity search against it, prints the returned documents, and then
prints the token length of each returned document (``cl100k_base`` encoding).

The key passed to ``OpenAIEmbeddings`` is read from the ``API_KEY``
environment variable after ``dotenv.load_dotenv()`` has been called.
"""
# Provenance: recovered from the patch "Create test_ingestion.py"
# (commit 20a0800aa73e8b6749723e5e759f04a052a15874, Alex, 2023-03-13),
# which adds this file as scripts/test_ingestion.py.
import os

import dotenv
import tiktoken
from langchain import FAISS
from langchain.embeddings import OpenAIEmbeddings

INDEX_PATH = "outputs/inputs"
QUERY = "Whats new in 1.5.3?"
ENCODING_NAME = "cl100k_base"
SEPARATOR = "====================================="


def main() -> None:
    """Run the query against the saved index and print results and token counts."""
    dotenv.load_dotenv()
    # May be None when API_KEY is unset; the value is forwarded unchanged,
    # exactly as the original script did.
    embeddings_key = os.getenv("API_KEY")
    docsearch = FAISS.load_local(
        INDEX_PATH,
        OpenAIEmbeddings(openai_api_key=embeddings_key),
    )

    matches = docsearch.similarity_search(QUERY)
    print(matches)
    print(SEPARATOR)
    print(SEPARATOR)

    # The encoding does not depend on the document, so look it up once
    # instead of once per result.
    encoding = tiktoken.get_encoding(ENCODING_NAME)
    for doc in matches:
        print("docs length (tokens)")
        print(len(encoding.encode(doc.page_content)))


# Guarded so that importing this module (e.g. pytest collecting a file named
# test_*.py) does not load the index or call the embeddings backend.
if __name__ == "__main__":
    main()