This commit is contained in:
Anton Larin
2023-06-03 11:01:50 +02:00
parent dd9f1abcea
commit 6d5b698c39
2 changed files with 17 additions and 2 deletions

View File

@@ -1,2 +1,8 @@
OPENAI_API_KEY=<LLM api key (for example, open ai key)> OPENAI_API_KEY=<LLM api key (for example, open ai key)>
EMBEDDINGS_KEY=<LLM embeddings api key (for example, open ai key)> EMBEDDINGS_KEY=<LLM embeddings api key (for example, open ai key)>
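# Azure OpenAI (optional): Azure is used only when OPENAI_API_BASE, OPENAI_API_VERSION
# and AZURE_DEPLOYMENT_NAME are all set; leave them blank to use the default OpenAI API.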
OPENAI_API_BASE=
OPENAI_API_VERSION=
AZURE_DEPLOYMENT_NAME=
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=

View File

@@ -11,7 +11,7 @@ from retry import retry
# from langchain.embeddings import CohereEmbeddings # from langchain.embeddings import CohereEmbeddings
def num_tokens_from_string(string: str, encoding_name: str) -> int: def num_tokens_from_string(string: str, encoding_name: str) -> tuple[int, float]:
# Function to convert string to tokens and estimate user cost. # Function to convert string to tokens and estimate user cost.
encoding = tiktoken.get_encoding(encoding_name) encoding = tiktoken.get_encoding(encoding_name)
num_tokens = len(encoding.encode(string)) num_tokens = len(encoding.encode(string))
@@ -45,7 +45,16 @@ def call_openai_api(docs, folder_name):
# environment="us-east1-gcp" # next to api key in console # environment="us-east1-gcp" # next to api key in console
# ) # )
# index_name = "pandas" # index_name = "pandas"
store = FAISS.from_documents(docs_test, OpenAIEmbeddings()) if ( # azure
os.environ.get("OPENAI_API_BASE")
and os.environ.get("OPENAI_API_VERSION")
and os.environ.get("AZURE_DEPLOYMENT_NAME")
):
os.environ["OPENAI_API_TYPE"] = "azure"
openai_embeddings = OpenAIEmbeddings(model=os.environ.get("AZURE_EMBEDDINGS_DEPLOYMENT_NAME"))
else:
openai_embeddings = OpenAIEmbeddings()
store = FAISS.from_documents(docs_test, openai_embeddings)
# store_pine = Pinecone.from_documents(docs_test, OpenAIEmbeddings(), index_name=index_name) # store_pine = Pinecone.from_documents(docs_test, OpenAIEmbeddings(), index_name=index_name)
# Uncomment for MPNet embeddings # Uncomment for MPNet embeddings