Merge pull request #1062 from ManishMadan2882/1059-migrating-database-to-new-model

Migrating database to new model
This commit is contained in:
Alex
2024-09-09 21:01:04 +01:00
committed by GitHub
28 changed files with 376 additions and 474 deletions

View File

@@ -21,7 +21,7 @@ logger = logging.getLogger(__name__)
mongo = MongoClient(settings.MONGO_URI)
db = mongo["docsgpt"]
conversations_collection = db["conversations"]
vectors_collection = db["vectors"]
sources_collection = db["sources"]
prompts_collection = db["prompts"]
api_key_collection = db["api_keys"]
answer = Blueprint("answer", __name__)
@@ -77,40 +77,27 @@ def get_data_from_api_key(api_key):
if data is None:
raise Exception("Invalid API Key, please generate new key", 401)
if isinstance(data["source"], DBRef):
source_id = db.dereference(data["source"])["_id"]
data["source"] = get_source(source_id)
if "retriever" not in data:
data["retriever"] = None
if "source" in data and isinstance(data["source"], DBRef):
source_doc = db.dereference(data["source"])
data["source"] = str(source_doc["_id"])
if "retriever" in source_doc:
data["retriever"] = source_doc["retriever"]
else:
data["source"] = {}
return data
def get_source(active_doc):
if ObjectId.is_valid(active_doc):
doc = vectors_collection.find_one({"_id": ObjectId(active_doc)})
if doc is None:
raise Exception("Source document does not exist", 404)
print("res", doc)
source = {"active_docs": "/".join(doc["location"].split("/")[-2:])}
else:
source = {"active_docs": active_doc}
return source
def get_retriever(source_id: str):
doc = sources_collection.find_one({"_id": ObjectId(source_id)})
if doc is None:
raise Exception("Source document does not exist", 404)
retriever_name = None if "retriever" not in doc else doc["retriever"]
return retriever_name
def get_vectorstore(data):
if "active_docs" in data:
if data["active_docs"].split("/")[0] == "default":
vectorstore = ""
elif data["active_docs"].split("/")[0] == "local":
vectorstore = "indexes/" + data["active_docs"]
else:
vectorstore = "vectors/" + data["active_docs"]
if data["active_docs"] == "default":
vectorstore = ""
else:
vectorstore = ""
vectorstore = os.path.join("application", vectorstore)
return vectorstore
def is_azure_configured():
return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
@@ -244,28 +231,34 @@ def stream():
else:
token_limit = settings.DEFAULT_MAX_HISTORY
# check if active_docs or api_key is set
## retriever can be "brave_search, duckduck_search or classic"
retriever_name = data["retriever"] if "retriever" in data else "classic"
# check if active_docs or api_key is set
if "api_key" in data:
data_key = get_data_from_api_key(data["api_key"])
chunks = int(data_key["chunks"])
prompt_id = data_key["prompt_id"]
source = data_key["source"]
source = {"active_docs": data_key["source"]}
retriever_name = data_key["retriever"] or retriever_name
user_api_key = data["api_key"]
elif "active_docs" in data:
source = get_source(data["active_docs"])
source = {"active_docs" : data["active_docs"]}
retriever_name = get_retriever(data["active_docs"]) or retriever_name
user_api_key = None
else:
source = {}
user_api_key = None
if source["active_docs"].split("/")[0] == "default" or source["active_docs"].split("/")[0] == "local":
""" if source["active_docs"].split("/")[0] == "default" or source["active_docs"].split("/")[0] == "local":
retriever_name = "classic"
else:
retriever_name = source["active_docs"]
retriever_name = source["active_docs"] """
prompt = get_prompt(prompt_id)
retriever = RetrieverCreator.create_retriever(
retriever_name,
question=question,
@@ -341,6 +334,9 @@ def api_answer():
else:
token_limit = settings.DEFAULT_MAX_HISTORY
## retriever can be brave_search, duckduck_search or classic
retriever_name = data["retriever"] if "retriever" in data else "classic"
# use try and except to check for exception
try:
# check if the vectorstore is set
@@ -348,16 +344,16 @@ def api_answer():
data_key = get_data_from_api_key(data["api_key"])
chunks = int(data_key["chunks"])
prompt_id = data_key["prompt_id"]
source = data_key["source"]
source = {"active_docs": data_key["source"]}
retriever_name = data_key["retriever"] or retriever_name
user_api_key = data["api_key"]
else:
source = get_source(data["active_docs"])
elif "active_docs" in data:
source = {"active_docs":data["active_docs"]}
retriever_name = get_retriever(data["active_docs"]) or retriever_name
user_api_key = None
if source["active_docs"].split("/")[0] == "default" or source["active_docs"].split("/")[0] == "local":
retriever_name = "classic"
else:
retriever_name = source["active_docs"]
source = {}
user_api_key = None
prompt = get_prompt(prompt_id)
@@ -407,19 +403,19 @@ def api_search():
if "api_key" in data:
data_key = get_data_from_api_key(data["api_key"])
chunks = int(data_key["chunks"])
source = data_key["source"]
source = {"active_docs":data_key["source"]}
user_api_key = data_key["api_key"]
elif "active_docs" in data:
source = get_source(data["active_docs"])
source = {"active_docs":data["active_docs"]}
user_api_key = None
else:
source = {}
user_api_key = None
if source["active_docs"].split("/")[0] == "default" or source["active_docs"].split("/")[0] == "local":
retriever_name = "classic"
if "retriever" in data:
retriever_name = data["retriever"]
else:
retriever_name = source["active_docs"]
retriever_name = "classic"
if "token_limit" in data:
token_limit = data["token_limit"]
else:

View File

@@ -3,13 +3,13 @@ import datetime
from flask import Blueprint, request, send_from_directory
from pymongo import MongoClient
from werkzeug.utils import secure_filename
from bson.objectid import ObjectId
from application.core.settings import settings
mongo = MongoClient(settings.MONGO_URI)
db = mongo["docsgpt"]
conversations_collection = db["conversations"]
vectors_collection = db["vectors"]
sources_collection = db["sources"]
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -35,7 +35,12 @@ def upload_index_files():
return {"status": "no name"}
job_name = secure_filename(request.form["name"])
tokens = secure_filename(request.form["tokens"])
save_dir = os.path.join(current_dir, "indexes", user, job_name)
retriever = secure_filename(request.form["retriever"])
id = secure_filename(request.form["id"])
type = secure_filename(request.form["type"])
remote_data = secure_filename(request.form["remote_data"]) if "remote_data" in request.form else None
save_dir = os.path.join(current_dir, "indexes", str(id))
if settings.VECTOR_STORE == "faiss":
if "file_faiss" not in request.files:
print("No file part")
@@ -55,17 +60,19 @@ def upload_index_files():
os.makedirs(save_dir)
file_faiss.save(os.path.join(save_dir, "index.faiss"))
file_pkl.save(os.path.join(save_dir, "index.pkl"))
# create entry in vectors_collection
vectors_collection.insert_one(
# create entry in sources_collection
sources_collection.insert_one(
{
"_id": ObjectId(id),
"user": user,
"name": job_name,
"language": job_name,
"location": save_dir,
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
"model": settings.EMBEDDINGS_NAME,
"type": "local",
"tokens": tokens
"type": type,
"tokens": tokens,
"retriever": retriever,
"remote_data": remote_data
}
)
return {"status": "ok"}

View File

@@ -2,8 +2,6 @@ import os
import uuid
import shutil
from flask import Blueprint, request, jsonify
from urllib.parse import urlparse
import requests
from pymongo import MongoClient
from bson.objectid import ObjectId
from bson.binary import Binary, UuidRepresentation
@@ -17,7 +15,7 @@ from application.vectorstore.vector_creator import VectorCreator
mongo = MongoClient(settings.MONGO_URI)
db = mongo["docsgpt"]
conversations_collection = db["conversations"]
vectors_collection = db["vectors"]
sources_collection = db["sources"]
prompts_collection = db["prompts"]
feedback_collection = db["feedback"]
api_key_collection = db["api_keys"]
@@ -25,9 +23,7 @@ shared_conversations_collections = db["shared_conversations"]
user = Blueprint("user", __name__)
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@user.route("/api/delete_conversation", methods=["POST"])
@@ -57,9 +53,7 @@ def get_conversations():
conversations = conversations_collection.find().sort("date", -1).limit(30)
list_conversations = []
for conversation in conversations:
list_conversations.append(
{"id": str(conversation["_id"]), "name": conversation["name"]}
)
list_conversations.append({"id": str(conversation["_id"]), "name": conversation["name"]})
# list_conversations = [{"id": "default", "name": "default"}, {"id": "jeff", "name": "jeff"}]
@@ -110,7 +104,7 @@ def delete_by_ids():
return {"status": "error"}
if settings.VECTOR_STORE == "faiss":
result = vectors_collection.delete_index(ids=ids)
result = sources_collection.delete_index(ids=ids)
if result:
return {"status": "ok"}
return {"status": "error"}
@@ -120,28 +114,24 @@ def delete_by_ids():
def delete_old():
"""Delete old indexes."""
import shutil
path = request.args.get("path")
dirs = path.split("/")
dirs_clean = []
for i in range(0, len(dirs)):
dirs_clean.append(secure_filename(dirs[i]))
# check that path strats with indexes or vectors
if dirs_clean[0] not in ["indexes", "vectors"]:
return {"status": "error"}
path_clean = "/".join(dirs_clean)
vectors_collection.delete_one({"name": dirs_clean[-1], "user": dirs_clean[-2]})
source_id = request.args.get("source_id")
doc = sources_collection.find_one({
"_id": ObjectId(source_id),
"user": "local",
})
if(doc is None):
return {"status":"not found"},404
if settings.VECTOR_STORE == "faiss":
try:
shutil.rmtree(os.path.join(current_dir, path_clean))
shutil.rmtree(os.path.join(current_dir, str(doc["_id"])))
except FileNotFoundError:
pass
else:
vetorstore = VectorCreator.create_vectorstore(
settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean)
)
vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, source_id=str(doc["_id"]))
vetorstore.delete_index()
sources_collection.delete_one({
"_id": ObjectId(source_id),
})
return {"status": "ok"}
@@ -175,9 +165,7 @@ def upload_file():
file.save(os.path.join(temp_dir, filename))
# Use shutil.make_archive to zip the temp directory
zip_path = shutil.make_archive(
base_name=os.path.join(save_dir, job_name), format="zip", root_dir=temp_dir
)
zip_path = shutil.make_archive(base_name=os.path.join(save_dir, job_name), format="zip", root_dir=temp_dir)
final_filename = os.path.basename(zip_path)
# Clean up the temporary directory after zipping
@@ -219,9 +207,7 @@ def upload_remote():
source_data = request.form["data"]
if source_data:
task = ingest_remote.delay(
source_data=source_data, job_name=job_name, user=user, loader=source
)
task = ingest_remote.delay(source_data=source_data, job_name=job_name, user=user, loader=source)
task_id = task.id
return {"status": "ok", "task_id": task_id}
else:
@@ -248,55 +234,36 @@ def combined_json():
data = [
{
"name": "default",
"language": "default",
"version": "",
"description": "default",
"fullName": "default",
"date": "default",
"docLink": "default",
"model": settings.EMBEDDINGS_NAME,
"location": "remote",
"tokens": "",
"retriever": "classic",
}
]
# structure: name, language, version, description, fullName, date, docLink
# append data from vectors_collection in sorted order in descending order of date
for index in vectors_collection.find({"user": user}).sort("date", -1):
# append data from sources_collection in sorted order in descending order of date
for index in sources_collection.find({"user": user}).sort("date", -1):
data.append(
{
"id":str(index["_id"]),
"id": str(index["_id"]),
"name": index["name"],
"language": index["language"],
"version": "",
"description": index["name"],
"fullName": index["name"],
"date": index["date"],
"docLink": index["location"],
"model": settings.EMBEDDINGS_NAME,
"location": "local",
"tokens": index["tokens"] if ("tokens" in index.keys()) else "",
"retriever": index["retriever"] if ("retriever" in index.keys()) else "classic",
}
)
if settings.VECTOR_STORE == "faiss":
data_remote = requests.get(
"https://d3dg1063dc54p9.cloudfront.net/combined.json"
).json()
for index in data_remote:
index["location"] = "remote"
data.append(index)
if "duckduck_search" in settings.RETRIEVERS_ENABLED:
data.append(
{
"name": "DuckDuckGo Search",
"language": "en",
"version": "",
"description": "duckduck_search",
"fullName": "DuckDuckGo Search",
"date": "duckduck_search",
"docLink": "duckduck_search",
"model": settings.EMBEDDINGS_NAME,
"location": "custom",
"tokens": "",
"retriever": "duckduck_search",
}
)
if "brave_search" in settings.RETRIEVERS_ENABLED:
@@ -304,14 +271,11 @@ def combined_json():
{
"name": "Brave Search",
"language": "en",
"version": "",
"description": "brave_search",
"fullName": "Brave Search",
"date": "brave_search",
"docLink": "brave_search",
"model": settings.EMBEDDINGS_NAME,
"location": "custom",
"tokens": "",
"retriever": "brave_search",
}
)
@@ -320,39 +284,13 @@ def combined_json():
@user.route("/api/docs_check", methods=["POST"])
def check_docs():
# check if docs exist in a vectorstore folder
data = request.get_json()
# split docs on / and take first part
if data["docs"].split("/")[0] == "local":
return {"status": "exists"}
vectorstore = "vectors/" + secure_filename(data["docs"])
base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"
if os.path.exists(vectorstore) or data["docs"] == "default":
return {"status": "exists"}
else:
file_url = urlparse(base_path + vectorstore + "index.faiss")
if (
file_url.scheme in ["https"]
and file_url.netloc == "raw.githubusercontent.com"
and file_url.path.startswith("/arc53/DocsHUB/main/")
):
r = requests.get(file_url.geturl())
if r.status_code != 200:
return {"status": "null"}
else:
if not os.path.exists(vectorstore):
os.makedirs(vectorstore)
with open(vectorstore + "index.faiss", "wb") as f:
f.write(r.content)
r = requests.get(base_path + vectorstore + "index.pkl")
with open(vectorstore + "index.pkl", "wb") as f:
f.write(r.content)
else:
return {"status": "null"}
return {"status": "loaded"}
return {"status": "not found"}
@user.route("/api/create_prompt", methods=["POST"])
@@ -383,9 +321,7 @@ def get_prompts():
list_prompts.append({"id": "creative", "name": "creative", "type": "public"})
list_prompts.append({"id": "strict", "name": "strict", "type": "public"})
for prompt in prompts:
list_prompts.append(
{"id": str(prompt["_id"]), "name": prompt["name"], "type": "private"}
)
list_prompts.append({"id": str(prompt["_id"]), "name": prompt["name"], "type": "private"})
return jsonify(list_prompts)
@@ -394,21 +330,15 @@ def get_prompts():
def get_single_prompt():
prompt_id = request.args.get("id")
if prompt_id == "default":
with open(
os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r"
) as f:
with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
chat_combine_template = f.read()
return jsonify({"content": chat_combine_template})
elif prompt_id == "creative":
with open(
os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r"
) as f:
with open(os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r") as f:
chat_reduce_creative = f.read()
return jsonify({"content": chat_reduce_creative})
elif prompt_id == "strict":
with open(
os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r"
) as f:
with open(os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r") as f:
chat_reduce_strict = f.read()
return jsonify({"content": chat_reduce_strict})
@@ -437,9 +367,7 @@ def update_prompt_name():
# check if name is null
if name == "":
return {"status": "error"}
prompts_collection.update_one(
{"_id": ObjectId(id)}, {"$set": {"name": name, "content": content}}
)
prompts_collection.update_one({"_id": ObjectId(id)}, {"$set": {"name": name, "content": content}})
return {"status": "ok"}
@@ -449,12 +377,23 @@ def get_api_keys():
keys = api_key_collection.find({"user": user})
list_keys = []
for key in keys:
if "source" in key and isinstance(key["source"],DBRef):
source = db.dereference(key["source"])
if source is None:
continue
else:
source_name = source["name"]
elif "retriever" in key:
source_name = key["retriever"]
else:
continue
list_keys.append(
{
"id": str(key["_id"]),
"name": key["name"],
"key": key["key"][:4] + "..." + key["key"][-4:],
"source": str(key["source"]),
"source": source_name,
"prompt_id": key["prompt_id"],
"chunks": key["chunks"],
}
@@ -466,23 +405,22 @@ def get_api_keys():
def create_api_key():
data = request.get_json()
name = data["name"]
source = data["source"]
prompt_id = data["prompt_id"]
chunks = data["chunks"]
key = str(uuid.uuid4())
user = "local"
if(ObjectId.is_valid(data["source"])):
source = DBRef("vectors",ObjectId(data["source"]))
resp = api_key_collection.insert_one(
{
"name": name,
"key": key,
"source": source,
"user": user,
"prompt_id": prompt_id,
"chunks": chunks,
}
)
new_api_key = {
"name": name,
"key": key,
"user": user,
"prompt_id": prompt_id,
"chunks": chunks,
}
if "source" in data and ObjectId.is_valid(data["source"]):
new_api_key["source"] = DBRef("sources", ObjectId(data["source"]))
if "retriever" in data:
new_api_key["retriever"] = data["retriever"]
resp = api_key_collection.insert_one(new_api_key)
new_id = str(resp.inserted_id)
return {"id": new_id, "key": key}
@@ -509,36 +447,37 @@ def share_conversation():
conversation_id = data["conversation_id"]
isPromptable = request.args.get("isPromptable").lower() == "true"
conversation = conversations_collection.find_one(
{"_id": ObjectId(conversation_id)}
)
conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})
if(conversation is None):
raise Exception("Conversation does not exist")
current_n_queries = len(conversation["queries"])
##generate binary representation of uuid
explicit_binary = Binary.from_uuid(uuid.uuid4(), UuidRepresentation.STANDARD)
if isPromptable:
source = "default" if "source" not in data else data["source"]
prompt_id = "default" if "prompt_id" not in data else data["prompt_id"]
chunks = "2" if "chunks" not in data else data["chunks"]
name = conversation["name"] + "(shared)"
pre_existing_api_document = api_key_collection.find_one(
{
new_api_key_data = {
"prompt_id": prompt_id,
"chunks": chunks,
"source": DBRef("vectors",ObjectId(source)) if ObjectId.is_valid(source) else source,
"user": user,
}
if "source" in data and ObjectId.is_valid(data["source"]):
new_api_key_data["source"] = DBRef("sources",ObjectId(data["source"]))
elif "retriever" in data:
new_api_key_data["retriever"] = data["retriever"]
pre_existing_api_document = api_key_collection.find_one(
new_api_key_data
)
api_uuid = str(uuid.uuid4())
if pre_existing_api_document:
api_uuid = pre_existing_api_document["key"]
pre_existing = shared_conversations_collections.find_one(
{
"conversation_id": DBRef(
"conversations", ObjectId(conversation_id)
),
"conversation_id": DBRef("conversations", ObjectId(conversation_id)),
"isPromptable": isPromptable,
"first_n_queries": current_n_queries,
"user": user,
@@ -569,21 +508,18 @@ def share_conversation():
"api_key": api_uuid,
}
)
return jsonify(
{"success": True, "identifier": str(explicit_binary.as_uuid())}
)
return jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())})
else:
api_key_collection.insert_one(
{
"name": name,
"key": api_uuid,
"source": DBRef("vectors",ObjectId(source)) if ObjectId.is_valid(source) else source,
"user": user,
"prompt_id": prompt_id,
"chunks": chunks,
}
)
shared_conversations_collections.insert_one(
api_uuid = str(uuid.uuid4())
new_api_key_data["key"] = api_uuid
new_api_key_data["name"] = name
if "source" in data and ObjectId.is_valid(data["source"]):
new_api_key_data["source"] = DBRef("sources", ObjectId(data["source"]))
if "retriever" in data:
new_api_key_data["retriever"] = data["retriever"]
api_key_collection.insert_one(new_api_key_data)
shared_conversations_collections.insert_one(
{
"uuid": explicit_binary,
"conversation_id": {
@@ -595,12 +531,10 @@ def share_conversation():
"user": user,
"api_key": api_uuid,
}
)
)
## Identifier as route parameter in frontend
return (
jsonify(
{"success": True, "identifier": str(explicit_binary.as_uuid())}
),
jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())}),
201,
)
@@ -615,9 +549,7 @@ def share_conversation():
)
if pre_existing is not None:
return (
jsonify(
{"success": True, "identifier": str(pre_existing["uuid"].as_uuid())}
),
jsonify({"success": True, "identifier": str(pre_existing["uuid"].as_uuid())}),
200,
)
else:
@@ -635,9 +567,7 @@ def share_conversation():
)
## Identifier as route parameter in frontend
return (
jsonify(
{"success": True, "identifier": str(explicit_binary.as_uuid())}
),
jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())}),
201,
)
except Exception as err:
@@ -649,16 +579,10 @@ def share_conversation():
@user.route("/api/shared_conversation/<string:identifier>", methods=["GET"])
def get_publicly_shared_conversations(identifier: str):
try:
query_uuid = Binary.from_uuid(
uuid.UUID(identifier), UuidRepresentation.STANDARD
)
query_uuid = Binary.from_uuid(uuid.UUID(identifier), UuidRepresentation.STANDARD)
shared = shared_conversations_collections.find_one({"uuid": query_uuid})
conversation_queries = []
if (
shared
and "conversation_id" in shared
and isinstance(shared["conversation_id"], DBRef)
):
if shared and "conversation_id" in shared and isinstance(shared["conversation_id"], DBRef):
# Resolve the DBRef
conversation_ref = shared["conversation_id"]
conversation = db.dereference(conversation_ref)
@@ -672,9 +596,7 @@ def get_publicly_shared_conversations(identifier: str):
),
404,
)
conversation_queries = conversation["queries"][
: (shared["first_n_queries"])
]
conversation_queries = conversation["queries"][: (shared["first_n_queries"])]
for query in conversation_queries:
query.pop("sources") ## avoid exposing sources
else:

View File

@@ -11,12 +11,14 @@ from retry import retry
@retry(tries=10, delay=60)
def store_add_texts_with_retry(store, i):
def store_add_texts_with_retry(store, i, id):
# add source_id to the metadata
i.metadata["source_id"] = str(id)
store.add_texts([i.page_content], metadatas=[i.metadata])
# store_pine.add_texts([i.page_content], metadatas=[i.metadata])
def call_openai_api(docs, folder_name, task_status):
def call_openai_api(docs, folder_name, id, task_status):
# Function to create a vector store from the documents and save it to disk
if not os.path.exists(f"{folder_name}"):
@@ -32,13 +34,13 @@ def call_openai_api(docs, folder_name, task_status):
store = VectorCreator.create_vectorstore(
settings.VECTOR_STORE,
docs_init=docs_init,
path=f"{folder_name}",
source_id=f"{folder_name}",
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
)
else:
store = VectorCreator.create_vectorstore(
settings.VECTOR_STORE,
path=f"{folder_name}",
source_id=str(id),
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
)
# Uncomment for MPNet embeddings
@@ -57,7 +59,7 @@ def call_openai_api(docs, folder_name, task_status):
task_status.update_state(
state="PROGRESS", meta={"current": int((c1 / s1) * 100)}
)
store_add_texts_with_retry(store, i)
store_add_texts_with_retry(store, i, id)
except Exception as e:
print(e)
print("Error on ", i)

View File

@@ -1,4 +1,3 @@
import os
from application.retriever.base import BaseRetriever
from application.core.settings import settings
from application.vectorstore.vector_creator import VectorCreator
@@ -21,7 +20,7 @@ class ClassicRAG(BaseRetriever):
user_api_key=None,
):
self.question = question
self.vectorstore = self._get_vectorstore(source=source)
self.vectorstore = source['active_docs'] if 'active_docs' in source else None
self.chat_history = chat_history
self.prompt = prompt
self.chunks = chunks
@@ -38,21 +37,6 @@ class ClassicRAG(BaseRetriever):
)
self.user_api_key = user_api_key
def _get_vectorstore(self, source):
if "active_docs" in source:
if source["active_docs"].split("/")[0] == "default":
vectorstore = ""
elif source["active_docs"].split("/")[0] == "local":
vectorstore = "indexes/" + source["active_docs"]
else:
vectorstore = "vectors/" + source["active_docs"]
if source["active_docs"] == "default":
vectorstore = ""
else:
vectorstore = ""
vectorstore = os.path.join("application", vectorstore)
return vectorstore
def _get_data(self):
if self.chunks == 0:
docs = []

View File

@@ -9,9 +9,9 @@ import elasticsearch
class ElasticsearchStore(BaseVectorStore):
_es_connection = None # Class attribute to hold the Elasticsearch connection
def __init__(self, path, embeddings_key, index_name=settings.ELASTIC_INDEX):
def __init__(self, source_id, embeddings_key, index_name=settings.ELASTIC_INDEX):
super().__init__()
self.path = path.replace("application/indexes/", "").rstrip("/")
self.source_id = source_id.replace("application/indexes/", "").rstrip("/")
self.embeddings_key = embeddings_key
self.index_name = index_name
@@ -81,7 +81,7 @@ class ElasticsearchStore(BaseVectorStore):
embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key)
vector = embeddings.embed_query(question)
knn = {
"filter": [{"match": {"metadata.store.keyword": self.path}}],
"filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
"field": "vector",
"k": k,
"num_candidates": 100,
@@ -100,7 +100,7 @@ class ElasticsearchStore(BaseVectorStore):
}
}
],
"filter": [{"match": {"metadata.store.keyword": self.path}}],
"filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
}
},
"rank": {"rrf": {}},
@@ -209,5 +209,4 @@ class ElasticsearchStore(BaseVectorStore):
def delete_index(self):
self._es_connection.delete_by_query(index=self.index_name, query={"match": {
"metadata.store.keyword": self.path}},)
"metadata.source_id.keyword": self.source_id}},)

View File

@@ -1,12 +1,22 @@
from langchain_community.vectorstores import FAISS
from application.vectorstore.base import BaseVectorStore
from application.core.settings import settings
import os
def get_vectorstore(path):
if path:
vectorstore = "indexes/"+path
vectorstore = os.path.join("application", vectorstore)
else:
vectorstore = os.path.join("application")
return vectorstore
class FaissStore(BaseVectorStore):
def __init__(self, path, embeddings_key, docs_init=None):
def __init__(self, source_id, embeddings_key, docs_init=None):
super().__init__()
self.path = path
self.path = get_vectorstore(source_id)
embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
if docs_init:
self.docsearch = FAISS.from_documents(

View File

@@ -5,7 +5,7 @@ from application.vectorstore.document_class import Document
class MongoDBVectorStore(BaseVectorStore):
def __init__(
self,
path: str = "",
source_id: str = "",
embeddings_key: str = "embeddings",
collection: str = "documents",
index_name: str = "vector_search_index",
@@ -18,7 +18,7 @@ class MongoDBVectorStore(BaseVectorStore):
self._embedding_key = embedding_key
self._embeddings_key = embeddings_key
self._mongo_uri = settings.MONGO_URI
self._path = path.replace("application/indexes/", "").rstrip("/")
self._source_id = source_id.replace("application/indexes/", "").rstrip("/")
self._embedding = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
try:
@@ -46,7 +46,7 @@ class MongoDBVectorStore(BaseVectorStore):
"numCandidates": k * 10,
"index": self._index_name,
"filter": {
"store": {"$eq": self._path}
"source_id": {"$eq": self._source_id}
}
}
}
@@ -123,4 +123,4 @@ class MongoDBVectorStore(BaseVectorStore):
return result_ids
def delete_index(self, *args, **kwargs):
self._collection.delete_many({"store": self._path})
self._collection.delete_many({"source_id": self._source_id})

View File

@@ -5,12 +5,12 @@ from qdrant_client import models
class QdrantStore(BaseVectorStore):
def __init__(self, path: str = "", embeddings_key: str = "embeddings"):
def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"):
self._filter = models.Filter(
must=[
models.FieldCondition(
key="metadata.store",
match=models.MatchValue(value=path.replace("application/indexes/", "").rstrip("/")),
key="metadata.source_id",
match=models.MatchValue(value=source_id.replace("application/indexes/", "").rstrip("/")),
)
]
)

View File

@@ -6,6 +6,7 @@ import tiktoken
from urllib.parse import urljoin
import requests
from bson.objectid import ObjectId
from application.core.settings import settings
from application.parser.file.bulk import SimpleDirectoryReader
@@ -14,10 +15,10 @@ from application.parser.open_ai_func import call_openai_api
from application.parser.schema.base import Document
from application.parser.token_func import group_split
# Define a function to extract metadata from a given filename.
def metadata_from_filename(title):
store = "/".join(title.split("/")[1:3])
return {"title": title, "store": store}
return {"title": title}
# Define a function to generate a random string of a given length.
@@ -25,9 +26,7 @@ def generate_random_string(length):
return "".join([string.ascii_letters[i % 52] for i in range(length)])
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
@@ -58,7 +57,7 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
# Define the main function for ingesting and processing documents.
def ingest_worker(self, directory, formats, name_job, filename, user):
def ingest_worker(self, directory, formats, name_job, filename, user, retriever="classic"):
"""
Ingest and process documents.
@@ -69,6 +68,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
name_job (str): Name of the job for this ingestion task.
filename (str): Name of the file to be ingested.
user (str): Identifier for the user initiating the ingestion.
retriever (str): Type of retriever to use for processing the documents.
Returns:
dict: Information about the completed ingestion task, including input parameters and a "limited" flag.
@@ -93,9 +93,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
print(full_path, file=sys.stderr)
# check if API_URL env variable is set
file_data = {"name": name_job, "file": filename, "user": user}
response = requests.get(
urljoin(settings.API_URL, "/api/download"), params=file_data
)
response = requests.get(urljoin(settings.API_URL, "/api/download"), params=file_data)
# check if file is in the response
print(response, file=sys.stderr)
file = response.content
@@ -107,9 +105,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
# check if file is .zip and extract it
if filename.endswith(".zip"):
extract_zip_recursive(
os.path.join(full_path, filename), full_path, 0, recursion_depth
)
extract_zip_recursive(os.path.join(full_path, filename), full_path, 0, recursion_depth)
self.update_state(state="PROGRESS", meta={"current": 1})
@@ -130,8 +126,9 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
)
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
id = ObjectId()
call_openai_api(docs, full_path, self)
call_openai_api(docs, full_path, id, self)
tokens = count_tokens_docs(docs)
self.update_state(state="PROGRESS", meta={"current": 100})
@@ -141,22 +138,15 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
# and send them to the server (provide user and name in form)
file_data = {"name": name_job, "user": user, "tokens":tokens}
file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever, "id": str(id), 'type': 'local'}
if settings.VECTOR_STORE == "faiss":
files = {
"file_faiss": open(full_path + "/index.faiss", "rb"),
"file_pkl": open(full_path + "/index.pkl", "rb"),
}
response = requests.post(
urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
)
response = requests.get(
urljoin(settings.API_URL, "/api/delete_old?path=" + full_path)
)
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
else:
response = requests.post(
urljoin(settings.API_URL, "/api/upload_index"), data=file_data
)
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
# delete local
shutil.rmtree(full_path)
@@ -171,7 +161,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
}
def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
def remote_worker(self, source_data, name_job, user, loader, directory="temp", retriever="classic"):
token_check = True
min_tokens = 150
max_tokens = 1250
@@ -191,22 +181,21 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
token_check=token_check,
)
# docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
call_openai_api(docs, full_path, self)
tokens = count_tokens_docs(docs)
id = ObjectId()
call_openai_api(docs, full_path, id, self)
self.update_state(state="PROGRESS", meta={"current": 100})
# Proceed with uploading and cleaning as in the original function
file_data = {"name": name_job, "user": user, "tokens":tokens}
file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever,
"id": str(id), 'type': loader, 'remote_data': source_data}
if settings.VECTOR_STORE == "faiss":
files = {
"file_faiss": open(full_path + "/index.faiss", "rb"),
"file_pkl": open(full_path + "/index.pkl", "rb"),
}
requests.post(
urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
)
requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
else:
requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
@@ -222,9 +211,7 @@ def count_tokens_docs(docs):
for doc in docs:
docs_content += doc.page_content
tokens, total_price = num_tokens_from_string(
string=docs_content, encoding_name="cl100k_base"
)
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
return tokens
@@ -234,4 +221,4 @@ def num_tokens_from_string(string: str, encoding_name: str) -> int:
encoding = tiktoken.get_encoding(encoding_name)
num_tokens = len(encoding.encode(string))
total_price = (num_tokens / 1000) * 0.0004
return num_tokens, total_price
return num_tokens, total_price

View File

@@ -23,9 +23,9 @@ import {
import ConversationTile from './conversation/ConversationTile';
import { useDarkTheme, useMediaQuery, useOutsideAlerter } from './hooks';
import DeleteConvModal from './modals/DeleteConvModal';
import { ActiveState } from './models/misc';
import { ActiveState, Doc } from './models/misc';
import APIKeyModal from './preferences/APIKeyModal';
import { Doc, getConversations, getDocs } from './preferences/preferenceApi';
import { getConversations, getDocs } from './preferences/preferenceApi';
import {
selectApiKeyStatus,
selectConversationId,
@@ -124,10 +124,8 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
};
const handleDeleteClick = (doc: Doc) => {
const docPath = `indexes/local/${doc.name}`;
userService
.deletePath(docPath)
.deletePath(doc.id ?? '')
.then(() => {
return getDocs();
})

View File

@@ -10,7 +10,7 @@ const endpoints = {
DELETE_PROMPT: '/api/delete_prompt',
UPDATE_PROMPT: '/api/update_prompt',
SINGLE_PROMPT: (id: string) => `/api/get_single_prompt?id=${id}`,
DELETE_PATH: (docPath: string) => `/api/delete_old?path=${docPath}`,
DELETE_PATH: (docPath: string) => `/api/delete_old?source_id=${docPath}`,
TASK_STATUS: (task_id: string) => `/api/task_status?task_id=${task_id}`,
},
CONVERSATION: {

View File

@@ -26,6 +26,7 @@ function Dropdown({
| string
| { label: string; value: string }
| { value: number; description: string }
| { name: string; id: string; type: string }
| null;
onSelect:
| ((value: string) => void)
@@ -96,6 +97,10 @@ function Dropdown({
? selectedValue.value + ` (${selectedValue.description})`
: selectedValue.description
}`
: selectedValue &&
'name' in selectedValue &&
'id' in selectedValue
? `${selectedValue.name}`
: placeholder
? placeholder
: 'From URL'}

View File

@@ -1,7 +1,7 @@
import React from 'react';
import Trash from '../assets/trash.svg';
import Arrow2 from '../assets/dropdown-arrow.svg';
import { Doc } from '../preferences/preferenceApi';
import { Doc } from '../models/misc';
import { useDispatch } from 'react-redux';
import { useTranslation } from 'react-i18next';
type Props = {
@@ -63,9 +63,6 @@ function SourceDropdown({
<p className="max-w-3/4 truncate whitespace-nowrap">
{selectedDocs?.name || 'None'}
</p>
<p className="flex flex-col items-center justify-center">
{selectedDocs?.version}
</p>
</div>
</span>
<img

View File

@@ -1,32 +1,6 @@
import conversationService from '../api/services/conversationService';
import { Doc } from '../preferences/preferenceApi';
import { Answer, FEEDBACK } from './conversationModels';
/**
 * Build the legacy path string that identifies a source document on the
 * server. Falls back to 'default' when nothing is selected or the
 * location is unrecognised.
 */
function getDocPath(selectedDocs: Doc | null): string {
  // No selection: the server-side default source is used.
  if (!selectedDocs) {
    return 'default';
  }
  // A doc named exactly after its language is addressed as '.project'
  // in the remote path scheme.
  const namePath =
    selectedDocs.language === selectedDocs.name
      ? '.project'
      : selectedDocs.name;
  switch (selectedDocs.location) {
    case 'local':
      return `local/${selectedDocs.name}/`;
    case 'remote':
      return `${selectedDocs.language}/${namePath}/${selectedDocs.version}/${selectedDocs.model}/`;
    case 'custom':
      return selectedDocs.docLink;
    default:
      return 'default';
  }
}
import { Doc } from '../models/misc';
import { Answer, FEEDBACK, RetrievalPayload } from './conversationModels';
export function handleFetchAnswer(
question: string,
@@ -54,23 +28,22 @@ export function handleFetchAnswer(
title: any;
}
> {
const docPath = getDocPath(selectedDocs);
history = history.map((item) => {
return { prompt: item.prompt, response: item.response };
});
const payload: RetrievalPayload = {
question: question,
history: JSON.stringify(history),
conversation_id: conversationId,
prompt_id: promptId,
chunks: chunks,
token_limit: token_limit,
};
if (selectedDocs && 'id' in selectedDocs)
payload.active_docs = selectedDocs.id as string;
payload.retriever = selectedDocs?.retriever as string;
return conversationService
.answer(
{
question: question,
history: history,
active_docs: docPath,
conversation_id: conversationId,
prompt_id: promptId,
chunks: chunks,
token_limit: token_limit,
},
signal,
)
.answer(payload, signal)
.then((response) => {
if (response.ok) {
return response.json();
@@ -101,24 +74,24 @@ export function handleFetchAnswerSteaming(
token_limit: number,
onEvent: (event: MessageEvent) => void,
): Promise<Answer> {
const docPath = getDocPath(selectedDocs);
history = history.map((item) => {
return { prompt: item.prompt, response: item.response };
});
const payload: RetrievalPayload = {
question: question,
history: JSON.stringify(history),
conversation_id: conversationId,
prompt_id: promptId,
chunks: chunks,
token_limit: token_limit,
};
if (selectedDocs && 'id' in selectedDocs)
payload.active_docs = selectedDocs.id as string;
payload.retriever = selectedDocs?.retriever as string;
return new Promise<Answer>((resolve, reject) => {
conversationService
.answerStream(
{
question: question,
active_docs: docPath,
history: JSON.stringify(history),
conversation_id: conversationId,
prompt_id: promptId,
chunks: chunks,
token_limit: token_limit,
},
signal,
)
.answerStream(payload, signal)
.then((response) => {
if (!response.body) throw Error('No response body');
@@ -175,16 +148,21 @@ export function handleSearch(
chunks: string,
token_limit: number,
) {
const docPath = getDocPath(selectedDocs);
history = history.map((item) => {
return { prompt: item.prompt, response: item.response };
});
const payload: RetrievalPayload = {
question: question,
history: JSON.stringify(history),
conversation_id: conversation_id,
chunks: chunks,
token_limit: token_limit,
};
if (selectedDocs && 'id' in selectedDocs)
payload.active_docs = selectedDocs.id as string;
payload.retriever = selectedDocs?.retriever as string;
return conversationService
.search({
question: question,
active_docs: docPath,
conversation_id,
history,
chunks: chunks,
token_limit: token_limit,
})
.search(payload)
.then((response) => response.json())
.then((data) => {
return data;

View File

@@ -31,3 +31,13 @@ export interface Query {
conversationId?: string | null;
title?: string | null;
}
/** Request body shared by the answer, answerStream and search endpoints. */
export interface RetrievalPayload {
  question: string;
  /** Source document id; omitted to query the default source. */
  active_docs?: string;
  /** Retriever strategy taken from the selected doc (e.g. 'classic'). */
  retriever?: string;
  /** JSON-stringified history of { prompt, response } pairs. */
  history: string;
  conversation_id: string | null;
  prompt_id?: string | null;
  chunks: string;
  token_limit: number;
}

View File

@@ -22,8 +22,9 @@ export default function CreateAPIKeyModal({
const [APIKeyName, setAPIKeyName] = React.useState<string>('');
const [sourcePath, setSourcePath] = React.useState<{
label: string;
value: string;
name: string;
id: string;
type: string;
} | null>(null);
const [prompt, setPrompt] = React.useState<{
name: string;
@@ -41,27 +42,17 @@ export default function CreateAPIKeyModal({
? docs
.filter((doc) => doc.model === embeddingsName)
.map((doc: Doc) => {
let namePath = doc.name;
if (doc.language === namePath) {
namePath = '.project';
}
let docPath = 'default';
if (doc.location === 'local') {
docPath = 'local' + '/' + doc.name + '/';
} else if (doc.location === 'remote') {
docPath =
doc.language +
'/' +
namePath +
'/' +
doc.version +
'/' +
doc.model +
'/';
if ('id' in doc) {
return {
name: doc.name,
id: doc.id as string,
type: 'local',
};
}
return {
label: doc.name,
value: docPath,
name: doc.name,
id: doc.id ?? 'default',
type: doc.type ?? 'default',
};
})
: [];
@@ -107,9 +98,14 @@ export default function CreateAPIKeyModal({
<Dropdown
placeholder={t('modals.createAPIKey.sourceDoc')}
selectedValue={sourcePath}
onSelect={(selection: { label: string; value: string }) =>
setSourcePath(selection)
}
onSelect={(selection: {
name: string;
id: string;
type: string;
}) => {
setSourcePath(selection);
console.log(selection);
}}
options={extractDocPaths()}
size="w-full"
rounded="xl"
@@ -142,16 +138,22 @@ export default function CreateAPIKeyModal({
</div>
<button
disabled={!sourcePath || APIKeyName.length === 0 || !prompt}
onClick={() =>
sourcePath &&
prompt &&
createAPIKey({
name: APIKeyName,
source: sourcePath.value,
prompt_id: prompt.id,
chunks: chunk,
})
}
onClick={() => {
if (sourcePath && prompt) {
const payload: any = {
name: APIKeyName,
prompt_id: prompt.id,
chunks: chunk,
};
if (sourcePath.type === 'default') {
payload.retriever = sourcePath.id;
}
if (sourcePath.type === 'local') {
payload.source = sourcePath.id;
}
createAPIKey(payload);
}
}}
className="float-right mt-4 rounded-full bg-purple-30 px-5 py-2 text-sm text-white hover:bg-[#6F3FD1] disabled:opacity-50"
>
{t('modals.createAPIKey.create')}

View File

@@ -46,27 +46,9 @@ export const ShareConversationModal = ({
? docs
.filter((doc) => doc.model === embeddingsName)
.map((doc: Doc) => {
let namePath = doc.name;
if (doc.language === namePath) {
namePath = '.project';
}
let docPath = 'default';
if (doc.location === 'local') {
docPath = 'local' + '/' + doc.name + '/';
} else if (doc.location === 'remote') {
docPath =
doc.language +
'/' +
namePath +
'/' +
doc.version +
'/' +
doc.model +
'/';
}
return {
label: doc.name,
value: docPath,
value: doc.id ?? 'default',
};
})
: [];

View File

@@ -4,16 +4,13 @@ export type User = {
avatar: string;
};
export type Doc = {
location: string;
id?: string;
name: string;
language: string;
version: string;
description: string;
fullName: string;
date: string;
docLink: string;
model: string;
tokens?: string;
type?: string;
retriever?: string;
};
export type PromptProps = {

View File

@@ -8,7 +8,8 @@ import {
selectSourceDocs,
selectSelectedDocs,
} from './preferenceSlice';
import { getDocs, Doc } from './preferenceApi';
import { Doc } from '../models/misc';
import { getDocs } from './preferenceApi';
export default function APIKeyModal({
modalState,
@@ -75,9 +76,7 @@ export default function APIKeyModal({
{!localSelectedDocs ? (
<p className="py-3 text-gray-500">Select</p>
) : (
<p className="py-3">
{localSelectedDocs.name} {localSelectedDocs.version}
</p>
<p className="py-3">{localSelectedDocs.name}</p>
)}
</div>
{isDocsListOpen && (
@@ -94,9 +93,7 @@ export default function APIKeyModal({
}}
className="h-10 w-full cursor-pointer border-x-2 border-b-2 hover:bg-gray-100"
>
<p className="ml-5 py-3">
{doc.name} {doc.version}
</p>
<p className="ml-5 py-3">{doc.name}</p>
</div>
);
}

View File

@@ -1,18 +1,6 @@
import conversationService from '../api/services/conversationService';
import userService from '../api/services/userService';
// not all properties in Doc are going to be present. Make some optional
export type Doc = {
location: string;
name: string;
language: string;
version: string;
description: string;
fullName: string;
date: string;
docLink: string;
model: string;
};
import { Doc } from '../models/misc';
//Fetches all JSON objects from the source. We only use the objects with the "model" property in SelectDocsModal.tsx. Hopefully can clean up the source file later.
export async function getDocs(): Promise<Doc[] | null> {
@@ -78,17 +66,10 @@ export function setLocalPrompt(prompt: string): void {
export function setLocalRecentDocs(doc: Doc): void {
localStorage.setItem('DocsGPTRecentDocs', JSON.stringify(doc));
let namePath = doc.name;
if (doc.language === namePath) {
namePath = '.project';
}
let docPath = 'default';
if (doc.location === 'local') {
if (doc.type === 'local') {
docPath = 'local' + '/' + doc.name + '/';
} else if (doc.location === 'remote') {
docPath =
doc.language + '/' + namePath + '/' + doc.version + '/' + doc.model + '/';
}
userService
.checkDocs({

View File

@@ -4,9 +4,9 @@ import {
createSlice,
isAnyOf,
} from '@reduxjs/toolkit';
import { Doc, setLocalApiKey, setLocalRecentDocs } from './preferenceApi';
import { setLocalApiKey, setLocalRecentDocs } from './preferenceApi';
import { RootState } from '../store';
import { ActiveState } from '../models/misc';
import { ActiveState, Doc } from '../models/misc';
interface Preference {
apiKey: string;
@@ -25,15 +25,13 @@ const initialState: Preference = {
chunks: '2',
token_limit: 2000,
selectedDocs: {
id: 'default',
name: 'default',
language: 'default',
location: 'default',
version: 'default',
description: 'default',
fullName: 'default',
type: 'remote',
date: 'default',
docLink: 'default',
model: 'openai_text-embedding-ada-002',
retriever: 'classic',
} as Doc,
sourceDocs: null,
conversations: null,

View File

@@ -48,7 +48,8 @@ export default function APIKeys() {
const handleCreateKey = (payload: {
name: string;
source: string;
source?: string;
retriever?: string;
prompt_id: string;
chunks: string;
}) => {

View File

@@ -61,12 +61,10 @@ const Documents: React.FC<DocumentsProps> = ({
{document.tokens ? formatTokens(+document.tokens) : ''}
</td>
<td className="border-r border-t px-4 py-2">
{document.location === 'remote'
? 'Pre-loaded'
: 'Private'}
{document.type === 'remote' ? 'Pre-loaded' : 'Private'}
</td>
<td className="border-t px-4 py-2">
{document.location !== 'remote' && (
{document.type !== 'remote' && (
<img
src={Trash}
alt="Delete"

View File

@@ -6,7 +6,7 @@ import userService from '../api/services/userService';
import ArrowLeft from '../assets/arrow-left.svg';
import ArrowRight from '../assets/arrow-right.svg';
import i18n from '../locale/i18n';
import { Doc } from '../preferences/preferenceApi';
import { Doc } from '../models/misc';
import {
selectSourceDocs,
setSourceDocs,
@@ -35,9 +35,8 @@ export default function Settings() {
};
const handleDeleteClick = (index: number, doc: Doc) => {
const docPath = 'indexes/' + 'local' + '/' + doc.name;
userService
.deletePath(docPath)
.deletePath(doc.id ?? '')
.then((response) => {
if (response.ok && documents) {
const updatedDocuments = [

View File

@@ -26,15 +26,12 @@ const store = configureStore({
conversations: null,
sourceDocs: [
{
location: '',
language: '',
name: 'default',
version: '',
date: '',
description: '',
docLink: '',
fullName: '',
model: '1.0',
type: 'remote',
id: 'default',
retriever: 'clasic',
},
],
modalState: 'INACTIVE',

View File

@@ -120,7 +120,7 @@ function Upload({
dispatch(setSourceDocs(data));
dispatch(
setSelectedDocs(
data?.find((d) => d.location.toLowerCase() === 'local'),
data?.find((d) => d.type?.toLowerCase() === 'local'),
),
);
});
@@ -137,7 +137,7 @@ function Upload({
dispatch(setSourceDocs(data));
dispatch(
setSelectedDocs(
data?.find((d) => d.location.toLowerCase() === 'local'),
data?.find((d) => d.type?.toLowerCase() === 'local'),
),
);
});

View File

@@ -0,0 +1,55 @@
import pymongo
import os
def migrate_to_v1_vectorstore_mongo():
    """Migrate legacy ``vectors`` documents into the new ``sources`` collection.

    Each document is normalised for the v1 schema: the obsolete ``location``
    field is removed, ``retriever`` defaults to ``"classic"`` and
    ``remote_data`` is initialised to ``None``. The normalised document is
    written back, copied into ``sources``, and the legacy collection is
    dropped afterwards.
    """
    client = pymongo.MongoClient("mongodb://localhost:27017/")
    db = client["docsgpt"]
    vectors_collection = db["vectors"]
    sources_collection = db["sources"]
    # Single pass: normalise, persist, and copy each document.
    for vector in vectors_collection.find():
        if "retriever" not in vector:
            vector["retriever"] = "classic"
        vector["remote_data"] = None
        # ``$set`` alone can never remove a field from a stored document;
        # ``$unset`` is required to actually drop the obsolete key.
        vector.pop("location", None)
        vectors_collection.update_one(
            {"_id": vector["_id"]},
            {
                # ``_id`` is immutable and must not appear in the $set payload.
                "$set": {k: v for k, v in vector.items() if k != "_id"},
                "$unset": {"location": ""},
            },
        )
        sources_collection.insert_one(vector)
    vectors_collection.drop()
    client.close()
def migrate_faiss_to_v1_vectorstore():
    """Rename on-disk FAISS index folders from ``<user>/<name>`` to the
    source document's Mongo ``_id``.

    NOTE(review): relies on the legacy ``vectors`` collection still being
    present, so it must run before any migration that drops it.
    """
    client = pymongo.MongoClient("mongodb://localhost:27017/")
    db = client["docsgpt"]
    for record in db["vectors"].find():
        src = f"./application/indexes/{record['user']}/{record['name']}"
        dst = f"./application/indexes/{record['_id']}"
        try:
            os.rename(src, dst)
        except OSError as err:
            # A missing or already-migrated folder should not abort the run.
            print(f"Error moving {src} to {dst}: {err}")
    client.close()
def migrate_mongo_atlas_vector_to_v1_vectorstore():
    """Stamp Atlas ``documents`` entries with their new ``source_id``.

    For every legacy vector record, every Atlas document whose ``store``
    equals ``"<user>/<name>"`` gains a ``source_id`` holding the vector's
    Mongo ``_id`` as a string.

    NOTE(review): the connection string below contains placeholders and
    must be filled in before running.
    """
    client = pymongo.MongoClient(
        "mongodb+srv://<username>:<password>@<cluster>/<dbname>?retryWrites=true&w=majority"
    )
    db = client["docsgpt"]
    legacy_vectors = db["vectors"]
    # mongodb atlas collection
    atlas_documents = db["documents"]
    for record in legacy_vectors.find():
        store_key = record["user"] + "/" + record["name"]
        atlas_documents.update_many(
            {"store": store_key},
            {"$set": {"source_id": str(record["_id"])}},
        )
    client.close()
# Order matters: the FAISS folder rename reads user/name pairs from the
# legacy "vectors" collection, which the Mongo migration below removes,
# so it must run first.
migrate_faiss_to_v1_vectorstore()
migrate_to_v1_vectorstore_mongo()