diff --git a/README.md b/README.md index eeecb598..99baf811 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,7 @@ docker compose -f docker-compose-dev.yaml up -d > Make sure you have Python 3.10 or 3.11 installed. 1. Export required environment variables or prepare a `.env` file in the project folder: - - Copy [.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) and create `.env`. + - Copy [.env-template](https://github.com/arc53/DocsGPT/blob/main/application/.env-template) and create `.env`. (check out [`application/core/settings.py`](application/core/settings.py) if you want to see more config options.) diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py index 182cdf2b..f109db26 100644 --- a/application/api/answer/routes.py +++ b/application/api/answer/routes.py @@ -241,6 +241,7 @@ def complete_stream( yield f"data: {data}\n\n" except Exception as e: print("\033[91merr", str(e), file=sys.stderr) + traceback.print_exc() data = json.dumps( { "type": "error", diff --git a/application/api/user/routes.py b/application/api/user/routes.py index 8e62683e..6a2f3bea 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -2,11 +2,12 @@ import datetime import os import shutil import uuid +import math from bson.binary import Binary, UuidRepresentation from bson.dbref import DBRef from bson.objectid import ObjectId -from flask import Blueprint, jsonify, make_response, request +from flask import Blueprint, jsonify, make_response, request, redirect from flask_restx import inputs, fields, Namespace, Resource from werkzeug.utils import secure_filename @@ -315,7 +316,7 @@ class UploadFile(Resource): for file in files: filename = secure_filename(file.filename) file.save(os.path.join(temp_dir, filename)) - + print(f"Saved file: {filename}") zip_path = shutil.make_archive( base_name=os.path.join(save_dir, job_name), format="zip", @@ -323,6 +324,26 @@ class UploadFile(Resource): ) final_filename = os.path.basename(zip_path) shutil.rmtree(temp_dir) + task = ingest.delay( + settings.UPLOAD_FOLDER, + [ + ".rst", + ".md", + ".pdf", + ".txt", + ".docx", + ".csv", + ".epub", + ".html", + ".mdx", + ".json", + ".xlsx", + ".pptx", + ], + job_name, + final_filename, + user, + ) else: file = files[0] final_filename = secure_filename(file.filename) @@ -349,9 +370,10 @@ class UploadFile(Resource): final_filename, user, ) - except Exception as err: - return make_response(jsonify({"success": False, "error": str(err)}), 400) + except Exception as err: + print(f"Error: {err}") + return make_response(jsonify({"success": False, "error": str(err)}), 400) return make_response(jsonify({"success": True, "task_id": task.id}), 200) @@ -422,6 +444,11 @@ class TaskStatus(Resource): task = celery.AsyncResult(task_id) task_meta = task.info + print(f"Task status: {task.status}") + if not isinstance( + task_meta, (dict, list, str, int, float, bool, type(None)) + ): + task_meta = str(task_meta) # Convert to a string representation except Exception as err: return make_response(jsonify({"success": False, "error": str(err)}), 400) @@ -429,12 +456,70 @@ class TaskStatus(Resource): @user_ns.route("/api/combine") +class RedirectToSources(Resource): + @api.doc( + description="Redirects /api/combine to /api/sources for backward compatibility" + ) + def get(self): + return redirect("/api/sources", code=301) + + +@user_ns.route("/api/sources/paginated") +class PaginatedSources(Resource): + @api.doc(description="Get document with pagination, sorting and filtering") + def get(self): + user = "local" + sort_field = request.args.get("sort", "date") # Default to 'date' + sort_order = request.args.get("order", "desc") # Default to 'desc' + page = int(request.args.get("page", 1)) # Default to 1 + rows_per_page = int(request.args.get("rows", 10)) # Default to 10 + + # Prepare + query = {"user": user} + total_documents = sources_collection.count_documents(query) + total_pages = max(1, math.ceil(total_documents / rows_per_page)) + sort_order = 1 if sort_order == "asc" else -1 + skip = (page - 1) * rows_per_page + + try: + documents = ( + sources_collection.find(query) + .sort(sort_field, sort_order) + .skip(skip) + .limit(rows_per_page) + ) + + paginated_docs = [] + for doc in documents: + doc_data = { + "id": str(doc["_id"]), + "name": doc.get("name", ""), + "date": doc.get("date", ""), + "model": settings.EMBEDDINGS_NAME, + "location": "local", + "tokens": doc.get("tokens", ""), + "retriever": doc.get("retriever", "classic"), + "syncFrequency": doc.get("sync_frequency", ""), + } + paginated_docs.append(doc_data) + + response = { + "total": total_documents, + "totalPages": total_pages, + "currentPage": page, + "paginated": paginated_docs, + } + return make_response(jsonify(response), 200) + + except Exception as err: + return make_response(jsonify({"success": False, "error": str(err)}), 400) + + +@user_ns.route("/api/sources") class CombinedJson(Resource): @api.doc(description="Provide JSON file with combined available indexes") def get(self): user = "local" - sort_field = request.args.get('sort', 'date') # Default to 'date' - sort_order = request.args.get('order', "desc") # Default to 'desc' data = [ { "name": "default", @@ -447,7 +532,7 @@ class CombinedJson(Resource): ] try: - for index in sources_collection.find({"user": user}).sort(sort_field, 1 if sort_order=="asc" else -1): + for index in sources_collection.find({"user": user}).sort("date", -1): data.append( { "id": str(index["_id"]), @@ -485,6 +570,7 @@ class CombinedJson(Resource): "retriever": "brave_search", } ) + except Exception as err: return make_response(jsonify({"success": False, "error": str(err)}), 400) @@ -1674,7 +1760,9 @@ class TextToSpeech(Resource): tts_model = api.model( "TextToSpeechModel", { - "text": fields.String(required=True, description="Text to be synthesized as audio"), + "text": fields.String( + required=True, description="Text to be synthesized as audio" + ), }, ) @@ -1686,8 +1774,15 @@ class TextToSpeech(Resource): try: tts_instance = GoogleTTS() audio_base64, detected_language = tts_instance.text_to_speech(text) - return make_response(jsonify({"success": True,'audio_base64': audio_base64,'lang':detected_language}), 200) + return make_response( + jsonify( + { + "success": True, + "audio_base64": audio_base64, + "lang": detected_language, + } + ), + 200, + ) except Exception as err: return make_response(jsonify({"success": False, "error": str(err)}), 400) - - diff --git a/application/llm/google_ai.py b/application/llm/google_ai.py new file mode 100644 index 00000000..df252abf --- /dev/null +++ b/application/llm/google_ai.py @@ -0,0 +1,48 @@ +from application.llm.base import BaseLLM + +class GoogleLLM(BaseLLM): + + def __init__(self, api_key=None, user_api_key=None, *args, **kwargs): + + super().__init__(*args, **kwargs) + self.api_key = api_key + self.user_api_key = user_api_key + + def _clean_messages_google(self, messages): + return [ + { + "role": "model" if message["role"] == "system" else message["role"], + "parts": [message["content"]], + } + for message in messages[1:] + ] + + def _raw_gen( + self, + baseself, + model, + messages, + stream=False, + **kwargs + ): + import google.generativeai as genai + genai.configure(api_key=self.api_key) + model = genai.GenerativeModel(model, system_instruction=messages[0]["content"]) + response = model.generate_content(self._clean_messages_google(messages)) + return response.text + + def _raw_gen_stream( + self, + baseself, + model, + messages, + stream=True, + **kwargs + ): + import google.generativeai as genai + genai.configure(api_key=self.api_key) + model = genai.GenerativeModel(model, system_instruction=messages[0]["content"]) + response = model.generate_content(self._clean_messages_google(messages), stream=True) + for line in response: + if line.text is not None: + yield line.text \ No newline at end of file diff --git a/application/llm/llm_creator.py b/application/llm/llm_creator.py index 6a19de10..f32089de 100644 --- a/application/llm/llm_creator.py +++ b/application/llm/llm_creator.py @@ -6,6 +6,7 @@ from application.llm.llama_cpp import LlamaCpp from application.llm.anthropic import AnthropicLLM from application.llm.docsgpt_provider import DocsGPTAPILLM from application.llm.premai import PremAILLM +from application.llm.google_ai import GoogleLLM class LLMCreator: @@ -18,7 +19,8 @@ class LLMCreator: "anthropic": AnthropicLLM, "docsgpt": DocsGPTAPILLM, "premai": PremAILLM, - "groq": GroqLLM + "groq": GroqLLM, + "google": GoogleLLM } @classmethod diff --git a/application/parser/remote/reddit_loader.py b/application/parser/remote/reddit_loader.py index 0230653a..22f5b185 100644 --- a/application/parser/remote/reddit_loader.py +++ b/application/parser/remote/reddit_loader.py @@ -1,10 +1,19 @@ from application.parser.remote.base import BaseRemote from langchain_community.document_loaders import RedditPostsLoader +import json class RedditPostsLoaderRemote(BaseRemote): def load_data(self, inputs): - data = eval(inputs) + try: + data = json.loads(inputs) + except json.JSONDecodeError as e: + raise ValueError(f"Invalid JSON input: {e}") + + required_fields = ["client_id", "client_secret", "user_agent", "search_queries"] + missing_fields = [field for field in required_fields if field not in data] + if missing_fields: + raise ValueError(f"Missing required fields: {', '.join(missing_fields)}") client_id = data.get("client_id") client_secret = data.get("client_secret") user_agent = data.get("user_agent") diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py index 6a67cb38..42e318d2 100644 --- a/application/retriever/classic_rag.py +++ b/application/retriever/classic_rag.py @@ -45,7 +45,6 @@ class ClassicRAG(BaseRetriever): settings.VECTOR_STORE, self.vectorstore, settings.EMBEDDINGS_KEY ) docs_temp = docsearch.search(self.question, k=self.chunks) - print(docs_temp) docs = [ { "title": i.metadata.get( @@ -60,8 +59,6 @@ class ClassicRAG(BaseRetriever): } for i in docs_temp ] - if settings.LLM_NAME == "llama.cpp": - docs = [docs[0]] return docs diff --git a/docs/pages/_app.mdx b/docs/pages/_app.mdx index ac2be195..1cb8cadd 100644 --- a/docs/pages/_app.mdx +++ b/docs/pages/_app.mdx @@ -4,7 +4,7 @@ export default function MyApp({ Component, pageProps }) { return ( <> - + ) } \ No newline at end of file diff --git a/frontend/src/Navigation.tsx b/frontend/src/Navigation.tsx index 3591469b..242efb1a 100644 --- a/frontend/src/Navigation.tsx +++ b/frontend/src/Navigation.tsx @@ -18,6 +18,7 @@ import SourceDropdown from './components/SourceDropdown'; import { setConversation, updateConversationId, + handleAbort, } from './conversation/conversationSlice'; import ConversationTile from './conversation/ConversationTile'; import { useDarkTheme, useMediaQuery, useOutsideAlerter } from './hooks'; @@ -34,10 +35,12 @@ import { selectSelectedDocs, selectSelectedDocsStatus, selectSourceDocs, + selectPaginatedDocuments, setConversations, setModalStateDeleteConv, setSelectedDocs, setSourceDocs, + setPaginatedDocuments, } from './preferences/preferenceSlice'; import Spinner from './assets/spinner.svg'; import SpinnerDark from './assets/spinner-dark.svg'; @@ -72,6 +75,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { const conversations = useSelector(selectConversations); const modalStateDeleteConv = useSelector(selectModalStateDeleteConv); const conversationId = useSelector(selectConversationId); + const paginatedDocuments = useSelector(selectPaginatedDocuments); const [isDeletingConversation, setIsDeletingConversation] = useState(false); const { isMobile } = useMediaQuery(); @@ -143,9 +147,18 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { }) .then((updatedDocs) => { dispatch(setSourceDocs(updatedDocs)); + const updatedPaginatedDocs = paginatedDocuments?.filter( + (document) => document.id !== doc.id, + ); + dispatch( + setPaginatedDocuments(updatedPaginatedDocs || paginatedDocuments), + ); dispatch( setSelectedDocs( - updatedDocs?.find((doc) => doc.name.toLowerCase() === 'default'), + Array.isArray(updatedDocs) && + updatedDocs?.find( + (doc: Doc) => doc.name.toLowerCase() === 'default', + ), ), ); }) @@ -168,6 +181,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { }; const resetConversation = () => { + handleAbort(); dispatch(setConversation([])); dispatch( updateConversationId({ diff --git a/frontend/src/api/endpoints.ts b/frontend/src/api/endpoints.ts index 84674049..4e7112d0 100644 --- a/frontend/src/api/endpoints.ts +++ b/frontend/src/api/endpoints.ts @@ -1,7 +1,8 @@ const endpoints = { USER: { - DOCS: '/api/combine', + DOCS: '/api/sources', DOCS_CHECK: '/api/docs_check', + DOCS_PAGINATED: '/api/sources/paginated', API_KEYS: '/api/get_api_keys', CREATE_API_KEY: '/api/create_api_key', DELETE_API_KEY: '/api/delete_api_key', diff --git a/frontend/src/api/services/userService.ts b/frontend/src/api/services/userService.ts index 53b38f50..942318ae 100644 --- a/frontend/src/api/services/userService.ts +++ b/frontend/src/api/services/userService.ts @@ -2,8 +2,9 @@ import apiClient from '../client'; import endpoints from '../endpoints'; const userService = { - getDocs: (sort = 'date', order = 'desc'): Promise => - apiClient.get(`${endpoints.USER.DOCS}?sort=${sort}&order=${order}`), + getDocs: (): Promise => apiClient.get(`${endpoints.USER.DOCS}`), + getDocsWithPagination: (query: string): Promise => + apiClient.get(`${endpoints.USER.DOCS_PAGINATED}?${query}`), checkDocs: (data: any): Promise => apiClient.post(endpoints.USER.DOCS_CHECK, data), getAPIKeys: (): Promise => apiClient.get(endpoints.USER.API_KEYS), diff --git a/frontend/src/assets/double-arrow-left.svg b/frontend/src/assets/double-arrow-left.svg new file mode 100644 index 00000000..cab9ff90 --- /dev/null +++ b/frontend/src/assets/double-arrow-left.svg @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/frontend/src/assets/double-arrow-right.svg b/frontend/src/assets/double-arrow-right.svg new file mode 100644 index 00000000..0d5167c2 --- /dev/null +++ b/frontend/src/assets/double-arrow-right.svg @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/frontend/src/assets/single-left-arrow.svg b/frontend/src/assets/single-left-arrow.svg new file mode 100644 index 00000000..f28b2592 --- /dev/null +++ b/frontend/src/assets/single-left-arrow.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/frontend/src/assets/single-right-arrow.svg b/frontend/src/assets/single-right-arrow.svg new file mode 100644 index 00000000..85729e57 --- /dev/null +++ b/frontend/src/assets/single-right-arrow.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/frontend/src/components/DocumentPagination.tsx b/frontend/src/components/DocumentPagination.tsx new file mode 100644 index 00000000..b0532362 --- /dev/null +++ b/frontend/src/components/DocumentPagination.tsx @@ -0,0 +1,119 @@ +import React, { useState } from 'react'; +import SingleArrowLeft from '../assets/single-left-arrow.svg'; +import SingleArrowRight from '../assets/single-right-arrow.svg'; +import DoubleArrowLeft from '../assets/double-arrow-left.svg'; +import DoubleArrowRight from '../assets/double-arrow-right.svg'; + +interface PaginationProps { + currentPage: number; + totalPages: number; + rowsPerPage: number; + onPageChange: (page: number) => void; + onRowsPerPageChange: (rows: number) => void; +} + +const Pagination: React.FC = ({ + currentPage, + totalPages, + rowsPerPage, + onPageChange, + onRowsPerPageChange, +}) => { + const [rowsPerPageOptions] = useState([5, 10, 15, 20]); + + const handlePreviousPage = () => { + if (currentPage > 1) { + onPageChange(currentPage - 1); + } + }; + + const handleNextPage = () => { + if (currentPage < totalPages) { + onPageChange(currentPage + 1); + } + }; + + const handleFirstPage = () => { + onPageChange(1); + }; + + const handleLastPage = () => { + onPageChange(totalPages); + }; + + return ( +
+
+ Rows per page: + +
+ +
+ Page {currentPage} of {totalPages} +
+ +
+ + + + +
+
+ ); +}; + +export default Pagination; diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx index 07f33650..15923661 100644 --- a/frontend/src/components/Dropdown.tsx +++ b/frontend/src/components/Dropdown.tsx @@ -140,7 +140,7 @@ function Dropdown({ : option.description }`} - {showEdit && onEdit && ( + {showEdit && onEdit && option.type !== 'public' && ( Edit -
+