From 2612ce5ad91ac19f6e06d0211753cec36015c51d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 17 Feb 2025 20:34:44 +0000 Subject: [PATCH 1/4] build(deps): bump duckduckgo-search from 6.3.0 to 7.4.2 in /application Bumps [duckduckgo-search](https://github.com/deedy5/duckduckgo_search) from 6.3.0 to 7.4.2. - [Release notes](https://github.com/deedy5/duckduckgo_search/releases) - [Commits](https://github.com/deedy5/duckduckgo_search/compare/v6.3.0...v7.4.2) --- updated-dependencies: - dependency-name: duckduckgo-search dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- application/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/requirements.txt b/application/requirements.txt index 1707ad80..ffb96ecd 100644 --- a/application/requirements.txt +++ b/application/requirements.txt @@ -4,7 +4,7 @@ beautifulsoup4==4.12.3 celery==5.4.0 dataclasses-json==0.6.7 docx2txt==0.8 -duckduckgo-search==6.3.0 +duckduckgo-search==7.4.2 ebooklib==0.18 elastic-transport==8.17.0 elasticsearch==8.17.0 From 13cd221fe5442c9779c76d775d8ea7e4cf47acdc Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Tue, 18 Feb 2025 14:19:53 +0530 Subject: [PATCH 2/4] (feat:widget) udpate docs --- docs/pages/Extensions/chat-widget.mdx | 1 + docs/pages/_app.mdx | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/pages/Extensions/chat-widget.mdx b/docs/pages/Extensions/chat-widget.mdx index 4cc887dc..5eae2fc9 100644 --- a/docs/pages/Extensions/chat-widget.mdx +++ b/docs/pages/Extensions/chat-widget.mdx @@ -143,6 +143,7 @@ The DocsGPT Widget offers a range of customizable properties that allow you to t | **`buttonIcon`** | `string` | `"https://your-icon"` | URL for the icon image used in the widget's launch button. | | **`buttonBg`** | `string` | `"#222327"` | Background color of the widget's launch button. 
| | **`size`** | `"small" \| "medium"` | `"medium"` | Size of the widget. Options: `"small"` or `"medium"`. Defaults to `"medium"`. | +| **`showSources`** | `boolean` | `false` | Enables displaying source URLs for data fetched within the widget. When set to `true`, the widget will show the original sources of the fetched data. | --- diff --git a/docs/pages/_app.mdx b/docs/pages/_app.mdx index 0111cd96..e7fcf8c5 100644 --- a/docs/pages/_app.mdx +++ b/docs/pages/_app.mdx @@ -4,7 +4,7 @@ export default function MyApp({ Component, pageProps }) { return ( <> - + ) } \ No newline at end of file From 03ee16f5ca2a533cf4926eaa96812946de5c4209 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 18 Feb 2025 09:09:37 +0000 Subject: [PATCH 3/4] Update _app.mdx --- docs/pages/_app.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/pages/_app.mdx b/docs/pages/_app.mdx index e7fcf8c5..817cc517 100644 --- a/docs/pages/_app.mdx +++ b/docs/pages/_app.mdx @@ -4,7 +4,7 @@ export default function MyApp({ Component, pageProps }) { return ( <> - + ) -} \ No newline at end of file +} From 750345d209a0b1529b8c022f75c7272a61b8bbaa Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 18 Feb 2025 14:51:17 +0000 Subject: [PATCH 4/4] feat: architecrure guide --- docs/pages/Guides/Architecture.mdx | 157 +++++++++++++++++++++++++++++ docs/pages/Guides/_meta.json | 4 + 2 files changed, 161 insertions(+) create mode 100644 docs/pages/Guides/Architecture.mdx diff --git a/docs/pages/Guides/Architecture.mdx b/docs/pages/Guides/Architecture.mdx new file mode 100644 index 00000000..247abda6 --- /dev/null +++ b/docs/pages/Guides/Architecture.mdx @@ -0,0 +1,157 @@ +--- +title: Architecture +description: High-level architecture of DocsGPT +--- + +## Introduction + +DocsGPT is designed as a modular and scalable application for knowledge-based GenAI systems. This document outlines the high-level architecture of DocsGPT, highlighting its key components. 
+ +## High-Level Architecture + +This diagram provides a bird's-eye view of the DocsGPT architecture, illustrating the main components and their interactions. + +```mermaid +flowchart LR + User["User"] --> Frontend["Frontend (React/Vite)"] + Frontend --> Backend["Backend API (Flask)"] + Backend --> LLM["LLM Integration Layer"] & VectorStore["Vector Stores"] & TaskQueue["Task Queue (Celery)"] & Databases["Databases (MongoDB, Redis)"] + LLM -- Cloud APIs / Local Engines --> InferenceEngine["Inference Engine"] + VectorStore -- Document Embeddings --> Indexes[("Indexes")] + TaskQueue -- Asynchronous Tasks --> DocumentIngestion["Document Ingestion"] + + style Frontend fill:#AA00FF,color:#FFFFFF + style Backend fill:#AA00FF,color:#FFFFFF + style LLM fill:#AA00FF,color:#FFFFFF + style TaskQueue fill:#AA00FF,color:#FFFFFF,stroke:#AA00FF + style DocumentIngestion fill:#AA00FF,color:#FFFFFF,stroke:none +``` + +## Component Descriptions + +### 1. Frontend (React/Vite) + +* **Technology:** Built using React and Vite. +* **Responsibility:** This is the user interface of DocsGPT, providing users with a UI to ask questions and receive answers, configure prompts, tools and other settings. It handles user input, displays conversation history, shows sources, and manages settings. +* **Key Features:** + * Clean and responsive UI. + * Simple static client-side rendering. + * Manages conversation state and settings. + * Communicates with the Backend API for data retrieval and processing. + +### 2. Backend API (Flask) + +* **Technology:** Implemented using Flask (Python). +* **Responsibility:** The Backend API serves as the core logic and orchestration layer of DocsGPT. It receives requests from the Frontend, Extensions or API clients, processes them, and coordinates interactions between different components. +* **Key Features:** + * API endpoints for handling user queries, document uploads, and settings configurations. + * Manages the overall application flow and logic. 
+ * Integrates with the LLM Integration Layer, Vector Stores, Task Queue, Tools, Agents and Databases. + * Provides Swagger documentation for API endpoints. + +### 3. LLM Integration Layer (Part of backend) + +* **Technology:** Supports multiple LLM APIs and local engines. +* **Responsibility:** This layer provides an abstraction for interacting with Large Language Models (LLMs). +* **Key Features:** + * Supports LLMs from OpenAI, Google, Anthropic, Groq, HuggingFace Inference API, Azure OpenAI; also compatible with local models like Ollama, LLaMa.cpp, Text Generation Inference (TGI), SGLang, vLLM, Aphrodite, FriendliAI, and LMDeploy. + * Manages API key handling, request formatting, and tool formatting. + * Offers caching mechanisms to improve response times and reduce API usage. + * Handles streaming responses for a more interactive user experience. + +### 4. Vector Stores (Part of backend) + +* **Technology:** Supports multiple vector databases. +* **Responsibility:** Vector Stores are used to store and retrieve vector embeddings of document chunks. This enables semantic search and retrieval of relevant document snippets in response to user queries. +* **Key Features:** + * Supports vector databases including FAISS, Elasticsearch, Qdrant, Milvus, and LanceDB. + * Provides storage and indexing of high-dimensional vector embeddings. + * Enables editing and updating of vector indexes including specific chunks. + +### 5. Parser Integration Layer (Part of backend) + +* **Technology:** Supports multiple formats for file processing and remote source uploading. +* **Responsibility:** Parser Integration Layer handles uploading, parsing, chunking, embedding, and indexing documents. +* **Key Features:** + * Supports various document formats (PDF, DOCX, TXT, etc.) and remote sources (web URLs, sitemaps). + * Handles document parsing, text chunking, and embedding generation. + * Utilizes Celery for asynchronous processing, ensuring efficient handling of large documents. 
+ +### 6. Task Queue (Celery) + +* **Technology:** Celery with Redis as broker and backend. +* **Responsibility:** Celery handles asynchronous task processing, for long-running operations such as document ingestion and indexing. This ensures that the main application remains responsive and efficient. +* **Key Features:** + * Manages background tasks for document processing and indexing. + * Improves application responsiveness by offloading heavy tasks. + * Enhances scalability and reliability through distributed task processing. + +### 7. Databases (MongoDB, Redis) + +* **Technology:** MongoDB and Redis. +* **Responsibility:** Databases are used for persistent data storage and caching. MongoDB stores structured data such as conversations, documents, user settings, and API keys. Redis is used as a cache, as well as a message broker for Celery. + +## Request Flow Diagram + +This diagram illustrates the sequence of steps involved when a user submits a question to DocsGPT. + +```mermaid +sequenceDiagram + participant User + participant Frontend + participant BackendAPI + participant LLMIntegrationLayer + participant VectorStores + participant InferenceEngine + + User->>Frontend: User asks a question + Frontend->>BackendAPI: API Request (Question) + BackendAPI->>VectorStores: Fetch relevant document chunks (Similarity Search) + VectorStores-->>BackendAPI: Return document chunks + BackendAPI->>LLMIntegrationLayer: Send question and document chunks + LLMIntegrationLayer->>InferenceEngine: LLM API Request (Prompt + Context) + InferenceEngine-->>LLMIntegrationLayer: LLM API Response (Answer) + LLMIntegrationLayer-->>BackendAPI: Return Answer + BackendAPI->>Frontend: API Response (Answer) + Frontend->>User: Display Answer + + Note over Frontend,BackendAPI: Data flow is simplified for clarity +``` + +## Deployment Architecture + +DocsGPT is designed to be deployed using Docker and Kubernetes; here is a quick overview of a simple k8s deployment. 
+ +```mermaid +graph LR + subgraph Kubernetes Cluster + subgraph Nodes + subgraph Node 1 + FrontendPod[Frontend Pod] + BackendAPIPod[Backend API Pod] + end + subgraph Node 2 + CeleryWorkerPod[Celery Worker Pod] + RedisPod[Redis Pod] + end + subgraph Node 3 + MongoDBPod[MongoDB Pod] + VectorStorePod[Vector Store Pod] + end + end + LoadBalancer[Load Balancer] --> docsgpt-frontend-service[docsgpt-frontend-service] + LoadBalancer --> docsgpt-api-service[docsgpt-api-service] + docsgpt-frontend-service --> FrontendPod + docsgpt-api-service --> BackendAPIPod + BackendAPIPod --> CeleryWorkerPod + BackendAPIPod --> RedisPod + BackendAPIPod --> MongoDBPod + BackendAPIPod --> VectorStorePod + CeleryWorkerPod --> RedisPod + BackendAPIPod --> InferenceEngine[(Inference Engine)] + VectorStorePod --> Indexes[(Indexes)] + MongoDBPod --> Data[(Data)] + RedisPod --> Cache[(Cache)] + end + User[User] --> LoadBalancer +``` diff --git a/docs/pages/Guides/_meta.json b/docs/pages/Guides/_meta.json index 1a331167..a88202d1 100644 --- a/docs/pages/Guides/_meta.json +++ b/docs/pages/Guides/_meta.json @@ -16,5 +16,9 @@ "title": "💭️ Avoiding hallucinations", "href": "/Guides/My-AI-answers-questions-using-external-knowledge", "display": "hidden" + }, + "Architecture": { + "title": "🏗️ Architecture", + "href": "/Guides/Architecture" } } \ No newline at end of file