Merge remote-tracking branch 'upstream/main'

This commit is contained in:
Alex
2025-06-18 22:18:06 +01:00
544 changed files with 90904 additions and 0 deletions

15
.devcontainer/Dockerfile Normal file

@@ -0,0 +1,15 @@
FROM python:3.12-bookworm
# Install Node.js 20.x
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs \
&& rm -rf /var/lib/apt/lists/*
# Install global npm packages
RUN npm install -g husky vite
# Create and activate Python virtual environment
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
WORKDIR /workspace
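If you want to sanity-check this image outside of VS Code, it can be built directly with Docker; a minimal sketch (the tag name is illustrative, and in normal use the devcontainer tooling builds it via the compose files below):
```bash
# Build the devcontainer image by hand from the repository root.
docker build -t docsgpt-devcontainer -f .devcontainer/Dockerfile .devcontainer
```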

49
.devcontainer/devc-welcome.md Normal file

@@ -0,0 +1,49 @@
# Welcome to DocsGPT Devcontainer
Welcome to the DocsGPT development environment! This guide will help you get started quickly.
## Starting Services
To run DocsGPT, you need to start three main services: Flask (backend), Celery (task queue), and Vite (frontend). Here are the commands to start each service within the devcontainer:
### Vite (Frontend)
```bash
cd frontend
npm run dev -- --host
```
### Flask (Backend)
```bash
flask --app application/app.py run --host=0.0.0.0 --port=7091
```
### Celery (Task Queue)
```bash
celery -A application.app.celery worker -l INFO
```
## GitHub Codespaces Instructions
### 1. Make Ports Public:
Go to the "Ports" panel in Codespaces (usually located at the bottom of the VS Code window).
For both ports 5173 and 7091, right-click on the port and select "Make Public".
![CleanShot 2025-02-12 at 09 46 14@2x](https://github.com/user-attachments/assets/00a34b16-a7ef-47af-9648-87a7e3008475)
### 2. Update VITE_API_HOST:
After making port 7091 public, copy the public URL provided by Codespaces for port 7091.
Open the file `frontend/.env.development`.
Find the line `VITE_API_HOST=http://localhost:7091`.
Replace `http://localhost:7091` with the public URL you copied from Codespaces.
![CleanShot 2025-02-12 at 09 46 56@2x](https://github.com/user-attachments/assets/c472242f-1079-4cd8-bc0b-2d78db22b94c)
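The same update can be made from a terminal inside the Codespace; a minimal sketch, assuming the standard `CODESPACE_NAME` and `GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN` variables that Codespaces sets:
```bash
# Derive the public URL for port 7091 and point the frontend at it.
PUBLIC_URL="https://${CODESPACE_NAME}-7091.${GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN}"
sed -i "s|VITE_API_HOST=.*|VITE_API_HOST=${PUBLIC_URL}|" frontend/.env.development
```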

24
.devcontainer/devcontainer.json Normal file

@@ -0,0 +1,24 @@
{
"name": "DocsGPT Dev Container",
"dockerComposeFile": ["docker-compose-dev.yaml", "docker-compose.override.yaml"],
"service": "dev",
"workspaceFolder": "/workspace",
"postCreateCommand": ".devcontainer/post-create-command.sh",
"forwardPorts": [7091, 5173, 6379, 27017],
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-toolsai.jupyter",
"esbenp.prettier-vscode",
"dbaeumer.vscode-eslint"
]
},
"codespaces": {
"openFiles": [
".devcontainer/devc-welcome.md",
"CONTRIBUTING.md"
]
}
}
}

18
.devcontainer/docker-compose-dev.yaml Normal file

@@ -0,0 +1,18 @@
services:
redis:
image: redis:6-alpine
ports:
- 6379:6379
mongo:
image: mongo:6
ports:
- 27017:27017
volumes:
- mongodb_data_container:/data/db
volumes:
mongodb_data_container:
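Once these containers are up, a quick connectivity check mirrors the healthchecks used in the override file below; a sketch, assuming `redis-cli` and `mongosh` are available wherever you run it:
```bash
# From the host (both ports are published above):
redis-cli -h localhost -p 6379 ping                                     # expect: PONG
mongosh --host localhost --port 27017 --eval "db.adminCommand('ping')"  # expect: { ok: 1 }
```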

40
.devcontainer/docker-compose.override.yaml Normal file

@@ -0,0 +1,40 @@
version: '3.8'
services:
dev:
build:
context: .
dockerfile: Dockerfile
volumes:
- ../:/workspace:cached
command: sleep infinity
depends_on:
redis:
condition: service_healthy
mongo:
condition: service_healthy
environment:
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/1
- MONGO_URI=mongodb://mongo:27017/docsgpt
- CACHE_REDIS_URL=redis://redis:6379/2
networks:
- default
redis:
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 5s
timeout: 30s
retries: 5
mongo:
healthcheck:
test: ["CMD", "mongosh", "--eval", "db.adminCommand('ping')"]
interval: 5s
timeout: 30s
retries: 5
networks:
default:
name: docsgpt-dev-network

32
.devcontainer/post-create-command.sh Normal file

@@ -0,0 +1,32 @@
#!/bin/bash
set -e # Exit immediately if a command exits with a non-zero status
if [ ! -f frontend/.env.development ]; then
cp -n .env-template frontend/.env.development || true # Assuming .env-template is in the root
fi
# Determine VITE_API_HOST based on environment
if [ -n "$CODESPACES" ]; then
# Running in Codespaces
# CODESPACE_NAME and GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN are provided by the Codespaces environment
PUBLIC_API_HOST="https://${CODESPACE_NAME}-7091.${GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN}"
echo "Setting VITE_API_HOST for Codespaces: $PUBLIC_API_HOST in frontend/.env.development"
sed -i "s|VITE_API_HOST=.*|VITE_API_HOST=$PUBLIC_API_HOST|" frontend/.env.development
else
# Not running in Codespaces (local devcontainer)
DEFAULT_API_HOST="http://localhost:7091"
echo "Setting VITE_API_HOST for local dev: $DEFAULT_API_HOST in frontend/.env.development"
sed -i "s|VITE_API_HOST=.*|VITE_API_HOST=$DEFAULT_API_HOST|" frontend/.env.development
fi
mkdir -p model
if [ ! -d model/all-mpnet-base-v2 ]; then
wget -q https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip -O model/mpnet-base-v2.zip
unzip -q model/mpnet-base-v2.zip -d model
rm model/mpnet-base-v2.zip
fi
pip install -r application/requirements.txt
cd frontend
npm install --include=dev
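This script runs automatically through the `postCreateCommand` hook in `devcontainer.json`; if you need to re-apply it (for example after editing `.env-template`), it can simply be re-run:
```bash
# Re-run the post-create setup from the repository root.
bash .devcontainer/post-create-command.sh
```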

9
.env-template Normal file

@@ -0,0 +1,9 @@
API_KEY=<LLM API key (for example, an OpenAI key)>
LLM_NAME=docsgpt
VITE_API_STREAMING=true
#For Azure (you can delete it if you don't use Azure)
OPENAI_API_BASE=
OPENAI_API_VERSION=
AZURE_DEPLOYMENT_NAME=
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=

3
.github/FUNDING.yml vendored Normal file

@@ -0,0 +1,3 @@
# These are supported funding model platforms
github: arc53

138
.github/ISSUE_TEMPLATE/bug_report.yml vendored Normal file

@@ -0,0 +1,138 @@
name: "🐛 Bug Report"
description: "Submit a bug report to help us improve"
title: "🐛 Bug Report: "
labels: ["type: bug"]
body:
- type: markdown
attributes:
value: We value your time, and your effort in submitting this bug report is appreciated. 🙏
- type: textarea
id: description
validations:
required: true
attributes:
label: "📜 Description"
description: "A clear and concise description of what the bug is."
placeholder: "It bugs out when ..."
- type: textarea
id: steps-to-reproduce
validations:
required: true
attributes:
label: "👟 Reproduction steps"
description: "How do you trigger this bug? Please walk us through it step by step."
placeholder: "1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error"
- type: textarea
id: expected-behavior
validations:
required: true
attributes:
label: "👍 Expected behavior"
description: "What did you think should happen?"
placeholder: "It should ..."
- type: textarea
id: actual-behavior
validations:
required: true
attributes:
label: "👎 Actual Behavior with Screenshots"
description: "What did actually happen? Add screenshots, if applicable."
placeholder: "It actually ..."
- type: dropdown
id: operating-system
attributes:
label: "💻 Operating system"
description: "What OS is your app running on?"
options:
- Linux
- MacOS
- Windows
- Something else
validations:
required: true
- type: dropdown
id: browsers
attributes:
label: What browsers are you seeing the problem on?
multiple: true
options:
- Firefox
- Chrome
- Safari
- Microsoft Edge
- Something else
- type: dropdown
id: dev-environment
validations:
required: true
attributes:
label: "🤖 What development environment are you experiencing this bug on?"
options:
- Docker
- Local dev server
- type: textarea
id: env-vars
validations:
required: false
attributes:
label: "🔒 Did you set the correct environment variables in the right path? List the environment variable names (not values please!)"
description: "Please refer to the [Project setup instructions](https://github.com/arc53/DocsGPT#quickstart) if you are unsure."
placeholder: "It actually ..."
- type: textarea
id: additional-context
validations:
required: false
attributes:
label: "📃 Provide any additional context for the Bug."
description: "Add any other context about the problem here."
placeholder: "It actually ..."
- type: textarea
id: logs
validations:
required: false
attributes:
label: 📖 Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell
- type: checkboxes
id: no-duplicate-issues
attributes:
label: "👀 Have you spent some time to check if this bug has been raised before?"
options:
- label: "I checked and didn't find a similar issue"
required: true
- type: dropdown
id: willing-to-submit-pr
attributes:
label: 🔗 Are you willing to submit PR?
description: This is absolutely not required, but we are happy to guide you in the contribution process.
options: # Added options key
- "Yes, I am willing to submit a PR!"
- "No"
validations:
required: false
- type: checkboxes
id: terms
attributes:
label: 🧑‍⚖️ Code of Conduct
description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/arc53/DocsGPT/blob/main/CODE_OF_CONDUCT.md)
options:
- label: I agree to follow this project's Code of Conduct
required: true

54
.github/ISSUE_TEMPLATE/feature_request.yml vendored Normal file

@@ -0,0 +1,54 @@
name: 🚀 Feature
description: "Submit a proposal for a new feature"
title: "🚀 Feature: "
labels: [feature]
body:
- type: markdown
attributes:
value: We value your time, and your effort in submitting this feature request is appreciated. 🙏
- type: textarea
id: feature-description
validations:
required: true
attributes:
label: "🔖 Feature description"
description: "A clear and concise description of what the feature is."
placeholder: "You should add ..."
- type: textarea
id: pitch
validations:
required: true
attributes:
label: "🎤 Why is this feature needed?"
description: "Please explain why this feature should be implemented and how it would be used. Add examples, if applicable."
placeholder: "In my use-case, ..."
- type: textarea
id: solution
validations:
required: true
attributes:
label: "✌️ How do you aim to achieve this?"
description: "A clear and concise description of what you want to happen."
placeholder: "I want this feature to, ..."
- type: textarea
id: alternative
validations:
required: false
attributes:
label: "🔄️ Additional Information"
description: "A clear and concise description of any alternative solutions or additional information you've considered."
placeholder: "I tried, ..."
- type: checkboxes
id: no-duplicate-issues
attributes:
label: "👀 Have you spent some time to check if this feature request has been raised before?"
options:
- label: "I checked and didn't find a similar issue"
required: true
- type: dropdown
id: willing-to-submit-pr
attributes:
label: Are you willing to submit PR?
description: This is absolutely not required, but we are happy to guide you in the contribution process.
options:
- "Yes I am willing to submit a PR!"

5
.github/PULL_REQUEST_TEMPLATE.md vendored Normal file

@@ -0,0 +1,5 @@
- **What kind of change does this PR introduce?** (Bug fix, feature, docs update, ...)
- **Why was this change needed?** (You can also link to an open issue here)
- **Other information**:

19
.github/dependabot.yml vendored Normal file

@@ -0,0 +1,19 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "pip" # See documentation for possible values
directory: "/application" # Location of package manifests
schedule:
interval: "daily"
- package-ecosystem: "npm" # See documentation for possible values
directory: "/frontend" # Location of package manifests
schedule:
interval: "daily"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"

11
.github/holopin.yml vendored Normal file

@@ -0,0 +1,11 @@
organization: docsgpt
defaultSticker: cm1ulwkkl180570cl82rtzympu
stickers:
- id: cm1ulwkkl180570cl82rtzympu
alias: contributor2024
- id: cm1ureg8o130450cl8c1po6mil
alias: api
- id: cm1urhmag148240cl8yvqxkthx
alias: lpc
- id: cm1urlcpq622090cl2tvu4w71y
alias: lexeu

31
.github/labeler.yml vendored Normal file

@@ -0,0 +1,31 @@
repo:
- changed-files:
- any-glob-to-any-file: '*'
github:
- changed-files:
- any-glob-to-any-file: '.github/**/*'
application:
- changed-files:
- any-glob-to-any-file: 'application/**/*'
docs:
- changed-files:
- any-glob-to-any-file: 'docs/**/*'
extensions:
- changed-files:
- any-glob-to-any-file: 'extensions/**/*'
frontend:
- changed-files:
- any-glob-to-any-file: 'frontend/**/*'
scripts:
- changed-files:
- any-glob-to-any-file: 'scripts/**/*'
tests:
- changed-files:
- any-glob-to-any-file: 'tests/**/*'

40
.github/workflows/bandit.yaml vendored Normal file

@@ -0,0 +1,40 @@
name: Bandit Security Scan
on:
push:
branches:
- main
pull_request:
types: [opened, synchronize, reopened]
jobs:
bandit_scan:
if: ${{ github.repository == 'arc53/DocsGPT' }}
runs-on: ubuntu-latest
permissions:
security-events: write
actions: read
contents: read
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install bandit # Bandit is needed for this action
if [ -f application/requirements.txt ]; then pip install -r application/requirements.txt; fi
- name: Run Bandit scan
uses: PyCQA/bandit-action@v1
with:
severity: medium
confidence: medium
targets: application/
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
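To reproduce the scan locally before pushing, the following sketch should be close to what the action runs (the flags mirror the `severity`/`confidence` inputs above):
```bash
pip install bandit
# Recursively scan the backend, reporting findings of medium or higher severity/confidence.
bandit -r application/ --severity-level medium --confidence-level medium
```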

112
.github/workflows/ci.yml vendored Normal file

@@ -0,0 +1,112 @@
name: Build and push DocsGPT Docker image
on:
release:
types: [published]
jobs:
build:
if: github.repository == 'arc53/DocsGPT'
strategy:
matrix:
include:
- platform: linux/amd64
runner: ubuntu-latest
suffix: amd64
- platform: linux/arm64
runner: ubuntu-24.04-arm
suffix: arm64
runs-on: ${{ matrix.runner }}
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v4
- name: Set up QEMU # Only needed for emulation, not for native arm64 builds
if: matrix.platform == 'linux/arm64'
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver: docker-container
install: true
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push platform-specific images
uses: docker/build-push-action@v6
with:
file: './application/Dockerfile'
platforms: ${{ matrix.platform }}
context: ./application
push: true
tags: |
${{ secrets.DOCKER_USERNAME }}/docsgpt:${{ github.event.release.tag_name }}-${{ matrix.suffix }}
ghcr.io/${{ github.repository_owner }}/docsgpt:${{ github.event.release.tag_name }}-${{ matrix.suffix }}
provenance: false
sbom: false
cache-from: type=registry,ref=${{ secrets.DOCKER_USERNAME }}/docsgpt:latest
cache-to: type=inline
manifest:
if: github.repository == 'arc53/DocsGPT'
needs: build
runs-on: ubuntu-latest
permissions:
packages: write
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver: docker-container
install: true
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Create and push manifest for DockerHub
run: |
set -e
docker manifest create ${{ secrets.DOCKER_USERNAME }}/docsgpt:${{ github.event.release.tag_name }} \
--amend ${{ secrets.DOCKER_USERNAME }}/docsgpt:${{ github.event.release.tag_name }}-amd64 \
--amend ${{ secrets.DOCKER_USERNAME }}/docsgpt:${{ github.event.release.tag_name }}-arm64
docker manifest push ${{ secrets.DOCKER_USERNAME }}/docsgpt:${{ github.event.release.tag_name }}
docker manifest create ${{ secrets.DOCKER_USERNAME }}/docsgpt:latest \
--amend ${{ secrets.DOCKER_USERNAME }}/docsgpt:${{ github.event.release.tag_name }}-amd64 \
--amend ${{ secrets.DOCKER_USERNAME }}/docsgpt:${{ github.event.release.tag_name }}-arm64
docker manifest push ${{ secrets.DOCKER_USERNAME }}/docsgpt:latest
- name: Create and push manifest for ghcr.io
run: |
set -e
docker manifest create ghcr.io/${{ github.repository_owner }}/docsgpt:${{ github.event.release.tag_name }} \
--amend ghcr.io/${{ github.repository_owner }}/docsgpt:${{ github.event.release.tag_name }}-amd64 \
--amend ghcr.io/${{ github.repository_owner }}/docsgpt:${{ github.event.release.tag_name }}-arm64
docker manifest push ghcr.io/${{ github.repository_owner }}/docsgpt:${{ github.event.release.tag_name }}
docker manifest create ghcr.io/${{ github.repository_owner }}/docsgpt:latest \
--amend ghcr.io/${{ github.repository_owner }}/docsgpt:${{ github.event.release.tag_name }}-amd64 \
--amend ghcr.io/${{ github.repository_owner }}/docsgpt:${{ github.event.release.tag_name }}-arm64
docker manifest push ghcr.io/${{ github.repository_owner }}/docsgpt:latest
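After a release build finishes, the combined manifests can be checked from any machine with Docker; a sketch with placeholder values (substitute the real Docker Hub username and release tag):
```bash
# Each manifest list should contain both the amd64 and arm64 entries.
docker manifest inspect <dockerhub-username>/docsgpt:<tag>
docker manifest inspect ghcr.io/arc53/docsgpt:<tag>
```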

112
.github/workflows/cife.yml vendored Normal file

@@ -0,0 +1,112 @@
name: Build and push DocsGPT-FE Docker image
on:
release:
types: [published]
jobs:
build:
if: github.repository == 'arc53/DocsGPT'
strategy:
matrix:
include:
- platform: linux/amd64
runner: ubuntu-latest
suffix: amd64
- platform: linux/arm64
runner: ubuntu-24.04-arm
suffix: arm64
runs-on: ${{ matrix.runner }}
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v4
- name: Set up QEMU # Only needed for emulation, not for native arm64 builds
if: matrix.platform == 'linux/arm64'
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver: docker-container
install: true
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push platform-specific images
uses: docker/build-push-action@v6
with:
file: './frontend/Dockerfile'
platforms: ${{ matrix.platform }}
context: ./frontend
push: true
tags: |
${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:${{ github.event.release.tag_name }}-${{ matrix.suffix }}
ghcr.io/${{ github.repository_owner }}/docsgpt-fe:${{ github.event.release.tag_name }}-${{ matrix.suffix }}
provenance: false
sbom: false
cache-from: type=registry,ref=${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:latest
cache-to: type=inline
manifest:
if: github.repository == 'arc53/DocsGPT'
needs: build
runs-on: ubuntu-latest
permissions:
packages: write
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver: docker-container
install: true
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Create and push manifest for DockerHub
run: |
set -e
docker manifest create ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:${{ github.event.release.tag_name }} \
--amend ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:${{ github.event.release.tag_name }}-amd64 \
--amend ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:${{ github.event.release.tag_name }}-arm64
docker manifest push ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:${{ github.event.release.tag_name }}
docker manifest create ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:latest \
--amend ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:${{ github.event.release.tag_name }}-amd64 \
--amend ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:${{ github.event.release.tag_name }}-arm64
docker manifest push ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:latest
- name: Create and push manifest for ghcr.io
run: |
set -e
docker manifest create ghcr.io/${{ github.repository_owner }}/docsgpt-fe:${{ github.event.release.tag_name }} \
--amend ghcr.io/${{ github.repository_owner }}/docsgpt-fe:${{ github.event.release.tag_name }}-amd64 \
--amend ghcr.io/${{ github.repository_owner }}/docsgpt-fe:${{ github.event.release.tag_name }}-arm64
docker manifest push ghcr.io/${{ github.repository_owner }}/docsgpt-fe:${{ github.event.release.tag_name }}
docker manifest create ghcr.io/${{ github.repository_owner }}/docsgpt-fe:latest \
--amend ghcr.io/${{ github.repository_owner }}/docsgpt-fe:${{ github.event.release.tag_name }}-amd64 \
--amend ghcr.io/${{ github.repository_owner }}/docsgpt-fe:${{ github.event.release.tag_name }}-arm64
docker manifest push ghcr.io/${{ github.repository_owner }}/docsgpt-fe:latest


@@ -0,0 +1,100 @@
name: Build and push multi-arch DocsGPT Docker image
on:
workflow_dispatch:
push:
branches:
- main
jobs:
build:
if: github.repository == 'arc53/DocsGPT'
strategy:
matrix:
include:
- platform: linux/amd64
runner: ubuntu-latest
suffix: amd64
- platform: linux/arm64
runner: ubuntu-24.04-arm
suffix: arm64
runs-on: ${{ matrix.runner }}
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver: docker-container
install: true
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push platform-specific images
uses: docker/build-push-action@v6
with:
file: './application/Dockerfile'
platforms: ${{ matrix.platform }}
context: ./application
push: true
tags: |
${{ secrets.DOCKER_USERNAME }}/docsgpt:develop-${{ matrix.suffix }}
ghcr.io/${{ github.repository_owner }}/docsgpt:develop-${{ matrix.suffix }}
provenance: false
sbom: false
cache-from: type=registry,ref=${{ secrets.DOCKER_USERNAME }}/docsgpt:develop
cache-to: type=inline
manifest:
if: github.repository == 'arc53/DocsGPT'
needs: build
runs-on: ubuntu-latest
permissions:
packages: write
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver: docker-container
install: true
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Create and push manifest for DockerHub
run: |
docker manifest create ${{ secrets.DOCKER_USERNAME }}/docsgpt:develop \
--amend ${{ secrets.DOCKER_USERNAME }}/docsgpt:develop-amd64 \
--amend ${{ secrets.DOCKER_USERNAME }}/docsgpt:develop-arm64
docker manifest push ${{ secrets.DOCKER_USERNAME }}/docsgpt:develop
- name: Create and push manifest for ghcr.io
run: |
docker manifest create ghcr.io/${{ github.repository_owner }}/docsgpt:develop \
--amend ghcr.io/${{ github.repository_owner }}/docsgpt:develop-amd64 \
--amend ghcr.io/${{ github.repository_owner }}/docsgpt:develop-arm64
docker manifest push ghcr.io/${{ github.repository_owner }}/docsgpt:develop


@@ -0,0 +1,104 @@
name: Build and push DocsGPT FE Docker image for development
on:
workflow_dispatch:
push:
branches:
- main
jobs:
build:
if: github.repository == 'arc53/DocsGPT'
strategy:
matrix:
include:
- platform: linux/amd64
runner: ubuntu-latest
suffix: amd64
- platform: linux/arm64
runner: ubuntu-24.04-arm
suffix: arm64
runs-on: ${{ matrix.runner }}
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v4
- name: Set up QEMU # Only needed for emulation, not for native arm64 builds
if: matrix.platform == 'linux/arm64'
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver: docker-container
install: true
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push platform-specific images
uses: docker/build-push-action@v6
with:
file: './frontend/Dockerfile'
platforms: ${{ matrix.platform }}
context: ./frontend
push: true
tags: |
${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:develop-${{ matrix.suffix }}
ghcr.io/${{ github.repository_owner }}/docsgpt-fe:develop-${{ matrix.suffix }}
provenance: false
sbom: false
cache-from: type=registry,ref=${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:develop
cache-to: type=inline
manifest:
if: github.repository == 'arc53/DocsGPT'
needs: build
runs-on: ubuntu-latest
permissions:
packages: write
steps:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver: docker-container
install: true
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Create and push manifest for DockerHub
run: |
docker manifest create ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:develop \
--amend ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:develop-amd64 \
--amend ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:develop-arm64
docker manifest push ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:develop
- name: Create and push manifest for ghcr.io
run: |
docker manifest create ghcr.io/${{ github.repository_owner }}/docsgpt-fe:develop \
--amend ghcr.io/${{ github.repository_owner }}/docsgpt-fe:develop-amd64 \
--amend ghcr.io/${{ github.repository_owner }}/docsgpt-fe:develop-arm64
docker manifest push ghcr.io/${{ github.repository_owner }}/docsgpt-fe:develop

16
.github/workflows/labeler.yml vendored Normal file

@@ -0,0 +1,16 @@
# https://github.com/actions/labeler
name: Pull Request Labeler
on:
- pull_request_target
jobs:
triage:
if: github.repository == 'arc53/DocsGPT'
permissions:
contents: read
pull-requests: write
runs-on: ubuntu-latest
steps:
- uses: actions/labeler@v5
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
sync-labels: true

17
.github/workflows/lint.yml vendored Normal file

@@ -0,0 +1,17 @@
name: Python linting
on:
push:
branches:
- '*'
pull_request:
types: [ opened, synchronize ]
jobs:
ruff:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Lint with Ruff
uses: chartboost/ruff-action@v1
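The same lint can be run locally with the `ruff` CLI; it picks up the `.ruff.toml` at the repository root (shown later in this changeset):
```bash
pip install ruff
# Lint the whole repository with the project's Ruff configuration.
ruff check .
```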

30
.github/workflows/pytest.yml vendored Normal file

@@ -0,0 +1,30 @@
name: Run python tests with pytest
on: [push, pull_request]
jobs:
pytest_and_coverage:
name: Run tests and count coverage
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.12"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest pytest-cov
cd application
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Test with pytest and generate coverage report
run: |
python -m pytest --cov=application --cov-report=xml
- name: Upload coverage reports to Codecov
if: github.event_name == 'pull_request' && matrix.python-version == '3.12'
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
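The local equivalent of these CI steps, useful before opening a PR, is roughly:
```bash
pip install pytest pytest-cov
pip install -r application/requirements.txt
# Run the suite with the same coverage flags as CI.
python -m pytest --cov=application --cov-report=xml
```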

41
.github/workflows/sync_fork.yaml vendored Normal file

@@ -0,0 +1,41 @@
name: Upstream Sync
permissions:
contents: write
on:
schedule:
- cron: "0 0 * * *" # every day at 00:00 UTC
workflow_dispatch:
jobs:
sync_latest_from_upstream:
name: Sync latest commits from upstream repo
runs-on: ubuntu-latest
if: ${{ github.event.repository.fork }}
steps:
# Step 1: run a standard checkout action
- name: Checkout target repo
uses: actions/checkout@v4
# Step 2: run the sync action
- name: Sync upstream changes
id: sync
uses: aormsby/Fork-Sync-With-Upstream-action@v3.4
with:
# set your upstream repo and branch
upstream_sync_repo: arc53/DocsGPT
upstream_sync_branch: main
target_sync_branch: main
target_repo_token: ${{ secrets.GITHUB_TOKEN }} # automatically generated, no need to set
# Set test_mode true to run tests instead of the true action!!
test_mode: false
- name: Sync check
if: failure()
run: |
echo "::error::由于权限不足,导致同步失败(这是预期的行为),请前往仓库首页手动执行[Sync fork]。"
echo "::error::Due to insufficient permissions, synchronization failed (as expected). Please go to the repository homepage and manually perform [Sync fork]."
exit 1
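If the action fails and the Sync fork button is not an option, the manual equivalent from a clone of your fork is a few git commands (assuming `origin` points at your fork):
```bash
# One-time setup: add the upstream remote.
git remote add upstream https://github.com/arc53/DocsGPT.git
# Sync your fork's main branch with upstream.
git fetch upstream
git checkout main
git merge upstream/main
git push origin main
```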

177
.gitignore vendored Normal file

@@ -0,0 +1,177 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
*.next
# Distribution / packaging
.Python
build/
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
**/*.ipynb
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.flaskenv
# Spyder project settings
.spyderproject
.spyproject
.jwt_secret_key
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
#pycharm
.idea/
# macOS
.DS_Store
#frontend
# Logs
frontend/logs
frontend/*.log
frontend/npm-debug.log*
frontend/yarn-debug.log*
frontend/yarn-error.log*
frontend/pnpm-debug.log*
frontend/lerna-debug.log*
frontend/node_modules
frontend/dist
frontend/dist-ssr
frontend/*.local
# Editor directories and files
frontend/.vscode/*
!frontend/.vscode/extensions.json
frontend/.idea
frontend/.DS_Store
frontend/*.suo
frontend/*.ntvs*
frontend/*.njsproj
frontend/*.sln
frontend/*.sw?
application/vectors/
**/inputs
**/indexes
**/temp
**/yarn.lock
node_modules/
.vscode/settings.json
/models/
model/

2
.ruff.toml Normal file

@@ -0,0 +1,2 @@
# Allow lines to be as long as 120 characters.
line-length = 120

54
.vscode/launch.json vendored Normal file

@@ -0,0 +1,54 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Docker Debug Frontend",
"request": "launch",
"type": "chrome",
"preLaunchTask": "docker-compose: debug:frontend",
"url": "http://127.0.0.1:5173",
"webRoot": "${workspaceFolder}/frontend",
"skipFiles": [
"<node_internals>/**"
]
},
{
"name": "Flask Debugger",
"type": "debugpy",
"request": "launch",
"module": "flask",
"env": {
"FLASK_APP": "application/app.py",
"PYTHONPATH": "${workspaceFolder}",
"FLASK_ENV": "development",
"FLASK_DEBUG": "1",
"FLASK_RUN_PORT": "7091",
"FLASK_RUN_HOST": "0.0.0.0"
},
"args": [
"run",
"--no-debugger"
],
"cwd": "${workspaceFolder}",
},
{
"name": "Celery Debugger",
"type": "debugpy",
"request": "launch",
"module": "celery",
"env": {
"PYTHONPATH": "${workspaceFolder}",
},
"args": [
"-A",
"application.app.celery",
"worker",
"-l",
"INFO",
"--pool=solo"
],
"cwd": "${workspaceFolder}"
}
]
}

21
.vscode/tasks.json vendored Normal file

@@ -0,0 +1,21 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "docker-compose",
"label": "docker-compose: debug:frontend",
"dockerCompose": {
"up": {
"detached": true,
"services": [
"frontend"
],
"build": true
},
"files": [
"${workspaceFolder}/docker-compose.yaml"
]
}
}
]
}

124
CODE_OF_CONDUCT.md Normal file

@@ -0,0 +1,124 @@
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion or sexual identity
and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
1. Demonstrating empathy and kindness towards other people
2. Being respectful and open to differing opinions, viewpoints, and experiences
3. Giving and gracefully accepting constructive feedback
4. Taking accountability and offering apologies to those who have been impacted by our errors,
while also gaining insights from the situation
5. Focusing on what is best not just for us as individuals but for the
community as a whole
Examples of unacceptable behavior include:
1. The use of sexualized language or imagery, and sexual attention or
advances of any kind
2. Trolling, insulting or derogatory comments, and personal or political attacks
3. Public or private harassment
4. Publishing others' private information, such as a physical or email
address, without their explicit permission
5. Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
contact@arc53.com.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action that they deem in violation of this Code of Conduct:
### 1. Correction
* **Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community space.
* **Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
* **Community Impact**: A violation through a single incident or series
of actions.
* **Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.
### 3. Temporary Ban
* **Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
* **Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
* **Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual or aggression towards or disparagement of classes of individuals.
* **Consequence**: A permanent ban from any sort of public interaction within
the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.

151
CONTRIBUTING.md Normal file

@@ -0,0 +1,151 @@
# Welcome to DocsGPT Contributing Guidelines
Thank you for choosing to contribute to DocsGPT! We are all very grateful!
# We accept different types of contributions
📣 **Discussions** - Engage in conversations, start new topics, or help answer questions.
🐞 **Issues** - This is where we keep track of tasks. It could be bugs, fixes or suggestions for new features.
🛠️ **Pull requests** - Suggest changes to our repository, either by working on existing issues or adding new features.
📚 **Wiki** - This is where our documentation resides.
## 🐞 Issues and Pull requests
- We value contributions in the form of discussions or suggestions. We recommend taking a look at existing issues and our [roadmap](https://github.com/orgs/arc53/projects/2).
- If you're interested in contributing code, here are some important things to know:
- We have a frontend built on React (Vite) and a backend in Python.
Before creating issues, please check out how the latest version of our app looks and works by launching it via the [Quickstart](https://github.com/arc53/DocsGPT#quickstart). The version on our live demo is slightly modified, with a login added. Your issues should relate to the version you can launch via the [Quickstart](https://github.com/arc53/DocsGPT#quickstart).
### 👨‍💻 If you're interested in contributing code, here are some important things to know:
For instructions on setting up a development environment, please refer to our [Development Deployment Guide](https://docs.docsgpt.cloud/Deploying/Development-Environment).
Tech Stack Overview:
- 🌐 Frontend: Built with React (Vite) ⚛️,
- 🖥 Backend: Developed in Python 🐍
### 🌐 Frontend Contributions (⚛️ React, Vite)
* The updated Figma design can be found [here](https://www.figma.com/file/OXLtrl1EAy885to6S69554/DocsGPT?node-id=0%3A1&t=hjWVuxRg9yi5YkJ9-1). Please try to follow the guidelines.
* **Coding Style:** We follow a strict coding style enforced by ESLint and Prettier. Please ensure your code adheres to the configuration provided in our repository's `frontend/.eslintrc.js` file. We recommend configuring your editor with ESLint and Prettier to help with this.
* **Component Structure:** Strive for small, reusable components. Favor functional components and hooks over class components where possible.
* **State Management:** If you need to add stores, please use Redux.
### 🖥 Backend Contributions (🐍 Python)
- Review our issues and contribute to [`/application`](https://github.com/arc53/DocsGPT/tree/main/application)
- All new code should be covered with unit tests ([pytest](https://github.com/pytest-dev/pytest)). Please find tests under [`/tests`](https://github.com/arc53/DocsGPT/tree/main/tests) folder.
- Before submitting your Pull Request, ensure it can be queried after ingesting some test data.
- **Coding Style:** We adhere to the [PEP 8](https://www.python.org/dev/peps/pep-0008/) style guide for Python code. We use `ruff` as our linter and code formatter. Please ensure your code is formatted correctly and passes `ruff` checks before submitting.
- **Type Hinting:** Please use type hints for all function arguments and return values. This improves code readability and helps catch errors early. Example:
```python
def my_function(name: str, count: int) -> list[str]:
...
```
- **Docstrings:** All functions and classes should have docstrings explaining their purpose, parameters, and return values. We prefer the [Google style docstrings](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html). Example:
```python
def my_function(name: str, count: int) -> list[str]:
"""Does something with a name and a count.
Args:
name: The name to use.
count: The number of times to do it.
Returns:
A list of strings.
"""
...
```
### Testing
To run unit tests from the root of the repository, execute:
```
python -m pytest
```
## Workflow 📈
Here's a step-by-step guide on how to contribute to DocsGPT:
1. **Fork the Repository:**
- Click the "Fork" button at the top-right of this repository to create your fork.
2. **Clone the Forked Repository:**
- Clone the repository using:
``` shell
git clone https://github.com/<your-github-username>/DocsGPT.git
```
3. **Keep your Fork in Sync:**
- Before you make any changes, make sure that your fork is in sync to avoid merge conflicts using:
```shell
git remote add upstream https://github.com/arc53/DocsGPT.git
git pull upstream main
```
4. **Create and Switch to a New Branch:**
- Create a new branch for your contribution using:
```shell
git checkout -b your-branch-name
```
5. **Make Changes:**
- Make the required changes in your branch.
6. **Add Changes to the Staging Area:**
- Add your changes to the staging area using:
```shell
git add .
```
7. **Commit Your Changes:**
- Commit your changes with a descriptive commit message using:
```shell
git commit -m "Your descriptive commit message"
```
8. **Push Your Changes to the Remote Repository:**
- Push your branch with changes to your fork on GitHub using:
```shell
git push origin your-branch-name
```
9. **Submit a Pull Request (PR):**
- Create a Pull Request from your branch to the main repository. Make sure to include a detailed description of your changes and reference any related issues.
10. **Collaborate:**
- Be responsive to comments and feedback on your PR.
- Make necessary updates as suggested.
- Once your PR is approved, it will be merged into the main repository.
11. **Testing:**
- Before submitting a Pull Request, ensure your code passes all unit tests.
- To run unit tests from the root of the repository, execute:
```shell
python -m pytest
```
*Note: You should run the unit test only after making the changes to the backend code.*
12. **Questions and Collaboration:**
- Feel free to join our Discord. We're very friendly and welcoming to new contributors, so don't hesitate to reach out.
Thank you for considering contributing to DocsGPT! 🙏
## Questions/collaboration
Feel free to join our [Discord](https://discord.gg/n5BX8dh8rU). We're very friendly and welcoming to new contributors, so don't hesitate to reach out.
# Thank you so much for considering contributing to DocsGPT! 🙏

21
LICENSE Normal file

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 arc53
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

158
README.md Normal file

@@ -0,0 +1,158 @@
<h1 align="center">
DocsGPT 🦖
</h1>
<p align="center">
<strong>Open-Source RAG Assistant</strong>
</p>
<p align="left">
<strong><a href="https://www.docsgpt.cloud/">DocsGPT</a></strong> is an open-source genAI tool that helps users get reliable answers from any knowledge source, while avoiding hallucinations. It enables quick and reliable information retrieval, with tooling and agentic system capability built in.
</p>
<div align="center">
<a href="https://github.com/arc53/DocsGPT">![link to main GitHub showing Stars number](https://img.shields.io/github/stars/arc53/docsgpt?style=social)</a>
<a href="https://github.com/arc53/DocsGPT">![link to main GitHub showing Forks number](https://img.shields.io/github/forks/arc53/docsgpt?style=social)</a>
<a href="https://github.com/arc53/DocsGPT/blob/main/LICENSE">![link to license file](https://img.shields.io/github/license/arc53/docsgpt)</a>
<a href="https://www.bestpractices.dev/projects/9907"><img src="https://www.bestpractices.dev/projects/9907/badge"></a>
<a href="https://discord.gg/n5BX8dh8rU">![link to discord](https://img.shields.io/discord/1070046503302877216)</a>
<a href="https://twitter.com/docsgptai">![X (formerly Twitter) URL](https://img.shields.io/twitter/follow/docsgptai)</a>
<a href="https://docs.docsgpt.cloud/quickstart">⚡️ Quickstart</a><a href="https://app.docsgpt.cloud/">☁️ Cloud Version</a><a href="https://discord.gg/n5BX8dh8rU">💬 Discord</a>
<br>
<a href="https://docs.docsgpt.cloud/">📖 Documentation</a><a href="https://github.com/arc53/DocsGPT/blob/main/CONTRIBUTING.md">👫 Contribute</a><a href="https://blog.docsgpt.cloud/">🗞 Blog</a>
<br>
</div>
<div align="center">
<img src="https://d3dg1063dc54p9.cloudfront.net/videos/demov7.gif" alt="video-example-of-docs-gpt" width="800" height="450">
</div>
<h3 align="left">
<strong>Key Features:</strong>
</h3>
<ul align="left">
<li><strong>🗂️ Wide Format Support:</strong> Reads PDF, DOCX, CSV, XLSX, EPUB, MD, RST, HTML, MDX, JSON, PPTX, and images.</li>
<li><strong>🌐 Web & Data Integration:</strong> Ingests from URLs, sitemaps, Reddit, GitHub and web crawlers.</li>
<li><strong>✅ Reliable Answers:</strong> Get accurate, hallucination-free responses with source citations viewable in a clean UI.</li>
<li><strong>🔑 Streamlined API Keys:</strong> Generate keys linked to your settings, documents, and models, simplifying chatbot and integration setup.</li>
<li><strong>🔗 Actionable Tooling:</strong> Connect to APIs, tools, and other services to enable LLM actions.</li>
<li><strong>🧩 Pre-built Integrations:</strong> Use readily available HTML/React chat widgets, search tools, Discord/Telegram bots, and more.</li>
<li><strong>🔌 Flexible Deployment:</strong> Works with major LLMs (OpenAI, Google, Anthropic) and local models (Ollama, llama_cpp).</li>
<li><strong>🏢 Secure & Scalable:</strong> Run privately and securely with Kubernetes support, designed for enterprise-grade reliability.</li>
</ul>
## Roadmap
- [x] Full GoogleAI compatibility (Jan 2025)
- [x] Add tools (Jan 2025)
- [x] Manually updating chunks in the app UI (Feb 2025)
- [x] Devcontainer for easy development (Feb 2025)
- [x] ReACT agent (March 2025)
- [x] Chatbots menu re-design to handle tools, agent types, and more (April 2025)
- [x] New input box in the conversation menu (April 2025)
- [x] Add triggerable actions / tools (webhook) (April 2025)
- [ ] Anthropic Tool compatibility (May 2025)
- [ ] Add OAuth 2.0 authentication for tools and sources
- [ ] Agent scheduling
You can find our full roadmap [here](https://github.com/orgs/arc53/projects/2). Please don't hesitate to contribute or create issues, it helps us improve DocsGPT!
### Production Support / Help for Companies:
We're eager to provide personalized assistance when deploying your DocsGPT to a live environment.
[Get a Demo :wave:](https://www.docsgpt.cloud/contact)
[Send Email :email:](mailto:support@docsgpt.cloud?subject=DocsGPT%20support%2Fsolutions)
## Join the Lighthouse Program 🌟
Calling all developers and GenAI innovators! The **DocsGPT Lighthouse Program** connects technical leaders actively deploying or extending DocsGPT in real-world scenarios. Collaborate directly with our team to shape the roadmap, access priority support, and build enterprise-ready solutions with exclusive community insights.
[Learn More & Apply →](https://docs.google.com/forms/d/1KAADiJinUJ8EMQyfTXUIGyFbqINNClNR3jBNWq7DgTE)
## QuickStart
> [!NOTE]
> Make sure you have [Docker](https://docs.docker.com/engine/install/) installed
A more detailed [Quickstart](https://docs.docsgpt.cloud/quickstart) is available in our documentation.
1. **Clone the repository:**
```bash
git clone https://github.com/arc53/DocsGPT.git
cd DocsGPT
```
**For macOS and Linux:**
2. **Run the setup script:**
```bash
./setup.sh
```
**For Windows:**
2. **Run the PowerShell setup script:**
```powershell
PowerShell -ExecutionPolicy Bypass -File .\setup.ps1
```
Either script will guide you through setting up DocsGPT; four options are available: using the public API, running locally, connecting to a local inference engine, or using a cloud API provider. The script will automatically configure your `.env` file and handle necessary downloads and installations based on your chosen option.
**Navigate to http://localhost:5173/**
To stop DocsGPT, open a terminal in the `DocsGPT` directory and run:
```bash
docker compose -f deployment/docker-compose.yaml down
```
(or use the specific `docker compose down` command shown after running the setup script).
> [!NOTE]
> For development environment setup instructions, please refer to the [Development Environment Guide](https://docs.docsgpt.cloud/Deploying/Development-Environment).
## Contributing
Please refer to the [CONTRIBUTING.md](CONTRIBUTING.md) file for information about how to get involved. We welcome issues, questions, and pull requests.
## Architecture
![Architecture chart](https://github.com/user-attachments/assets/fc6a7841-ddfc-45e6-b5a0-d05fe648cbe2)
## Project Structure
- Application - Flask app (main application).
- Extensions - Extensions, like the React widget or the Discord bot.
- Frontend - Frontend uses <a href="https://vitejs.dev/">Vite</a> and <a href="https://react.dev/">React</a>.
- Scripts - Miscellaneous scripts.
## Code Of Conduct
We as members, contributors, and leaders, pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. Please refer to the [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) file for more information about contributing.
## Many Thanks To Our Contributors⚡
<a href="https://github.com/arc53/DocsGPT/graphs/contributors" alt="View Contributors">
<img src="https://contrib.rocks/image?repo=arc53/DocsGPT" alt="Contributors" />
</a>
## License
The source code license is [MIT](https://opensource.org/license/mit/), as described in the [LICENSE](LICENSE) file.
<p>This project is supported by:</p>
<p>
<a href="https://www.digitalocean.com/?utm_medium=opensource&utm_source=DocsGPT">
<img src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/SVG/DO_Logo_horizontal_blue.svg" width="201px">
</a>
</p>

14
SECURITY.md Normal file
View File

@@ -0,0 +1,14 @@
# Security Policy
## Supported Versions
Currently, we support security patches by committing changes and bumping the version published on GitHub.
## Reporting a Vulnerability
Found a vulnerability? Please email us:
security@arc53.com

11
application/.env_sample Normal file

@@ -0,0 +1,11 @@
API_KEY=your_api_key
EMBEDDINGS_KEY=your_api_key
API_URL=http://localhost:7091
FLASK_APP=application/app.py
FLASK_DEBUG=true
#For OPENAI on Azure
OPENAI_API_BASE=
OPENAI_API_VERSION=
AZURE_DEPLOYMENT_NAME=
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=

87
application/Dockerfile Normal file

@@ -0,0 +1,87 @@
# Builder Stage
FROM ubuntu:24.04 as builder
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y software-properties-common && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y --no-install-recommends gcc wget unzip libc6-dev python3.12 python3.12-venv && \
rm -rf /var/lib/apt/lists/*
# Verify Python installation and setup symlink
RUN if [ -f /usr/bin/python3.12 ]; then \
ln -s /usr/bin/python3.12 /usr/bin/python; \
else \
echo "Python 3.12 not found"; exit 1; \
fi
# Download and unzip the model
RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip && \
unzip mpnet-base-v2.zip -d models && \
rm mpnet-base-v2.zip
# Install Rust
RUN wget -q -O - https://sh.rustup.rs | sh -s -- -y
# Clean up to reduce container size
RUN apt-get remove --purge -y wget unzip && apt-get autoremove -y && rm -rf /var/lib/apt/lists/*
# Copy requirements.txt
COPY requirements.txt .
# Setup Python virtual environment
RUN python3.12 -m venv /venv
# Activate virtual environment and install Python packages
ENV PATH="/venv/bin:$PATH"
# Install Python packages
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir tiktoken && \
pip install --no-cache-dir -r requirements.txt
# Final Stage
FROM ubuntu:24.04 as final
RUN apt-get update && \
apt-get install -y software-properties-common && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && apt-get install -y --no-install-recommends python3.12 && \
ln -s /usr/bin/python3.12 /usr/bin/python && \
rm -rf /var/lib/apt/lists/*
# Set working directory
WORKDIR /app
# Create a non-root user: `appuser` (Feel free to choose a name)
RUN groupadd -r appuser && \
useradd -r -g appuser -d /app -s /sbin/nologin -c "Docker image user" appuser
# Copy the virtual environment and model from the builder stage
COPY --from=builder /venv /venv
COPY --from=builder /models /app/models
# Copy your application code
COPY . /app/application
# Change the ownership of the /app directory to the appuser
RUN mkdir -p /app/application/inputs/local
RUN chown -R appuser:appuser /app
# Set environment variables
ENV FLASK_APP=app.py \
FLASK_DEBUG=true \
PATH="/venv/bin:$PATH"
# Expose the port the app runs on
EXPOSE 7091
# Switch to non-root user
USER appuser
# Start Gunicorn
CMD ["gunicorn", "-w", "1", "--timeout", "120", "--bind", "0.0.0.0:7091", "--preload", "application.wsgi:app"]

0
application/__init__.py Normal file

0
application/agents/__init__.py Normal file

16
application/agents/agent_creator.py Normal file

@@ -0,0 +1,16 @@
from application.agents.classic_agent import ClassicAgent
from application.agents.react_agent import ReActAgent
class AgentCreator:
agents = {
"classic": ClassicAgent,
"react": ReActAgent,
}
@classmethod
def create_agent(cls, type, *args, **kwargs):
agent_class = cls.agents.get(type.lower())
if not agent_class:
raise ValueError(f"No agent class found for type {type}")
return agent_class(*args, **kwargs)

327
application/agents/base.py Normal file
View File

@@ -0,0 +1,327 @@
import uuid
from abc import ABC, abstractmethod
from typing import Dict, Generator, List, Optional
from bson.objectid import ObjectId
from application.agents.tools.tool_action_parser import ToolActionParser
from application.agents.tools.tool_manager import ToolManager
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.llm.handlers.handler_creator import LLMHandlerCreator
from application.llm.llm_creator import LLMCreator
from application.logging import build_stack_data, log_activity, LogContext
from application.retriever.base import BaseRetriever
class BaseAgent(ABC):
def __init__(
self,
endpoint: str,
llm_name: str,
gpt_model: str,
api_key: str,
user_api_key: Optional[str] = None,
prompt: str = "",
chat_history: Optional[List[Dict]] = None,
decoded_token: Optional[Dict] = None,
attachments: Optional[List[Dict]] = None,
):
self.endpoint = endpoint
self.llm_name = llm_name
self.gpt_model = gpt_model
self.api_key = api_key
self.user_api_key = user_api_key
self.prompt = prompt
self.decoded_token = decoded_token or {}
self.user: str = self.decoded_token.get("sub")
self.tool_config: Dict = {}
self.tools: List[Dict] = []
self.tool_calls: List[Dict] = []
self.chat_history: List[Dict] = chat_history if chat_history is not None else []
self.llm = LLMCreator.create_llm(
llm_name,
api_key=api_key,
user_api_key=user_api_key,
decoded_token=decoded_token,
)
self.llm_handler = LLMHandlerCreator.create_handler(
llm_name if llm_name else "default"
)
self.attachments = attachments or []
@log_activity()
def gen(
self, query: str, retriever: BaseRetriever, log_context: LogContext = None
) -> Generator[Dict, None, None]:
yield from self._gen_inner(query, retriever, log_context)
@abstractmethod
def _gen_inner(
self, query: str, retriever: BaseRetriever, log_context: LogContext
) -> Generator[Dict, None, None]:
pass
def _get_tools(self, api_key: str = None) -> Dict[str, Dict]:
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
agents_collection = db["agents"]
tools_collection = db["user_tools"]
agent_data = agents_collection.find_one({"key": api_key or self.user_api_key})
tool_ids = agent_data.get("tools", []) if agent_data else []
tools = (
tools_collection.find(
{"_id": {"$in": [ObjectId(tool_id) for tool_id in tool_ids]}}
)
if tool_ids
else []
)
tools = list(tools)
tools_by_id = {str(tool["_id"]): tool for tool in tools} if tools else {}
return tools_by_id
def _get_user_tools(self, user="local"):
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
user_tools_collection = db["user_tools"]
user_tools = user_tools_collection.find({"user": user, "status": True})
user_tools = list(user_tools)
tools_by_id = {str(tool["_id"]): tool for tool in user_tools}
return tools_by_id
def _build_tool_parameters(self, action):
params = {"type": "object", "properties": {}, "required": []}
for param_type in ["query_params", "headers", "body", "parameters"]:
if param_type in action and action[param_type].get("properties"):
for k, v in action[param_type]["properties"].items():
if v.get("filled_by_llm", True):
params["properties"][k] = {
key: value
for key, value in v.items()
if key != "filled_by_llm" and key != "value"
}
params["required"].append(k)
return params
def _prepare_tools(self, tools_dict):
self.tools = [
{
"type": "function",
"function": {
"name": f"{action['name']}_{tool_id}",
"description": action["description"],
"parameters": self._build_tool_parameters(action),
},
}
for tool_id, tool in tools_dict.items()
if (
(tool["name"] == "api_tool" and "actions" in tool.get("config", {}))
or (tool["name"] != "api_tool" and "actions" in tool)
)
for action in (
tool["config"]["actions"].values()
if tool["name"] == "api_tool"
else tool["actions"]
)
if action.get("active", True)
]
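# Example (illustrative): for a tool whose action is named "send_message" and
# whose MongoDB _id is "507f1f77bcf86cd799439011", _prepare_tools emits an entry
# shaped like:
#   {"type": "function",
#    "function": {"name": "send_message_507f1f77bcf86cd799439011",
#                 "description": "...",
#                 "parameters": {"type": "object", "properties": {...}, "required": [...]}}}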
def _execute_tool_action(self, tools_dict, call):
parser = ToolActionParser(self.llm.__class__.__name__)
tool_id, action_name, call_args = parser.parse_args(call)
call_id = getattr(call, "id", None) or str(uuid.uuid4())
tool_call_data = {
"tool_name": tools_dict[tool_id]["name"],
"call_id": call_id,
"action_name": f"{action_name}_{tool_id}",
"arguments": call_args,
}
yield {"type": "tool_call", "data": {**tool_call_data, "status": "pending"}}
tool_data = tools_dict[tool_id]
action_data = (
tool_data["config"]["actions"][action_name]
if tool_data["name"] == "api_tool"
else next(
action
for action in tool_data["actions"]
if action["name"] == action_name
)
)
query_params, headers, body, parameters = {}, {}, {}, {}
param_types = {
"query_params": query_params,
"headers": headers,
"body": body,
"parameters": parameters,
}
for param_type, target_dict in param_types.items():
if param_type in action_data and action_data[param_type].get("properties"):
for param, details in action_data[param_type]["properties"].items():
if param not in call_args and "value" in details:
target_dict[param] = details["value"]
for param, value in call_args.items():
for param_type, target_dict in param_types.items():
if param_type in action_data and param in action_data[param_type].get(
"properties", {}
):
target_dict[param] = value
tm = ToolManager(config={})
tool = tm.load_tool(
tool_data["name"],
tool_config=(
{
"url": tool_data["config"]["actions"][action_name]["url"],
"method": tool_data["config"]["actions"][action_name]["method"],
"headers": headers,
"query_params": query_params,
}
if tool_data["name"] == "api_tool"
else tool_data["config"]
),
)
if tool_data["name"] == "api_tool":
print(
f"Executing api: {action_name} with query_params: {query_params}, headers: {headers}, body: {body}"
)
result = tool.execute_action(action_name, **body)
else:
print(f"Executing tool: {action_name} with args: {call_args}")
result = tool.execute_action(action_name, **parameters)
tool_call_data["result"] = (
f"{str(result)[:50]}..." if len(str(result)) > 50 else result
)
yield {"type": "tool_call", "data": {**tool_call_data, "status": "completed"}}
self.tool_calls.append(tool_call_data)
return result, call_id
def _get_truncated_tool_calls(self):
return [
{
**tool_call,
"result": (
f"{str(tool_call['result'])[:50]}..."
if len(str(tool_call["result"])) > 50
else tool_call["result"]
),
"status": "completed",
}
for tool_call in self.tool_calls
]
def _build_messages(
self,
system_prompt: str,
query: str,
retrieved_data: List[Dict],
) -> List[Dict]:
docs_together = "\n".join([doc["text"] for doc in retrieved_data])
p_chat_combine = system_prompt.replace("{summaries}", docs_together)
messages_combine = [{"role": "system", "content": p_chat_combine}]
for i in self.chat_history:
if "prompt" in i and "response" in i:
messages_combine.append({"role": "user", "content": i["prompt"]})
messages_combine.append({"role": "assistant", "content": i["response"]})
if "tool_calls" in i:
for tool_call in i["tool_calls"]:
call_id = tool_call.get("call_id") or str(uuid.uuid4())
function_call_dict = {
"function_call": {
"name": tool_call.get("action_name"),
"args": tool_call.get("arguments"),
"call_id": call_id,
}
}
function_response_dict = {
"function_response": {
"name": tool_call.get("action_name"),
"response": {"result": tool_call.get("result")},
"call_id": call_id,
}
}
messages_combine.append(
{"role": "assistant", "content": [function_call_dict]}
)
messages_combine.append(
{"role": "tool", "content": [function_response_dict]}
)
messages_combine.append({"role": "user", "content": query})
return messages_combine
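# Example (illustrative): with one prior exchange and no tool calls,
# _build_messages returns:
#   [{"role": "system", "content": "<prompt with {summaries} filled in>"},
#    {"role": "user", "content": "<previous prompt>"},
#    {"role": "assistant", "content": "<previous response>"},
#    {"role": "user", "content": "<current query>"}]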
def _retriever_search(
self,
retriever: BaseRetriever,
query: str,
log_context: Optional[LogContext] = None,
) -> List[Dict]:
retrieved_data = retriever.search(query)
if log_context:
data = build_stack_data(retriever, exclude_attributes=["llm"])
log_context.stacks.append({"component": "retriever", "data": data})
return retrieved_data
def _llm_gen(self, messages: List[Dict], log_context: Optional[LogContext] = None):
gen_kwargs = {"model": self.gpt_model, "messages": messages}
if (
hasattr(self.llm, "_supports_tools")
and self.llm._supports_tools
and self.tools
):
gen_kwargs["tools"] = self.tools
resp = self.llm.gen_stream(**gen_kwargs)
if log_context:
data = build_stack_data(self.llm, exclude_attributes=["client"])
log_context.stacks.append({"component": "llm", "data": data})
return resp
def _llm_handler(
self,
resp,
tools_dict: Dict,
messages: List[Dict],
log_context: Optional[LogContext] = None,
attachments: Optional[List[Dict]] = None,
):
resp = self.llm_handler.process_message_flow(
self, resp, tools_dict, messages, attachments, True
)
if log_context:
data = build_stack_data(self.llm_handler, exclude_attributes=["tool_calls"])
log_context.stacks.append({"component": "llm_handler", "data": data})
return resp
def _handle_response(self, response, tools_dict, messages, log_context):
if isinstance(response, str):
yield {"answer": response}
return
if hasattr(response, "message") and getattr(response.message, "content", None):
yield {"answer": response.message.content}
return
processed_response_gen = self._llm_handler(
response, tools_dict, messages, log_context, self.attachments
)
for event in processed_response_gen:
if isinstance(event, str):
yield {"answer": event}
elif hasattr(event, "message") and getattr(event.message, "content", None):
yield {"answer": event.message.content}
elif isinstance(event, dict) and "type" in event:
yield event

53
application/agents/classic_agent.py Normal file
View File

@@ -0,0 +1,53 @@
from typing import Dict, Generator
from application.agents.base import BaseAgent
from application.logging import LogContext
from application.retriever.base import BaseRetriever
import logging
logger = logging.getLogger(__name__)
class ClassicAgent(BaseAgent):
"""A simplified classic agent with clear execution flow.
Usage:
1. Processes a query through retrieval
2. Sets up available tools
3. Generates responses using LLM
4. Handles tool interactions if needed
5. Returns standardized outputs
Easy to extend by overriding specific steps.
"""
def _gen_inner(
self, query: str, retriever: BaseRetriever, log_context: LogContext
) -> Generator[Dict, None, None]:
# Step 1: Retrieve relevant data
retrieved_data = self._retriever_search(retriever, query, log_context)
# Step 2: Prepare tools
tools_dict = (
self._get_user_tools(self.user)
if not self.user_api_key
else self._get_tools(self.user_api_key)
)
self._prepare_tools(tools_dict)
# Step 3: Build and process messages
messages = self._build_messages(self.prompt, query, retrieved_data)
llm_response = self._llm_gen(messages, log_context)
# Step 4: Handle the response
yield from self._handle_response(
llm_response, tools_dict, messages, log_context
)
# Step 5: Return metadata
yield {"sources": retrieved_data}
yield {"tool_calls": self._get_truncated_tool_calls()}
# Log tool calls for debugging
if log_context:
log_context.stacks.append(
{"component": "agent", "data": {"tool_calls": self.tool_calls.copy()}}
)

229
application/agents/react_agent.py Normal file
View File

@@ -0,0 +1,229 @@
import os
from typing import Dict, Generator, List, Any
import logging
from application.agents.base import BaseAgent
from application.logging import build_stack_data, LogContext
from application.retriever.base import BaseRetriever
logger = logging.getLogger(__name__)
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
with open(
os.path.join(current_dir, "application/prompts", "react_planning_prompt.txt"), "r"
) as f:
planning_prompt_template = f.read()
with open(
os.path.join(current_dir, "application/prompts", "react_final_prompt.txt"),
"r",
) as f:
final_prompt_template = f.read()
MAX_ITERATIONS_REASONING = 10
class ReActAgent(BaseAgent):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.plan: str = ""
self.observations: List[str] = []
def _extract_content_from_llm_response(self, resp: Any) -> str:
"""
Helper to extract string content from various LLM response types.
Handles strings, message objects (OpenAI-like), and streams.
Adapt stream handling for your specific LLM client if not OpenAI.
"""
collected_content = []
if isinstance(resp, str):
collected_content.append(resp)
elif ( # OpenAI non-streaming or Anthropic non-streaming (older SDK style)
hasattr(resp, "message")
and hasattr(resp.message, "content")
and resp.message.content is not None
):
collected_content.append(resp.message.content)
elif ( # OpenAI non-streaming (Pydantic model), Anthropic new SDK non-streaming
hasattr(resp, "choices") and resp.choices and
hasattr(resp.choices[0], "message") and
hasattr(resp.choices[0].message, "content") and
resp.choices[0].message.content is not None
):
collected_content.append(resp.choices[0].message.content) # OpenAI
elif ( # Anthropic new SDK non-streaming content block
hasattr(resp, "content") and isinstance(resp.content, list) and resp.content and
hasattr(resp.content[0], "text")
):
collected_content.append(resp.content[0].text) # Anthropic
else:
# Assume resp is a stream if not a recognized object
try:
for chunk in resp: # This will fail if resp is not iterable (e.g. a non-streaming response object)
content_piece = ""
# OpenAI-like stream
if hasattr(chunk, 'choices') and len(chunk.choices) > 0 and \
hasattr(chunk.choices[0], 'delta') and \
hasattr(chunk.choices[0].delta, 'content') and \
chunk.choices[0].delta.content is not None:
content_piece = chunk.choices[0].delta.content
# Anthropic-like stream (ContentBlockDelta)
elif hasattr(chunk, 'type') and chunk.type == 'content_block_delta' and \
hasattr(chunk, 'delta') and hasattr(chunk.delta, 'text'):
content_piece = chunk.delta.text
elif isinstance(chunk, str): # Simplest case: stream of strings
content_piece = chunk
if content_piece:
collected_content.append(content_piece)
except TypeError: # If resp is not iterable (e.g. a final response object that wasn't caught above)
logger.debug(f"Response type {type(resp)} could not be iterated as a stream. It might be a non-streaming object not handled by specific checks.")
except Exception as e:
logger.error(f"Error processing potential stream chunk: {e}, chunk was: {getattr(chunk, '__dict__', chunk)}")
return "".join(collected_content)
def _gen_inner(
self, query: str, retriever: BaseRetriever, log_context: LogContext
) -> Generator[Dict, None, None]:
# Reset state for this generation call
self.plan = ""
self.observations = []
retrieved_data = self._retriever_search(retriever, query, log_context)
if self.user_api_key:
tools_dict = self._get_tools(self.user_api_key)
else:
tools_dict = self._get_user_tools(self.user)
self._prepare_tools(tools_dict)
docs_together = "\n".join([doc["text"] for doc in retrieved_data])
iterating_reasoning = 0
while iterating_reasoning < MAX_ITERATIONS_REASONING:
iterating_reasoning += 1
# 1. Create Plan
logger.info("ReActAgent: Creating plan...")
plan_stream = self._create_plan(query, docs_together, log_context)
current_plan_parts = []
yield {"thought": f"Reasoning... (iteration {iterating_reasoning})\n\n"}
for line_chunk in plan_stream:
current_plan_parts.append(line_chunk)
yield {"thought": line_chunk}
self.plan = "".join(current_plan_parts)
if self.plan:
self.observations.append(f"Plan: {self.plan} Iteration: {iterating_reasoning}")
max_obs_len = 20000
obs_str = "\n".join(self.observations)
if len(obs_str) > max_obs_len:
obs_str = obs_str[:max_obs_len] + "\n...[observations truncated]"
execution_prompt_str = (
(self.prompt or "")
+ f"\n\nFollow this plan:\n{self.plan}"
+ f"\n\nObservations:\n{obs_str}"
+ f"\n\nIf there is enough data to complete user query '{query}', Respond with 'SATISFIED' only. Otherwise, continue. Dont Menstion 'SATISFIED' in your response if you are not ready. "
)
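# 2. Execute the plan and collect observations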
messages = self._build_messages(execution_prompt_str, query, retrieved_data)
resp_from_llm_gen = self._llm_gen(messages, log_context)
initial_llm_thought_content = self._extract_content_from_llm_response(resp_from_llm_gen)
if initial_llm_thought_content:
self.observations.append(f"Initial thought/response: {initial_llm_thought_content}")
else:
logger.info("ReActAgent: Initial LLM response (before handler) had no textual content (might be only tool calls).")
resp_after_handler = self._llm_handler(resp_from_llm_gen, tools_dict, messages, log_context)
for tool_call_info in self.tool_calls: # Iterate over self.tool_calls populated by _llm_handler
observation_string = (
f"Executed Action: Tool '{tool_call_info.get('tool_name', 'N/A')}' "
f"with arguments '{tool_call_info.get('arguments', '{}')}'. Result: '{str(tool_call_info.get('result', ''))[:200]}...'"
)
self.observations.append(observation_string)
content_after_handler = self._extract_content_from_llm_response(resp_after_handler)
if content_after_handler:
self.observations.append(f"Response after tool execution: {content_after_handler}")
else:
logger.info("ReActAgent: LLM response after handler had no textual content.")
if log_context:
log_context.stacks.append(
{"component": "agent_tool_calls", "data": {"tool_calls": self.tool_calls.copy()}}
)
yield {"sources": retrieved_data}
display_tool_calls = []
for tc in self.tool_calls:
cleaned_tc = tc.copy()
if len(str(cleaned_tc.get("result", ""))) > 50:
cleaned_tc["result"] = str(cleaned_tc["result"])[:50] + "..."
display_tool_calls.append(cleaned_tc)
if display_tool_calls:
yield {"tool_calls": display_tool_calls}
if "SATISFIED" in content_after_handler:
logger.info("ReActAgent: LLM satisfied with the plan and data. Stopping reasoning.")
break
# 3. Create Final Answer based on all observations
final_answer_stream = self._create_final_answer(query, self.observations, log_context)
for answer_chunk in final_answer_stream:
yield {"answer": answer_chunk}
logger.info("ReActAgent: Finished generating final answer.")
def _create_plan(
self, query: str, docs_data: str, log_context: LogContext = None
) -> Generator[str, None, None]:
plan_prompt_filled = planning_prompt_template.replace("{query}", query)
if "{summaries}" in plan_prompt_filled:
summaries = docs_data if docs_data else "No documents retrieved."
plan_prompt_filled = plan_prompt_filled.replace("{summaries}", summaries)
plan_prompt_filled = plan_prompt_filled.replace("{prompt}", self.prompt or "")
plan_prompt_filled = plan_prompt_filled.replace("{observations}", "\n".join(self.observations))
messages = [{"role": "user", "content": plan_prompt_filled}]
plan_stream_from_llm = self.llm.gen_stream(
model=self.gpt_model, messages=messages, tools=getattr(self, 'tools', None) # Use self.tools
)
if log_context:
data = build_stack_data(self.llm)
log_context.stacks.append({"component": "planning_llm", "data": data})
for chunk in plan_stream_from_llm:
content_piece = self._extract_content_from_llm_response(chunk)
if content_piece:
yield content_piece
def _create_final_answer(
self, query: str, observations: List[str], log_context: LogContext = None
) -> Generator[str, None, None]:
observation_string = "\n".join(observations)
max_obs_len = 10000
if len(observation_string) > max_obs_len:
observation_string = observation_string[:max_obs_len] + "\n...[observations truncated]"
logger.warning("ReActAgent: Truncated observations for final answer prompt due to length.")
final_answer_prompt_filled = final_prompt_template.format(
query=query, observations=observation_string
)
messages = [{"role": "user", "content": final_answer_prompt_filled}]
# Final answer should synthesize, not call tools.
final_answer_stream_from_llm = self.llm.gen_stream(
model=self.gpt_model, messages=messages, tools=None
)
if log_context:
data = build_stack_data(self.llm)
log_context.stacks.append({"component": "final_answer_llm", "data": data})
for chunk in final_answer_stream_from_llm:
content_piece = self._extract_content_from_llm_response(chunk)
if content_piece:
yield content_piece

72
application/agents/tools/api_tool.py Normal file
View File

@@ -0,0 +1,72 @@
import json
import requests
from application.agents.tools.base import Tool
class APITool(Tool):
"""
API Tool
A flexible tool for performing various API actions (e.g., sending messages, retrieving data) via custom user-specified APIs
"""
def __init__(self, config):
self.config = config
self.url = config.get("url", "")
self.method = config.get("method", "GET")
self.headers = config.get("headers", {"Content-Type": "application/json"})
self.query_params = config.get("query_params", {})
def execute_action(self, action_name, **kwargs):
return self._make_api_call(
self.url, self.method, self.headers, self.query_params, kwargs
)
def _make_api_call(self, url, method, headers, query_params, body):
if query_params:
url = f"{url}?{requests.compat.urlencode(query_params)}"
# if isinstance(body, dict):
# body = json.dumps(body)
response = None
try:
print(f"Making API call: {method} {url} with body: {body}")
if not body:
body = None
response = requests.request(method, url, headers=headers, data=body)
response.raise_for_status()
content_type = response.headers.get(
"Content-Type", "application/json"
).lower()
if "application/json" in content_type:
try:
data = response.json()
except json.JSONDecodeError as e:
print(f"Error decoding JSON: {e}. Raw response: {response.text}")
return {
"status_code": response.status_code,
"message": f"API call returned invalid JSON. Error: {e}",
"data": response.text,
}
elif "text/" in content_type or "application/xml" in content_type:
data = response.text
elif not response.content:
data = None
else:
print(f"Unsupported content type: {content_type}")
data = response.content
return {
"status_code": response.status_code,
"data": data,
"message": "API call successful.",
}
except requests.exceptions.RequestException as e:
return {
"status_code": response.status_code if response else None,
"message": f"API call failed: {str(e)}",
}
def get_actions_metadata(self):
return []
def get_config_requirements(self):
return {}
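# Minimal subclass sketch (illustrative): a tool that echoes its arguments back.
#   class EchoTool(Tool):
#       def execute_action(self, action_name, **kwargs):
#           return kwargs
#       def get_actions_metadata(self):
#           return [{"name": "echo", "description": "Echo arguments back",
#                    "parameters": {"type": "object", "properties": {}}}]
#       def get_config_requirements(self):
#           return {}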

21
application/agents/tools/base.py Normal file
View File

@@ -0,0 +1,21 @@
from abc import ABC, abstractmethod
class Tool(ABC):
@abstractmethod
def execute_action(self, action_name: str, **kwargs):
pass
@abstractmethod
def get_actions_metadata(self):
"""
Returns a list of JSON objects describing the actions supported by the tool.
"""
pass
@abstractmethod
def get_config_requirements(self):
"""
Returns a dictionary describing the configuration requirements for the tool.
"""
pass

View File

@@ -0,0 +1,217 @@
import requests
from application.agents.tools.base import Tool
class BraveSearchTool(Tool):
"""
Brave Search
A tool for performing web and image searches using the Brave Search API.
Requires an API key for authentication.
"""
def __init__(self, config):
self.config = config
self.token = config.get("token", "")
self.base_url = "https://api.search.brave.com/res/v1"
def execute_action(self, action_name, **kwargs):
actions = {
"brave_web_search": self._web_search,
"brave_image_search": self._image_search,
}
if action_name in actions:
return actions[action_name](**kwargs)
else:
raise ValueError(f"Unknown action: {action_name}")
def _web_search(self, query, country="ALL", search_lang="en", count=10,
offset=0, safesearch="off", freshness=None,
result_filter=None, extra_snippets=False, summary=False):
"""
Performs a web search using the Brave Search API.
"""
print(f"Performing Brave web search for: {query}")
url = f"{self.base_url}/web/search"
# Build query parameters
params = {
"q": query,
"country": country,
"search_lang": search_lang,
"count": min(count, 20),
"offset": min(offset, 9),
"safesearch": safesearch
}
# Add optional parameters only if they have values
if freshness:
params["freshness"] = freshness
if result_filter:
params["result_filter"] = result_filter
if extra_snippets:
params["extra_snippets"] = 1
if summary:
params["summary"] = 1
# Set up headers
headers = {
"Accept": "application/json",
"Accept-Encoding": "gzip",
"X-Subscription-Token": self.token
}
# Make the request
response = requests.get(url, params=params, headers=headers)
if response.status_code == 200:
return {
"status_code": response.status_code,
"results": response.json(),
"message": "Search completed successfully."
}
else:
return {
"status_code": response.status_code,
"message": f"Search failed with status code: {response.status_code}."
}
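# Example (illustrative): with the defaults above, _web_search("docsgpt") issues
# GET https://api.search.brave.com/res/v1/web/search
#   ?q=docsgpt&country=ALL&search_lang=en&count=10&offset=0&safesearch=off
# with the X-Subscription-Token header set to the configured API key.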
def _image_search(self, query, country="ALL", search_lang="en", count=5,
safesearch="off", spellcheck=False):
"""
Performs an image search using the Brave Search API.
"""
print(f"Performing Brave image search for: {query}")
url = f"{self.base_url}/images/search"
# Build query parameters
params = {
"q": query,
"country": country,
"search_lang": search_lang,
"count": min(count, 100), # API max is 100
"safesearch": safesearch,
"spellcheck": 1 if spellcheck else 0
}
# Set up headers
headers = {
"Accept": "application/json",
"Accept-Encoding": "gzip",
"X-Subscription-Token": self.token
}
# Make the request
response = requests.get(url, params=params, headers=headers)
if response.status_code == 200:
return {
"status_code": response.status_code,
"results": response.json(),
"message": "Image search completed successfully."
}
else:
return {
"status_code": response.status_code,
"message": f"Image search failed with status code: {response.status_code}."
}
def get_actions_metadata(self):
return [
{
"name": "brave_web_search",
"description": "Perform a web search using Brave Search",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The search query (max 400 characters, 50 words)",
},
# "country": {
# "type": "string",
# "description": "The 2-character country code (default: US)",
# },
"search_lang": {
"type": "string",
"description": "The search language preference (default: en)",
},
# "count": {
# "type": "integer",
# "description": "Number of results to return (max 20, default: 10)",
# },
# "offset": {
# "type": "integer",
# "description": "Pagination offset (max 9, default: 0)",
# },
# "safesearch": {
# "type": "string",
# "description": "Filter level for adult content (off, moderate, strict)",
# },
"freshness": {
"type": "string",
"description": "Time filter for results (pd: last 24h, pw: last week, pm: last month, py: last year)",
},
# "result_filter": {
# "type": "string",
# "description": "Comma-delimited list of result types to include",
# },
# "extra_snippets": {
# "type": "boolean",
# "description": "Get additional excerpts from result pages",
# },
# "summary": {
# "type": "boolean",
# "description": "Enable summary generation in search results",
# }
},
"required": ["query"],
"additionalProperties": False,
},
},
{
"name": "brave_image_search",
"description": "Perform an image search using Brave Search",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The search query (max 400 characters, 50 words)",
},
# "country": {
# "type": "string",
# "description": "The 2-character country code (default: US)",
# },
# "search_lang": {
# "type": "string",
# "description": "The search language preference (default: en)",
# },
"count": {
"type": "integer",
"description": "Number of results to return (max 100, default: 5)",
},
# "safesearch": {
# "type": "string",
# "description": "Filter level for adult content (off, strict). Default: strict",
# },
# "spellcheck": {
# "type": "boolean",
# "description": "Whether to spellcheck provided query (default: true)",
# }
},
"required": ["query"],
"additionalProperties": False,
},
}
]
def get_config_requirements(self):
return {
"token": {
"type": "string",
"description": "Brave Search API key for authentication"
},
}

View File

@@ -0,0 +1,76 @@
import requests
from application.agents.tools.base import Tool
class CryptoPriceTool(Tool):
"""
CryptoPrice
A tool for retrieving cryptocurrency prices using the CryptoCompare public API
"""
def __init__(self, config):
self.config = config
def execute_action(self, action_name, **kwargs):
actions = {"cryptoprice_get": self._get_price}
if action_name in actions:
return actions[action_name](**kwargs)
else:
raise ValueError(f"Unknown action: {action_name}")
def _get_price(self, symbol, currency):
"""
Fetches the current price of a given cryptocurrency symbol in the specified currency.
Example:
symbol = "BTC"
currency = "USD"
returns price in USD.
"""
url = f"https://min-api.cryptocompare.com/data/price?fsym={symbol.upper()}&tsyms={currency.upper()}"
response = requests.get(url)
if response.status_code == 200:
data = response.json()
if currency.upper() in data:
return {
"status_code": response.status_code,
"price": data[currency.upper()],
"message": f"Price of {symbol.upper()} in {currency.upper()} retrieved successfully.",
}
else:
return {
"status_code": response.status_code,
"message": f"Couldn't find price for {symbol.upper()} in {currency.upper()}.",
}
else:
return {
"status_code": response.status_code,
"message": "Failed to retrieve price.",
}
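# Example (illustrative): _get_price("btc", "usd") requests
# https://min-api.cryptocompare.com/data/price?fsym=BTC&tsyms=USD and, on
# success, returns {"status_code": 200, "price": <float>, "message": "..."}.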
def get_actions_metadata(self):
return [
{
"name": "cryptoprice_get",
"description": "Retrieve the price of a specified cryptocurrency in a given currency",
"parameters": {
"type": "object",
"properties": {
"symbol": {
"type": "string",
"description": "The cryptocurrency symbol (e.g. BTC)",
},
"currency": {
"type": "string",
"description": "The currency in which you want the price (e.g. USD)",
},
},
"required": ["symbol", "currency"],
"additionalProperties": False,
},
}
]
def get_config_requirements(self):
# No specific configuration needed for this tool as it just queries a public endpoint
return {}

View File

@@ -0,0 +1,127 @@
import requests
from application.agents.tools.base import Tool
class NtfyTool(Tool):
"""
Ntfy Tool
A tool for sending notifications to ntfy topics on a specified server.
"""
def __init__(self, config):
"""
Initialize the NtfyTool with configuration.
Args:
config (dict): Configuration dictionary containing the access token.
"""
self.config = config
self.token = config.get("token", "")
def execute_action(self, action_name, **kwargs):
"""
Execute the specified action with given parameters.
Args:
action_name (str): Name of the action to execute.
**kwargs: Parameters for the action, including server_url.
Returns:
dict: Result of the action with status code and message.
Raises:
ValueError: If the action name is unknown.
"""
actions = {
"ntfy_send_message": self._send_message,
}
if action_name in actions:
return actions[action_name](**kwargs)
else:
raise ValueError(f"Unknown action: {action_name}")
def _send_message(self, server_url, message, topic, title=None, priority=None):
"""
Send a message to an ntfy topic on the specified server.
Args:
server_url (str): Base URL of the ntfy server (e.g., https://ntfy.sh).
message (str): The message text to send.
topic (str): The topic to send the message to.
title (str, optional): Title of the notification.
priority (int, optional): Priority of the notification (1-5).
Returns:
dict: Response with status code and a confirmation message.
Raises:
ValueError: If priority is not an integer between 1 and 5.
"""
url = f"{server_url.rstrip('/')}/{topic}"
headers = {}
if title:
headers["X-Title"] = title
if priority:
try:
priority = int(priority)
except (ValueError, TypeError):
raise ValueError("Priority must be convertible to an integer")
if priority < 1 or priority > 5:
raise ValueError("Priority must be an integer between 1 and 5")
headers["X-Priority"] = str(priority)
if self.token:
headers["Authorization"] = f"Basic {self.token}"
data = message.encode("utf-8")
response = requests.post(url, headers=headers, data=data)
return {"status_code": response.status_code, "message": "Message sent"}
def get_actions_metadata(self):
"""
Provide metadata about available actions.
Returns:
list: List of dictionaries describing each action.
"""
return [
{
"name": "ntfy_send_message",
"description": "Send a notification to an ntfy topic",
"parameters": {
"type": "object",
"properties": {
"server_url": {
"type": "string",
"description": "Base URL of the ntfy server",
},
"message": {
"type": "string",
"description": "Text to send in the notification",
},
"topic": {
"type": "string",
"description": "Topic to send the notification to",
},
"title": {
"type": "string",
"description": "Title of the notification (optional)",
},
"priority": {
"type": "integer",
"description": "Priority of the notification (1-5, optional)",
},
},
"required": ["server_url", "message", "topic"],
"additionalProperties": False,
},
},
]
def get_config_requirements(self):
"""
Specify the configuration requirements.
Returns:
dict: Dictionary describing required config parameters.
"""
return {
"token": {"type": "string", "description": "Access token for authentication"},
}

View File

@@ -0,0 +1,163 @@
import psycopg2
from application.agents.tools.base import Tool
class PostgresTool(Tool):
"""
PostgreSQL Database Tool
A tool for connecting to a PostgreSQL database using a connection string,
executing SQL queries, and retrieving schema information.
"""
def __init__(self, config):
self.config = config
self.connection_string = config.get("token", "")
def execute_action(self, action_name, **kwargs):
actions = {
"postgres_execute_sql": self._execute_sql,
"postgres_get_schema": self._get_schema,
}
if action_name in actions:
return actions[action_name](**kwargs)
else:
raise ValueError(f"Unknown action: {action_name}")
def _execute_sql(self, sql_query):
"""
Executes an SQL query against the PostgreSQL database using a connection string.
"""
conn = None # Initialize conn to None for error handling
try:
conn = psycopg2.connect(self.connection_string)
cur = conn.cursor()
cur.execute(sql_query)
conn.commit()
if sql_query.strip().lower().startswith("select"):
column_names = [desc[0] for desc in cur.description] if cur.description else []
results = []
rows = cur.fetchall()
for row in rows:
results.append(dict(zip(column_names, row)))
response_data = {"data": results, "column_names": column_names}
else:
row_count = cur.rowcount
response_data = {"message": f"Query executed successfully, {row_count} rows affected."}
cur.close()
return {
"status_code": 200,
"message": "SQL query executed successfully.",
"response_data": response_data,
}
except psycopg2.Error as e:
error_message = f"Database error: {e}"
print(f"Database error: {e}")
return {
"status_code": 500,
"message": "Failed to execute SQL query.",
"error": error_message,
}
finally:
if conn: # Ensure connection is closed even if errors occur
conn.close()
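# Example (illustrative, assuming a hypothetical "users" table):
#   _execute_sql("SELECT id, name FROM users LIMIT 1") returns
#   {"status_code": 200, "message": "SQL query executed successfully.",
#    "response_data": {"data": [{"id": 1, "name": "Ada"}],
#                      "column_names": ["id", "name"]}}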
def _get_schema(self, db_name):
"""
Retrieves the schema of the PostgreSQL database using a connection string.
"""
conn = None # Initialize conn to None for error handling
try:
conn = psycopg2.connect(self.connection_string)
cur = conn.cursor()
cur.execute("""
SELECT
table_name,
column_name,
data_type,
column_default,
is_nullable
FROM
information_schema.columns
WHERE
table_schema = 'public'
ORDER BY
table_name,
ordinal_position;
""")
schema_data = {}
for row in cur.fetchall():
table_name, column_name, data_type, column_default, is_nullable = row
if table_name not in schema_data:
schema_data[table_name] = []
schema_data[table_name].append({
"column_name": column_name,
"data_type": data_type,
"column_default": column_default,
"is_nullable": is_nullable
})
cur.close()
return {
"status_code": 200,
"message": "Database schema retrieved successfully.",
"schema": schema_data,
}
except psycopg2.Error as e:
error_message = f"Database error: {e}"
print(f"Database error: {e}")
return {
"status_code": 500,
"message": "Failed to retrieve database schema.",
"error": error_message,
}
finally:
if conn: # Ensure connection is closed even if errors occur
conn.close()
def get_actions_metadata(self):
return [
{
"name": "postgres_execute_sql",
"description": "Execute an SQL query against the PostgreSQL database and return the results. Use this tool to interact with the database, e.g., retrieve specific data or perform updates. Only SELECT queries will return data, other queries will return execution status.",
"parameters": {
"type": "object",
"properties": {
"sql_query": {
"type": "string",
"description": "The SQL query to execute.",
},
},
"required": ["sql_query"],
"additionalProperties": False,
},
},
{
"name": "postgres_get_schema",
"description": "Retrieve the schema of the PostgreSQL database, including tables and their columns. Use this to understand the database structure before executing queries. db_name is 'default' if not provided.",
"parameters": {
"type": "object",
"properties": {
"db_name": {
"type": "string",
"description": "The name of the database to retrieve the schema for.",
},
},
"required": ["db_name"],
"additionalProperties": False,
},
},
]
def get_config_requirements(self):
return {
"token": {
"type": "string",
"description": "PostgreSQL database connection string (e.g., 'postgresql://user:password@host:port/dbname')",
},
}

View File

@@ -0,0 +1,83 @@
import requests
from markdownify import markdownify
from application.agents.tools.base import Tool
from urllib.parse import urlparse
class ReadWebpageTool(Tool):
"""
Read Webpage (browser)
A tool to fetch the HTML content of a URL and convert it to Markdown.
"""
def __init__(self, config=None):
"""
Initializes the tool.
:param config: Optional configuration dictionary. Not used by this tool.
"""
self.config = config
def execute_action(self, action_name: str, **kwargs) -> str:
"""
Executes the specified action. For this tool, the only action is 'read_webpage'.
:param action_name: The name of the action to execute. Should be 'read_webpage'.
:param kwargs: Keyword arguments, must include 'url'.
:return: The Markdown content of the webpage or an error message.
"""
if action_name != "read_webpage":
return f"Error: Unknown action '{action_name}'. This tool only supports 'read_webpage'."
url = kwargs.get("url")
if not url:
return "Error: URL parameter is missing."
# Ensure the URL has a scheme (if not, default to http)
parsed_url = urlparse(url)
if not parsed_url.scheme:
url = "http://" + url
try:
response = requests.get(url, timeout=10, headers={'User-Agent': 'DocsGPT-Agent/1.0'})
response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx)
html_content = response.text
#soup = BeautifulSoup(html_content, 'html.parser')
markdown_content = markdownify(html_content, heading_style="ATX", newline_style="BACKSLASH")
return markdown_content
except requests.exceptions.RequestException as e:
return f"Error fetching URL {url}: {e}"
except Exception as e:
return f"Error processing URL {url}: {e}"
def get_actions_metadata(self):
"""
Returns metadata for the actions supported by this tool.
"""
return [
{
"name": "read_webpage",
"description": "Fetches the HTML content of a given URL and returns it as clean Markdown text. Input must be a valid URL.",
"parameters": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "The fully qualified URL of the webpage to read (e.g., 'https://www.example.com').",
}
},
"required": ["url"],
"additionalProperties": False,
},
}
]
def get_config_requirements(self):
"""
Returns a dictionary describing the configuration requirements for the tool.
This tool does not require any specific configuration.
"""
return {}

View File

@@ -0,0 +1,86 @@
import requests
from application.agents.tools.base import Tool
class TelegramTool(Tool):
"""
Telegram Bot
A flexible Telegram tool for performing various actions (e.g., sending messages, images).
Requires a bot token and chat ID for configuration
"""
def __init__(self, config):
self.config = config
self.token = config.get("token", "")
def execute_action(self, action_name, **kwargs):
actions = {
"telegram_send_message": self._send_message,
"telegram_send_image": self._send_image,
}
if action_name in actions:
return actions[action_name](**kwargs)
else:
raise ValueError(f"Unknown action: {action_name}")
def _send_message(self, text, chat_id):
print(f"Sending message: {text}")
url = f"https://api.telegram.org/bot{self.token}/sendMessage"
payload = {"chat_id": chat_id, "text": text}
response = requests.post(url, data=payload)
return {"status_code": response.status_code, "message": "Message sent"}
def _send_image(self, image_url, chat_id):
print(f"Sending image: {image_url}")
url = f"https://api.telegram.org/bot{self.token}/sendPhoto"
payload = {"chat_id": chat_id, "photo": image_url}
response = requests.post(url, data=payload)
return {"status_code": response.status_code, "message": "Image sent"}
def get_actions_metadata(self):
return [
{
"name": "telegram_send_message",
"description": "Send a notification to Telegram chat",
"parameters": {
"type": "object",
"properties": {
"text": {
"type": "string",
"description": "Text to send in the notification",
},
"chat_id": {
"type": "string",
"description": "Chat ID to send the notification to",
},
},
"required": ["text"],
"additionalProperties": False,
},
},
{
"name": "telegram_send_image",
"description": "Send an image to the Telegram chat",
"parameters": {
"type": "object",
"properties": {
"image_url": {
"type": "string",
"description": "URL of the image to send",
},
"chat_id": {
"type": "string",
"description": "Chat ID to send the image to",
},
},
"required": ["image_url"],
"additionalProperties": False,
},
},
]
def get_config_requirements(self):
return {
"token": {"type": "string", "description": "Bot token for authentication"},
}

37
application/agents/tools/tool_action_parser.py Normal file
View File

@@ -0,0 +1,37 @@
import json
import logging
logger = logging.getLogger(__name__)
class ToolActionParser:
def __init__(self, llm_type):
self.llm_type = llm_type
self.parsers = {
"OpenAILLM": self._parse_openai_llm,
"GoogleLLM": self._parse_google_llm,
}
def parse_args(self, call):
parser = self.parsers.get(self.llm_type, self._parse_openai_llm)
return parser(call)
def _parse_openai_llm(self, call):
try:
call_args = json.loads(call.arguments)
tool_id = call.name.split("_")[-1]
action_name = call.name.rsplit("_", 1)[0]
except (AttributeError, TypeError, json.JSONDecodeError) as e:
logger.error(f"Error parsing OpenAI LLM call: {e}")
return None, None, None
return tool_id, action_name, call_args
def _parse_google_llm(self, call):
try:
call_args = call.arguments
tool_id = call.name.split("_")[-1]
action_name = call.name.rsplit("_", 1)[0]
except (AttributeError, TypeError) as e:
logger.error(f"Error parsing Google LLM call: {e}")
return None, None, None
return tool_id, action_name, call_args
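# Example (illustrative): a call named "send_message_507f1f77bcf86cd799439011"
# parses to action_name "send_message" and tool_id "507f1f77bcf86cd799439011",
# because the tool id is appended as the final "_"-separated segment.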

42
application/agents/tools/tool_manager.py Normal file
View File

@@ -0,0 +1,42 @@
import importlib
import inspect
import os
import pkgutil
from application.agents.tools.base import Tool
class ToolManager:
def __init__(self, config):
self.config = config
self.tools = {}
self.load_tools()
def load_tools(self):
tools_dir = os.path.join(os.path.dirname(__file__))
for finder, name, ispkg in pkgutil.iter_modules([tools_dir]):
if name == "base" or name.startswith("__"):
continue
module = importlib.import_module(f"application.agents.tools.{name}")
for member_name, obj in inspect.getmembers(module, inspect.isclass):
if issubclass(obj, Tool) and obj is not Tool:
tool_config = self.config.get(name, {})
self.tools[name] = obj(tool_config)
def load_tool(self, tool_name, tool_config):
self.config[tool_name] = tool_config
module = importlib.import_module(f"application.agents.tools.{tool_name}")
for member_name, obj in inspect.getmembers(module, inspect.isclass):
if issubclass(obj, Tool) and obj is not Tool:
return obj(tool_config)
def execute_action(self, tool_name, action_name, **kwargs):
if tool_name not in self.tools:
raise ValueError(f"Tool '{tool_name}' not loaded")
return self.tools[tool_name].execute_action(action_name, **kwargs)
def get_all_actions_metadata(self):
metadata = []
for tool in self.tools.values():
metadata.extend(tool.get_actions_metadata())
return metadata
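# Usage sketch (illustrative, mirroring how BaseAgent uses this class):
#   tm = ToolManager(config={})
#   tool = tm.load_tool("api_tool", tool_config={"url": "...", "method": "GET",
#                                                "headers": {}, "query_params": {}})
#   result = tool.execute_action("<action_name>")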

View File

View File

914
application/api/answer/routes.py Normal file
View File

@@ -0,0 +1,914 @@
import asyncio
import datetime
import json
import logging
import os
import traceback
from bson.dbref import DBRef
from bson.objectid import ObjectId
from flask import Blueprint, make_response, request, Response
from flask_restx import fields, Namespace, Resource
from application.agents.agent_creator import AgentCreator
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.error import bad_request
from application.extensions import api
from application.llm.llm_creator import LLMCreator
from application.retriever.retriever_creator import RetrieverCreator
from application.utils import check_required_fields, limit_chat_history
logger = logging.getLogger(__name__)
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
conversations_collection = db["conversations"]
sources_collection = db["sources"]
prompts_collection = db["prompts"]
agents_collection = db["agents"]
user_logs_collection = db["user_logs"]
attachments_collection = db["attachments"]
answer = Blueprint("answer", __name__)
answer_ns = Namespace("answer", description="Answer related operations", path="/")
api.add_namespace(answer_ns)
gpt_model = ""
# to have some kind of default behaviour
if settings.LLM_PROVIDER == "openai":
gpt_model = "gpt-4o-mini"
elif settings.LLM_PROVIDER == "anthropic":
gpt_model = "claude-2"
elif settings.LLM_PROVIDER == "groq":
gpt_model = "llama3-8b-8192"
elif settings.LLM_PROVIDER == "novita":
gpt_model = "deepseek/deepseek-r1"
if settings.LLM_NAME: # in case there is particular model name configured
gpt_model = settings.LLM_NAME
# load the prompts
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
chat_combine_template = f.read()
with open(os.path.join(current_dir, "prompts", "chat_reduce_prompt.txt"), "r") as f:
chat_reduce_template = f.read()
with open(os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r") as f:
chat_combine_creative = f.read()
with open(os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r") as f:
chat_combine_strict = f.read()
api_key_set = settings.API_KEY is not None
embeddings_key_set = settings.EMBEDDINGS_KEY is not None
async def async_generate(chain, question, chat_history):
result = await chain.arun({"question": question, "chat_history": chat_history})
return result
def run_async_chain(chain, question, chat_history):
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
result = {}
try:
answer = loop.run_until_complete(async_generate(chain, question, chat_history))
finally:
loop.close()
result["answer"] = answer
return result
def get_agent_key(agent_id, user_id):
if not agent_id:
return None, False, None
try:
agent = agents_collection.find_one({"_id": ObjectId(agent_id)})
if agent is None:
raise Exception("Agent not found", 404)
is_owner = agent.get("user") == user_id
if is_owner:
agents_collection.update_one(
{"_id": ObjectId(agent_id)},
{"$set": {"lastUsedAt": datetime.datetime.now(datetime.timezone.utc)}},
)
return str(agent["key"]), False, None
is_shared_with_user = agent.get(
"shared_publicly", False
) or user_id in agent.get("shared_with", [])
if is_shared_with_user:
return str(agent["key"]), True, agent.get("shared_token")
raise Exception("Unauthorized access to the agent", 403)
except Exception as e:
logger.error(f"Error in get_agent_key: {str(e)}", exc_info=True)
raise
def get_data_from_api_key(api_key):
data = agents_collection.find_one({"key": api_key})
if not data:
raise Exception("Invalid API Key, please generate a new key", 401)
source = data.get("source")
if isinstance(source, DBRef):
source_doc = db.dereference(source)
data["source"] = str(source_doc["_id"])
data["retriever"] = source_doc.get("retriever", data.get("retriever"))
else:
data["source"] = {}
return data
def get_retriever(source_id: str):
doc = sources_collection.find_one({"_id": ObjectId(source_id)})
if doc is None:
raise Exception("Source document does not exist", 404)
retriever_name = None if "retriever" not in doc else doc["retriever"]
return retriever_name
def is_azure_configured():
return (
settings.OPENAI_API_BASE
and settings.OPENAI_API_VERSION
and settings.AZURE_DEPLOYMENT_NAME
)
def save_conversation(
conversation_id,
question,
response,
thought,
source_log_docs,
tool_calls,
llm,
decoded_token,
index=None,
api_key=None,
agent_id=None,
is_shared_usage=False,
shared_token=None,
attachment_ids=None,
):
current_time = datetime.datetime.now(datetime.timezone.utc)
if conversation_id is not None and index is not None:
conversations_collection.update_one(
{"_id": ObjectId(conversation_id), f"queries.{index}": {"$exists": True}},
{
"$set": {
f"queries.{index}.prompt": question,
f"queries.{index}.response": response,
f"queries.{index}.thought": thought,
f"queries.{index}.sources": source_log_docs,
f"queries.{index}.tool_calls": tool_calls,
f"queries.{index}.timestamp": current_time,
f"queries.{index}.attachments": attachment_ids,
}
},
)
# Remove any following queries from the array
conversations_collection.update_one(
{"_id": ObjectId(conversation_id), f"queries.{index}": {"$exists": True}},
{"$push": {"queries": {"$each": [], "$slice": index + 1}}},
)
elif conversation_id is not None and conversation_id != "None":
conversations_collection.update_one(
{"_id": ObjectId(conversation_id)},
{
"$push": {
"queries": {
"prompt": question,
"response": response,
"thought": thought,
"sources": source_log_docs,
"tool_calls": tool_calls,
"timestamp": current_time,
"attachments": attachment_ids,
}
}
},
)
else:
# create new conversation
# generate summary
messages_summary = [
{
"role": "assistant",
"content": "Summarise following conversation in no more than 3 "
"words, respond ONLY with the summary, use the same "
"language as the system",
},
{
"role": "user",
"content": "Summarise following conversation in no more than 3 words, "
"respond ONLY with the summary, use the same language as the "
"system \n\nUser: " + question + "\n\n" + "AI: " + response,
},
]
completion = llm.gen(model=gpt_model, messages=messages_summary, max_tokens=30)
conversation_data = {
"user": decoded_token.get("sub"),
"date": datetime.datetime.utcnow(),
"name": completion,
"queries": [
{
"prompt": question,
"response": response,
"thought": thought,
"sources": source_log_docs,
"tool_calls": tool_calls,
"timestamp": current_time,
"attachments": attachment_ids,
}
],
}
if api_key:
if agent_id:
conversation_data["agent_id"] = agent_id
if is_shared_usage:
conversation_data["is_shared_usage"] = is_shared_usage
conversation_data["shared_token"] = shared_token
api_key_doc = agents_collection.find_one({"key": api_key})
if api_key_doc:
conversation_data["api_key"] = api_key_doc["key"]
conversation_id = conversations_collection.insert_one(
conversation_data
).inserted_id
return conversation_id
def get_prompt(prompt_id):
if prompt_id == "default":
prompt = chat_combine_template
elif prompt_id == "creative":
prompt = chat_combine_creative
elif prompt_id == "strict":
prompt = chat_combine_strict
else:
prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})["content"]
return prompt
def complete_stream(
question,
agent,
retriever,
conversation_id,
user_api_key,
decoded_token,
isNoneDoc=False,
index=None,
should_save_conversation=True,
attachment_ids=None,
agent_id=None,
is_shared_usage=False,
shared_token=None,
):
try:
response_full, thought, source_log_docs, tool_calls = "", "", [], []
answer = agent.gen(query=question, retriever=retriever)
for line in answer:
if "answer" in line:
response_full += str(line["answer"])
data = json.dumps({"type": "answer", "answer": line["answer"]})
yield f"data: {data}\n\n"
elif "sources" in line:
truncated_sources = []
source_log_docs = line["sources"]
for source in line["sources"]:
truncated_source = source.copy()
if "text" in truncated_source:
truncated_source["text"] = (
truncated_source["text"][:100].strip() + "..."
)
truncated_sources.append(truncated_source)
if len(truncated_sources) > 0:
data = json.dumps({"type": "source", "source": truncated_sources})
yield f"data: {data}\n\n"
elif "tool_calls" in line:
tool_calls = line["tool_calls"]
elif "thought" in line:
thought += line["thought"]
data = json.dumps({"type": "thought", "thought": line["thought"]})
yield f"data: {data}\n\n"
elif "type" in line:
data = json.dumps(line)
yield f"data: {data}\n\n"
if isNoneDoc:
for doc in source_log_docs:
doc["source"] = "None"
llm = LLMCreator.create_llm(
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
user_api_key=user_api_key,
decoded_token=decoded_token,
)
if should_save_conversation:
conversation_id = save_conversation(
conversation_id,
question,
response_full,
thought,
source_log_docs,
tool_calls,
llm,
decoded_token,
index,
api_key=user_api_key,
attachment_ids=attachment_ids,
agent_id=agent_id,
is_shared_usage=is_shared_usage,
shared_token=shared_token,
)
else:
conversation_id = None
# send data.type = "end" to indicate that the stream has ended as json
data = json.dumps({"type": "id", "id": str(conversation_id)})
yield f"data: {data}\n\n"
retriever_params = retriever.get_params()
user_logs_collection.insert_one(
{
"action": "stream_answer",
"level": "info",
"user": decoded_token.get("sub"),
"api_key": user_api_key,
"question": question,
"response": response_full,
"sources": source_log_docs,
"retriever_params": retriever_params,
"attachments": attachment_ids,
"timestamp": datetime.datetime.now(datetime.timezone.utc),
}
)
data = json.dumps({"type": "end"})
yield f"data: {data}\n\n"
except Exception as e:
logger.error(f"Error in stream: {str(e)}", exc_info=True)
data = json.dumps(
{
"type": "error",
"error": "Please try again later. We apologize for any inconvenience.",
}
)
yield f"data: {data}\n\n"
return
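# Example SSE output (illustrative) as emitted by complete_stream above:
#   data: {"type": "answer", "answer": "Hello"}
#   data: {"type": "source", "source": [{"text": "..."}]}
#   data: {"type": "thought", "thought": "..."}
#   data: {"type": "id", "id": "<conversation id>"}
#   data: {"type": "end"}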
@answer_ns.route("/stream")
class Stream(Resource):
stream_model = api.model(
"StreamModel",
{
"question": fields.String(
required=True, description="Question to be asked"
),
"history": fields.List(
fields.String, required=False, description="Chat history"
),
"conversation_id": fields.String(
required=False, description="Conversation ID"
),
"prompt_id": fields.String(
required=False, default="default", description="Prompt ID"
),
"chunks": fields.Integer(
required=False, default=2, description="Number of chunks"
),
"token_limit": fields.Integer(required=False, description="Token limit"),
"retriever": fields.String(required=False, description="Retriever type"),
"api_key": fields.String(required=False, description="API key"),
"active_docs": fields.String(
required=False, description="Active documents"
),
"isNoneDoc": fields.Boolean(
required=False, description="Flag indicating if no document is used"
),
"index": fields.Integer(
required=False, description="Index of the query to update"
),
"save_conversation": fields.Boolean(
required=False,
default=True,
description="Whether to save the conversation",
),
"attachments": fields.List(
fields.String, required=False, description="List of attachment IDs"
),
},
)
@api.expect(stream_model)
@api.doc(description="Stream a response based on the question and retriever")
def post(self):
data = request.get_json()
required_fields = ["question"]
if "index" in data:
required_fields = ["question", "conversation_id"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
save_conv = data.get("save_conversation", True)
try:
question = data["question"]
history = limit_chat_history(
json.loads(data.get("history", "[]")), gpt_model=gpt_model
)
conversation_id = data.get("conversation_id")
prompt_id = data.get("prompt_id", "default")
attachment_ids = data.get("attachments", [])
index = data.get("index", None)
chunks = int(data.get("chunks", 2))
token_limit = data.get("token_limit", settings.DEFAULT_MAX_HISTORY)
retriever_name = data.get("retriever", "classic")
agent_id = data.get("agent_id", None)
agent_type = settings.AGENT_NAME
decoded_token = getattr(request, "decoded_token", None)
user_sub = decoded_token.get("sub") if decoded_token else None
agent_key, is_shared_usage, shared_token = get_agent_key(agent_id, user_sub)
if agent_key:
data.update({"api_key": agent_key})
else:
agent_id = None
if "api_key" in data:
data_key = get_data_from_api_key(data["api_key"])
chunks = int(data_key.get("chunks", 2))
prompt_id = data_key.get("prompt_id", "default")
source = {"active_docs": data_key.get("source")}
retriever_name = data_key.get("retriever", retriever_name)
user_api_key = data["api_key"]
agent_type = data_key.get("agent_type", agent_type)
if is_shared_usage:
decoded_token = request.decoded_token
else:
decoded_token = {"sub": data_key.get("user")}
is_shared_usage = False
elif "active_docs" in data:
source = {"active_docs": data["active_docs"]}
retriever_name = get_retriever(data["active_docs"]) or retriever_name
user_api_key = None
decoded_token = request.decoded_token
else:
source = {}
user_api_key = None
decoded_token = request.decoded_token
if not decoded_token:
return make_response({"error": "Unauthorized"}, 401)
attachments = get_attachments_content(
attachment_ids, decoded_token.get("sub")
)
logger.info(
f"/stream - request_data: {data}, source: {source}, attachments: {len(attachments)}",
extra={"data": json.dumps({"request_data": data, "source": source})},
)
prompt = get_prompt(prompt_id)
if "isNoneDoc" in data and data["isNoneDoc"] is True:
chunks = 0
agent = AgentCreator.create_agent(
agent_type,
endpoint="stream",
llm_name=settings.LLM_PROVIDER,
gpt_model=gpt_model,
api_key=settings.API_KEY,
user_api_key=user_api_key,
prompt=prompt,
chat_history=history,
decoded_token=decoded_token,
attachments=attachments,
)
retriever = RetrieverCreator.create_retriever(
retriever_name,
source=source,
chat_history=history,
prompt=prompt,
chunks=chunks,
token_limit=token_limit,
gpt_model=gpt_model,
user_api_key=user_api_key,
decoded_token=decoded_token,
)
return Response(
complete_stream(
question=question,
agent=agent,
retriever=retriever,
conversation_id=conversation_id,
user_api_key=user_api_key,
decoded_token=decoded_token,
isNoneDoc=data.get("isNoneDoc"),
index=index,
should_save_conversation=save_conv,
attachment_ids=attachment_ids,
agent_id=agent_id,
is_shared_usage=is_shared_usage,
shared_token=shared_token,
),
mimetype="text/event-stream",
)
except ValueError:
message = "Malformed request body"
logger.error(f"/stream - error: {message}")
return Response(
error_stream_generate(message),
status=400,
mimetype="text/event-stream",
)
except Exception as e:
logger.error(
f"/stream - error: {str(e)} - traceback: {traceback.format_exc()}",
extra={"error": str(e), "traceback": traceback.format_exc()},
)
status_code = 400
return Response(
error_stream_generate("Unknown error occurred"),
status=status_code,
mimetype="text/event-stream",
)
def error_stream_generate(err_response):
data = json.dumps({"type": "error", "error": err_response})
yield f"data: {data}\n\n"
@answer_ns.route("/api/answer")
class Answer(Resource):
answer_model = api.model(
"AnswerModel",
{
"question": fields.String(
required=True, description="The question to answer"
),
"history": fields.List(
fields.String, required=False, description="Conversation history"
),
"conversation_id": fields.String(
required=False, description="Conversation ID"
),
"prompt_id": fields.String(
required=False, default="default", description="Prompt ID"
),
"chunks": fields.Integer(
required=False, default=2, description="Number of chunks"
),
"token_limit": fields.Integer(required=False, description="Token limit"),
"retriever": fields.String(required=False, description="Retriever type"),
"api_key": fields.String(required=False, description="API key"),
"active_docs": fields.String(
required=False, description="Active documents"
),
"isNoneDoc": fields.Boolean(
required=False, description="Flag indicating if no document is used"
),
},
)
@api.expect(answer_model)
@api.doc(description="Provide an answer based on the question and retriever")
def post(self):
data = request.get_json()
required_fields = ["question"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
question = data["question"]
history = limit_chat_history(
json.loads(data.get("history", "[]")), gpt_model=gpt_model
)
conversation_id = data.get("conversation_id")
prompt_id = data.get("prompt_id", "default")
chunks = int(data.get("chunks", 2))
token_limit = data.get("token_limit", settings.DEFAULT_MAX_HISTORY)
retriever_name = data.get("retriever", "classic")
agent_type = settings.AGENT_NAME
if "api_key" in data:
data_key = get_data_from_api_key(data["api_key"])
chunks = int(data_key.get("chunks", 2))
prompt_id = data_key.get("prompt_id", "default")
source = {"active_docs": data_key.get("source")}
retriever_name = data_key.get("retriever", retriever_name)
user_api_key = data["api_key"]
agent_type = data_key.get("agent_type", agent_type)
decoded_token = {"sub": data_key.get("user")}
elif "active_docs" in data:
source = {"active_docs": data["active_docs"]}
retriever_name = get_retriever(data["active_docs"]) or retriever_name
user_api_key = None
decoded_token = request.decoded_token
else:
source = {}
user_api_key = None
decoded_token = request.decoded_token
if not decoded_token:
return make_response({"error": "Unauthorized"}, 401)
prompt = get_prompt(prompt_id)
logger.info(
f"/api/answer - request_data: {data}, source: {source}",
extra={"data": json.dumps({"request_data": data, "source": source})},
)
agent = AgentCreator.create_agent(
agent_type,
endpoint="api/answer",
llm_name=settings.LLM_PROVIDER,
gpt_model=gpt_model,
api_key=settings.API_KEY,
user_api_key=user_api_key,
prompt=prompt,
chat_history=history,
decoded_token=decoded_token,
)
retriever = RetrieverCreator.create_retriever(
retriever_name,
source=source,
chat_history=history,
prompt=prompt,
chunks=chunks,
token_limit=token_limit,
gpt_model=gpt_model,
user_api_key=user_api_key,
decoded_token=decoded_token,
)
response_full = ""
source_log_docs = []
tool_calls = []
stream_ended = False
thought = ""
for line in complete_stream(
question=question,
agent=agent,
retriever=retriever,
conversation_id=conversation_id,
user_api_key=user_api_key,
decoded_token=decoded_token,
isNoneDoc=data.get("isNoneDoc"),
index=None,
should_save_conversation=False,
):
try:
event_data = line.replace("data: ", "").strip()
event = json.loads(event_data)
if event["type"] == "answer":
response_full += event["answer"]
elif event["type"] == "source":
source_log_docs = event["source"]
elif event["type"] == "tool_calls":
tool_calls = event["tool_calls"]
elif event["type"] == "thought":
thought = event["thought"]
elif event["type"] == "error":
logger.error(f"Error from stream: {event['error']}")
return bad_request(500, event["error"])
elif event["type"] == "end":
stream_ended = True
except (json.JSONDecodeError, KeyError) as e:
logger.warning(f"Error parsing stream event: {e}, line: {line}")
continue
if not stream_ended:
logger.error("Stream ended unexpectedly without an 'end' event.")
return bad_request(500, "Stream ended unexpectedly.")
if data.get("isNoneDoc"):
for doc in source_log_docs:
doc["source"] = "None"
llm = LLMCreator.create_llm(
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
user_api_key=user_api_key,
decoded_token=decoded_token,
)
result = {"answer": response_full, "sources": source_log_docs}
result["conversation_id"] = str(
save_conversation(
conversation_id,
question,
response_full,
thought,
source_log_docs,
tool_calls,
llm,
decoded_token,
api_key=user_api_key,
)
)
retriever_params = retriever.get_params()
user_logs_collection.insert_one(
{
"action": "api_answer",
"level": "info",
"user": decoded_token.get("sub"),
"api_key": user_api_key,
"question": question,
"response": response_full,
"sources": source_log_docs,
"retriever_params": retriever_params,
"timestamp": datetime.datetime.now(datetime.timezone.utc),
}
)
except Exception as e:
logger.error(
f"/api/answer - error: {str(e)} - traceback: {traceback.format_exc()}",
extra={"error": str(e), "traceback": traceback.format_exc()},
)
return bad_request(500, str(e))
return make_response(result, 200)
@answer_ns.route("/api/search")
class Search(Resource):
search_model = api.model(
"SearchModel",
{
"question": fields.String(
required=True, description="The question to search"
),
"chunks": fields.Integer(
required=False, default=2, description="Number of chunks"
),
"api_key": fields.String(
required=False, description="API key for authentication"
),
"active_docs": fields.String(
required=False, description="Active documents for retrieval"
),
"retriever": fields.String(required=False, description="Retriever type"),
"token_limit": fields.Integer(
required=False, description="Limit for tokens"
),
"isNoneDoc": fields.Boolean(
required=False, description="Flag indicating if no document is used"
),
},
)
@api.expect(search_model)
@api.doc(
description="Search for relevant documents based on the question and retriever"
)
def post(self):
data = request.get_json()
required_fields = ["question"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
question = data["question"]
chunks = int(data.get("chunks", 2))
token_limit = data.get("token_limit", settings.DEFAULT_MAX_HISTORY)
retriever_name = data.get("retriever", "classic")
if "api_key" in data:
data_key = get_data_from_api_key(data["api_key"])
chunks = int(data_key.get("chunks", 2))
source = {"active_docs": data_key.get("source")}
user_api_key = data["api_key"]
decoded_token = {"sub": data_key.get("user")}
elif "active_docs" in data:
source = {"active_docs": data["active_docs"]}
user_api_key = None
decoded_token = request.decoded_token
else:
source = {}
user_api_key = None
decoded_token = request.decoded_token
if not decoded_token:
return make_response({"error": "Unauthorized"}, 401)
logger.info(
f"/api/answer - request_data: {data}, source: {source}",
extra={"data": json.dumps({"request_data": data, "source": source})},
)
retriever = RetrieverCreator.create_retriever(
retriever_name,
source=source,
chat_history=[],
prompt="default",
chunks=chunks,
token_limit=token_limit,
gpt_model=gpt_model,
user_api_key=user_api_key,
decoded_token=decoded_token,
)
docs = retriever.search(question)
retriever_params = retriever.get_params()
user_logs_collection.insert_one(
{
"action": "api_search",
"level": "info",
"user": decoded_token.get("sub"),
"api_key": user_api_key,
"question": question,
"sources": docs,
"retriever_params": retriever_params,
"timestamp": datetime.datetime.now(datetime.timezone.utc),
}
)
if data.get("isNoneDoc"):
for doc in docs:
doc["source"] = "None"
except Exception as e:
logger.error(
f"/api/search - error: {str(e)} - traceback: {traceback.format_exc()}",
extra={"error": str(e), "traceback": traceback.format_exc()},
)
return bad_request(500, str(e))
return make_response(docs, 200)
def get_attachments_content(attachment_ids, user):
"""
Retrieve content from attachment documents based on their IDs.
Args:
attachment_ids (list): List of attachment document IDs
user (str): User identifier to verify ownership
Returns:
list: List of dictionaries containing attachment content and metadata
"""
if not attachment_ids:
return []
attachments = []
for attachment_id in attachment_ids:
try:
attachment_doc = attachments_collection.find_one(
{"_id": ObjectId(attachment_id), "user": user}
)
if attachment_doc:
attachments.append(attachment_doc)
except Exception as e:
logger.error(
f"Error retrieving attachment {attachment_id}: {e}", exc_info=True
)
return attachments
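A minimal client sketch for the /stream endpoint defined above, assuming the backend runs on http://localhost:7091 with AUTH_TYPE unset (so no Authorization header is needed); the payload fields and SSE event types mirror this file:

import json
import requests  # client-side dependency, not part of the application

payload = {"question": "What is DocsGPT?", "history": "[]", "save_conversation": False}
with requests.post("http://localhost:7091/stream", json=payload, stream=True) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue
        event = json.loads(line[len("data: "):])
        if event["type"] == "answer":
            print(event["answer"], end="", flush=True)
        elif event["type"] == "end":
            break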

View File


@@ -0,0 +1,111 @@
import os
import datetime
from flask import Blueprint, request, send_from_directory
from werkzeug.utils import secure_filename
from bson.objectid import ObjectId
import logging
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.storage.storage_creator import StorageCreator
logger = logging.getLogger(__name__)
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
conversations_collection = db["conversations"]
sources_collection = db["sources"]
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
internal = Blueprint("internal", __name__)
@internal.route("/api/download", methods=["get"])
def download_file():
user = secure_filename(request.args.get("user"))
job_name = secure_filename(request.args.get("name"))
filename = secure_filename(request.args.get("file"))
save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
return send_from_directory(save_dir, filename, as_attachment=True)
@internal.route("/api/upload_index", methods=["POST"])
def upload_index_files():
"""Upload two files(index.faiss, index.pkl) to the user's folder."""
if "user" not in request.form:
return {"status": "no user"}
user = request.form["user"]
if "name" not in request.form:
return {"status": "no name"}
job_name = request.form["name"]
tokens = request.form["tokens"]
retriever = request.form["retriever"]
id = request.form["id"]
type = request.form["type"]
remote_data = request.form["remote_data"] if "remote_data" in request.form else None
sync_frequency = request.form["sync_frequency"] if "sync_frequency" in request.form else None
original_file_path = request.form.get("original_file_path")
storage = StorageCreator.get_storage()
index_base_path = f"indexes/{id}"
if settings.VECTOR_STORE == "faiss":
if "file_faiss" not in request.files:
logger.error("No file_faiss part")
return {"status": "no file"}
file_faiss = request.files["file_faiss"]
if file_faiss.filename == "":
return {"status": "no file name"}
if "file_pkl" not in request.files:
logger.error("No file_pkl part")
return {"status": "no file"}
file_pkl = request.files["file_pkl"]
if file_pkl.filename == "":
return {"status": "no file name"}
# Save index files to storage
storage.save_file(file_faiss, f"{index_base_path}/index.faiss")
storage.save_file(file_pkl, f"{index_base_path}/index.pkl")
existing_entry = sources_collection.find_one({"_id": ObjectId(id)})
if existing_entry:
sources_collection.update_one(
{"_id": ObjectId(id)},
{
"$set": {
"user": user,
"name": job_name,
"language": job_name,
"date": datetime.datetime.now(),
"model": settings.EMBEDDINGS_NAME,
"type": type,
"tokens": tokens,
"retriever": retriever,
"remote_data": remote_data,
"sync_frequency": sync_frequency,
"file_path": original_file_path,
}
},
)
else:
sources_collection.insert_one(
{
"_id": ObjectId(id),
"user": user,
"name": job_name,
"language": job_name,
"date": datetime.datetime.now(),
"model": settings.EMBEDDINGS_NAME,
"type": type,
"tokens": tokens,
"retriever": retriever,
"remote_data": remote_data,
"sync_frequency": sync_frequency,
"file_path": original_file_path,
}
)
return {"status": "ok"}

View File

File diff suppressed because it is too large

View File

@@ -0,0 +1,56 @@
from datetime import timedelta
from application.celery_init import celery
from application.worker import (
agent_webhook_worker,
attachment_worker,
ingest_worker,
remote_worker,
sync_worker,
)
@celery.task(bind=True)
def ingest(self, directory, formats, job_name, filename, user, dir_name, user_dir):
resp = ingest_worker(self, directory, formats, job_name, filename, user, dir_name, user_dir)
return resp
@celery.task(bind=True)
def ingest_remote(self, source_data, job_name, user, loader):
resp = remote_worker(self, source_data, job_name, user, loader)
return resp
@celery.task(bind=True)
def schedule_syncs(self, frequency):
resp = sync_worker(self, frequency)
return resp
@celery.task(bind=True)
def store_attachment(self, file_info, user):
resp = attachment_worker(self, file_info, user)
return resp
@celery.task(bind=True)
def process_agent_webhook(self, agent_id, payload):
resp = agent_webhook_worker(self, agent_id, payload)
return resp
@celery.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
sender.add_periodic_task(
timedelta(days=1),
schedule_syncs.s("daily"),
)
sender.add_periodic_task(
timedelta(weeks=1),
schedule_syncs.s("weekly"),
)
sender.add_periodic_task(
timedelta(days=30),
schedule_syncs.s("monthly"),
)
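These tasks are normally dispatched from the API layer, but they can also be enqueued directly once a broker and a worker are running; a sketch with illustrative arguments (the import path is assumed, adjust it to wherever this tasks module lives):

from application.api.user.tasks import ingest_remote  # assumed module path

result = ingest_remote.delay(
    source_data="https://example.com/docs",  # illustrative source
    job_name="example-job",
    user="local",
    loader="url",
)
print(result.id)  # Celery task id; poll result.status for progress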

116
application/app.py Normal file
View File

@@ -0,0 +1,116 @@
import os
import platform
import uuid
import dotenv
from flask import Flask, jsonify, redirect, request
from jose import jwt
from application.auth import handle_auth
from application.core.logging_config import setup_logging
setup_logging()
from application.api.answer.routes import answer # noqa: E402
from application.api.internal.routes import internal # noqa: E402
from application.api.user.routes import user # noqa: E402
from application.celery_init import celery # noqa: E402
from application.core.settings import settings # noqa: E402
from application.extensions import api # noqa: E402
if platform.system() == "Windows":
import pathlib
pathlib.PosixPath = pathlib.WindowsPath
dotenv.load_dotenv()
app = Flask(__name__)
app.register_blueprint(user)
app.register_blueprint(answer)
app.register_blueprint(internal)
app.config.update(
UPLOAD_FOLDER="inputs",
CELERY_BROKER_URL=settings.CELERY_BROKER_URL,
CELERY_RESULT_BACKEND=settings.CELERY_RESULT_BACKEND,
MONGO_URI=settings.MONGO_URI,
)
celery.config_from_object("application.celeryconfig")
api.init_app(app)
if settings.AUTH_TYPE in ("simple_jwt", "session_jwt") and not settings.JWT_SECRET_KEY:
key_file = ".jwt_secret_key"
try:
with open(key_file, "r") as f:
settings.JWT_SECRET_KEY = f.read().strip()
except FileNotFoundError:
new_key = os.urandom(32).hex()
with open(key_file, "w") as f:
f.write(new_key)
settings.JWT_SECRET_KEY = new_key
except Exception as e:
raise RuntimeError(f"Failed to setup JWT_SECRET_KEY: {e}")
SIMPLE_JWT_TOKEN = None
if settings.AUTH_TYPE == "simple_jwt":
payload = {"sub": "local"}
SIMPLE_JWT_TOKEN = jwt.encode(payload, settings.JWT_SECRET_KEY, algorithm="HS256")
print(f"Generated Simple JWT Token: {SIMPLE_JWT_TOKEN}")
@app.route("/")
def home():
if request.remote_addr in ("0.0.0.0", "127.0.0.1", "localhost", "172.18.0.1"):
return redirect("http://localhost:5173")
else:
return "Welcome to DocsGPT Backend!"
@app.route("/api/config")
def get_config():
response = {
"auth_type": settings.AUTH_TYPE,
"requires_auth": settings.AUTH_TYPE in ["simple_jwt", "session_jwt"],
}
return jsonify(response)
@app.route("/api/generate_token")
def generate_token():
if settings.AUTH_TYPE == "session_jwt":
new_user_id = str(uuid.uuid4())
token = jwt.encode(
{"sub": new_user_id}, settings.JWT_SECRET_KEY, algorithm="HS256"
)
return jsonify({"token": token})
return jsonify({"error": "Token generation not allowed in current auth mode"}), 400
@app.before_request
def authenticate_request():
if request.method == "OPTIONS":
return "", 200
decoded_token = handle_auth(request)
if not decoded_token:
request.decoded_token = None
elif "error" in decoded_token:
return jsonify(decoded_token), 401
else:
request.decoded_token = decoded_token
@app.after_request
def after_request(response):
response.headers.add("Access-Control-Allow-Origin", "*")
response.headers.add("Access-Control-Allow-Headers", "Content-Type, Authorization")
response.headers.add(
"Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS"
)
return response
if __name__ == "__main__":
app.run(debug=settings.FLASK_DEBUG_MODE, port=7091)
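A quick smoke test of the configuration and token endpoints above, assuming a locally running server:

import requests  # client-side sketch

cfg = requests.get("http://localhost:7091/api/config").json()
print(cfg)  # e.g. {"auth_type": None, "requires_auth": False}
if cfg["auth_type"] == "session_jwt":
    token = requests.get("http://localhost:7091/api/generate_token").json()["token"]
    headers = {"Authorization": f"Bearer {token}"}  # attach to subsequent requests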

28
application/auth.py Normal file
View File

@@ -0,0 +1,28 @@
from jose import jwt
from application.core.settings import settings
def handle_auth(request, data={}):
if settings.AUTH_TYPE in ["simple_jwt", "session_jwt"]:
jwt_token = request.headers.get("Authorization")
if not jwt_token:
return None
jwt_token = jwt_token.replace("Bearer ", "")
try:
decoded_token = jwt.decode(
jwt_token,
settings.JWT_SECRET_KEY,
algorithms=["HS256"],
options={"verify_exp": False},
)
return decoded_token
except Exception as e:
return {
"message": f"Authentication error: {str(e)}",
"error": "invalid_token",
}
else:
return {"sub": "local"}

117
application/cache.py Normal file
View File

@@ -0,0 +1,117 @@
import json
import logging
import time
from threading import Lock
import redis
from application.core.settings import settings
from application.utils import get_hash
logger = logging.getLogger(__name__)
_redis_instance = None
_redis_creation_failed = False
_instance_lock = Lock()
def get_redis_instance():
global _redis_instance, _redis_creation_failed
if _redis_instance is None and not _redis_creation_failed:
with _instance_lock:
if _redis_instance is None and not _redis_creation_failed:
try:
_redis_instance = redis.Redis.from_url(
settings.CACHE_REDIS_URL, socket_connect_timeout=2
)
except ValueError as e:
logger.error(f"Invalid Redis URL: {e}")
_redis_creation_failed = True # Stop future attempts
_redis_instance = None
except redis.ConnectionError as e:
logger.error(f"Redis connection error: {e}")
_redis_instance = None # Keep trying for connection errors
return _redis_instance
def gen_cache_key(messages, model="docgpt", tools=None):
if not all(isinstance(msg, dict) for msg in messages):
raise ValueError("All messages must be dictionaries.")
messages_str = json.dumps(messages)
tools_str = json.dumps(str(tools)) if tools else ""
combined = f"{model}_{messages_str}_{tools_str}"
cache_key = get_hash(combined)
return cache_key
def gen_cache(func):
def wrapper(self, model, messages, stream, tools=None, *args, **kwargs):
if tools is not None:
return func(self, model, messages, stream, tools, *args, **kwargs)
try:
cache_key = gen_cache_key(messages, model, tools)
except ValueError as e:
logger.error(f"Cache key generation failed: {e}")
return func(self, model, messages, stream, tools, *args, **kwargs)
redis_client = get_redis_instance()
if redis_client:
try:
cached_response = redis_client.get(cache_key)
if cached_response:
return cached_response.decode("utf-8")
except Exception as e:
logger.error(f"Error getting cached response: {e}", exc_info=True)
result = func(self, model, messages, stream, tools, *args, **kwargs)
if redis_client and isinstance(result, str):
try:
redis_client.set(cache_key, result, ex=1800)
except Exception as e:
logger.error(f"Error setting cache: {e}", exc_info=True)
return result
return wrapper
def stream_cache(func):
def wrapper(self, model, messages, stream, tools=None, *args, **kwargs):
if tools is not None:
yield from func(self, model, messages, stream, tools, *args, **kwargs)
return
try:
cache_key = gen_cache_key(messages, model, tools)
except ValueError as e:
logger.error(f"Cache key generation failed: {e}")
yield from func(self, model, messages, stream, tools, *args, **kwargs)
return
redis_client = get_redis_instance()
if redis_client:
try:
cached_response = redis_client.get(cache_key)
if cached_response:
logger.info(f"Cache hit for stream key: {cache_key}")
cached_response = json.loads(cached_response.decode("utf-8"))
for chunk in cached_response:
yield chunk
time.sleep(0.03) # Simulate streaming delay
return
except Exception as e:
logger.error(f"Error getting cached stream: {e}", exc_info=True)
stream_cache_data = []
for chunk in func(self, model, messages, stream, tools, *args, **kwargs):
yield chunk
stream_cache_data.append(str(chunk))
if redis_client:
try:
redis_client.set(cache_key, json.dumps(stream_cache_data), ex=1800)
logger.info(f"Stream cache saved for key: {cache_key}")
except Exception as e:
logger.error(f"Error setting stream cache: {e}", exc_info=True)
return wrapper
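gen_cache_key is deterministic for an identical message list, which is what makes the Redis lookups above safe; a small sketch:

from application.cache import gen_cache_key

messages = [{"role": "user", "content": "hello"}]
assert gen_cache_key(messages, model="docsgpt") == gen_cache_key(messages, model="docsgpt")
# A different model name or tool list yields a different key.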

View File

@@ -0,0 +1,23 @@
from celery import Celery
from application.core.settings import settings
from celery.signals import setup_logging
def make_celery(app_name=__name__):
celery = Celery(
app_name,
broker=settings.CELERY_BROKER_URL,
backend=settings.CELERY_RESULT_BACKEND,
)
celery.conf.update(settings)
return celery
@setup_logging.connect
def config_loggers(*args, **kwargs):
from application.core.logging_config import setup_logging
setup_logging()
celery = make_celery()

View File

@@ -0,0 +1,8 @@
import os
broker_url = os.getenv("CELERY_BROKER_URL")
result_backend = os.getenv("CELERY_RESULT_BACKEND")
task_serializer = 'json'
result_serializer = 'json'
accept_content = ['json']

View File


@@ -0,0 +1,22 @@
from logging.config import dictConfig
def setup_logging():
dictConfig({
'version': 1,
'formatters': {
'default': {
'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
}
},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"stream": "ext://sys.stdout",
"formatter": "default",
}
},
'root': {
'level': 'INFO',
'handlers': ['console'],
},
})

View File

@@ -0,0 +1,24 @@
from application.core.settings import settings
from pymongo import MongoClient
class MongoDB:
_client = None
@classmethod
def get_client(cls):
"""
Get the MongoDB client instance, creating it if necessary.
"""
if cls._client is None:
cls._client = MongoClient(settings.MONGO_URI)
return cls._client
@classmethod
def close_client(cls):
"""
Close the MongoDB client connection.
"""
if cls._client is not None:
cls._client.close()
cls._client = None
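The class above memoizes one MongoClient per process; typical usage, mirroring the collection access elsewhere in this codebase (requires a reachable MongoDB):

from application.core.mongo_db import MongoDB
from application.core.settings import settings

db = MongoDB.get_client()[settings.MONGO_DB_NAME]
print(db.list_collection_names())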

View File

@@ -0,0 +1,112 @@
import os
from pathlib import Path
from typing import Optional
from pydantic_settings import BaseSettings
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
class Settings(BaseSettings):
AUTH_TYPE: Optional[str] = None
LLM_PROVIDER: str = "docsgpt"
LLM_NAME: Optional[str] = (
None # if LLM_PROVIDER is openai, LLM_NAME can be gpt-4 or gpt-3.5-turbo
)
EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
CELERY_BROKER_URL: str = "redis://localhost:6379/0"
CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
MONGO_DB_NAME: str = "docsgpt"
LLM_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
DEFAULT_MAX_HISTORY: int = 150
LLM_TOKEN_LIMITS: dict = {
"gpt-4o-mini": 128000,
"gpt-3.5-turbo": 4096,
"claude-2": 1e5,
"gemini-2.0-flash-exp": 1e6,
}
UPLOAD_FOLDER: str = "inputs"
PARSE_PDF_AS_IMAGE: bool = False
VECTOR_STORE: str = (
"faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb"
)
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search
AGENT_NAME: str = "classic"
FALLBACK_LLM_PROVIDER: Optional[str] = None # provider for fallback llm
FALLBACK_LLM_NAME: Optional[str] = None # model name for fallback llm
FALLBACK_LLM_API_KEY: Optional[str] = None # api key for fallback llm
# LLM Cache
CACHE_REDIS_URL: str = "redis://localhost:6379/2"
API_URL: str = "http://localhost:7091" # backend url for celery worker
API_KEY: Optional[str] = None # LLM api key
EMBEDDINGS_KEY: Optional[str] = (
None # api key for embeddings (if using openai, just copy API_KEY)
)
OPENAI_API_BASE: Optional[str] = None # azure openai api base url
OPENAI_API_VERSION: Optional[str] = None # azure openai api version
AZURE_DEPLOYMENT_NAME: Optional[str] = None # azure deployment name for answering
AZURE_EMBEDDINGS_DEPLOYMENT_NAME: Optional[str] = (
None # azure deployment name for embeddings
)
OPENAI_BASE_URL: Optional[str] = (
None # openai base url for OpenAI-compatible models
)
# elasticsearch
ELASTIC_CLOUD_ID: Optional[str] = None # cloud id for elasticsearch
ELASTIC_USERNAME: Optional[str] = None # username for elasticsearch
ELASTIC_PASSWORD: Optional[str] = None # password for elasticsearch
ELASTIC_URL: Optional[str] = None # url for elasticsearch
ELASTIC_INDEX: Optional[str] = "docsgpt" # index name for elasticsearch
# SageMaker config
SAGEMAKER_ENDPOINT: Optional[str] = None # SageMaker endpoint name
SAGEMAKER_REGION: Optional[str] = None # SageMaker region name
SAGEMAKER_ACCESS_KEY: Optional[str] = None # SageMaker access key
SAGEMAKER_SECRET_KEY: Optional[str] = None # SageMaker secret key
# prem ai project id
PREMAI_PROJECT_ID: Optional[str] = None
# Qdrant vectorstore config
QDRANT_COLLECTION_NAME: Optional[str] = "docsgpt"
QDRANT_LOCATION: Optional[str] = None
QDRANT_URL: Optional[str] = None
QDRANT_PORT: Optional[int] = 6333
QDRANT_GRPC_PORT: int = 6334
QDRANT_PREFER_GRPC: bool = False
QDRANT_HTTPS: Optional[bool] = None
QDRANT_API_KEY: Optional[str] = None
QDRANT_PREFIX: Optional[str] = None
QDRANT_TIMEOUT: Optional[float] = None
QDRANT_HOST: Optional[str] = None
QDRANT_PATH: Optional[str] = None
QDRANT_DISTANCE_FUNC: str = "Cosine"
# Milvus vectorstore config
MILVUS_COLLECTION_NAME: Optional[str] = "docsgpt"
MILVUS_URI: Optional[str] = "./milvus_local.db" # milvus lite version as default
MILVUS_TOKEN: Optional[str] = ""
# LanceDB vectorstore config
LANCEDB_PATH: str = "/tmp/lancedb" # Path where LanceDB stores its local data
LANCEDB_TABLE_NAME: Optional[str] = (
"docsgpts" # Name of the table to use for storing vectors
)
BRAVE_SEARCH_API_KEY: Optional[str] = None
FLASK_DEBUG_MODE: bool = False
STORAGE_TYPE: str = "local" # local or s3
URL_STRATEGY: str = "backend" # backend or s3
JWT_SECRET_KEY: str = ""
path = Path(__file__).parent.parent.absolute()
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
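Because Settings is a pydantic BaseSettings, every field can be overridden through the environment (or the .env file next to the package); a sketch:

import os

os.environ["LLM_PROVIDER"] = "openai"  # must be set before instantiation
from application.core.settings import Settings

print(Settings().LLM_PROVIDER)  # "openai"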

15
application/error.py Normal file
View File

@@ -0,0 +1,15 @@
from flask import jsonify
from werkzeug.http import HTTP_STATUS_CODES
def response_error(code_status, message=None):
payload = {'error': HTTP_STATUS_CODES.get(code_status, "something went wrong")}
if message:
payload['message'] = message
response = jsonify(payload)
response.status_code = code_status
return response
def bad_request(status_code=400, message=''):
return response_error(code_status=status_code, message=message)

View File

@@ -0,0 +1,7 @@
from flask_restx import Api
api = Api(
version="1.0",
title="DocsGPT API",
description="API for DocsGPT",
)

BIN
application/index.faiss Normal file

Binary file not shown.

BIN
application/index.pkl Normal file

Binary file not shown.

View File


@@ -0,0 +1,50 @@
from application.llm.base import BaseLLM
from application.core.settings import settings
class AnthropicLLM(BaseLLM):
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
super().__init__(*args, **kwargs)
self.api_key = (
api_key or settings.ANTHROPIC_API_KEY
) # If not provided, use a default from settings
self.user_api_key = user_api_key
self.anthropic = Anthropic(api_key=self.api_key)
self.HUMAN_PROMPT = HUMAN_PROMPT
self.AI_PROMPT = AI_PROMPT
def _raw_gen(
self, baseself, model, messages, stream=False, tools=None, max_tokens=300, **kwargs
):
context = messages[0]["content"]
user_question = messages[-1]["content"]
prompt = f"### Context \n {context} \n ### Question \n {user_question}"
if stream:
return self.gen_stream(model, prompt, stream, max_tokens, **kwargs)
completion = self.anthropic.completions.create(
model=model,
max_tokens_to_sample=max_tokens,
stream=stream,
prompt=f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT}",
)
return completion.completion
def _raw_gen_stream(
self, baseself, model, messages, stream=True, tools=None, max_tokens=300, **kwargs
):
context = messages[0]["content"]
user_question = messages[-1]["content"]
prompt = f"### Context \n {context} \n ### Question \n {user_question}"
stream_response = self.anthropic.completions.create(
model=model,
prompt=f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT}",
max_tokens_to_sample=max_tokens,
stream=True,
)
for completion in stream_response:
yield completion.completion

130
application/llm/base.py Normal file
View File

@@ -0,0 +1,130 @@
import logging
from abc import ABC, abstractmethod
from application.cache import gen_cache, stream_cache
from application.core.settings import settings
from application.usage import gen_token_usage, stream_token_usage
logger = logging.getLogger(__name__)
class BaseLLM(ABC):
def __init__(
self,
decoded_token=None,
):
self.decoded_token = decoded_token
self.token_usage = {"prompt_tokens": 0, "generated_tokens": 0}
self.fallback_provider = settings.FALLBACK_LLM_PROVIDER
self.fallback_model_name = settings.FALLBACK_LLM_NAME
self.fallback_llm_api_key = settings.FALLBACK_LLM_API_KEY
self._fallback_llm = None
@property
def fallback_llm(self):
"""Lazy-loaded fallback LLM instance."""
if (
self._fallback_llm is None
and self.fallback_provider
and self.fallback_model_name
):
try:
from application.llm.llm_creator import LLMCreator
self._fallback_llm = LLMCreator.create_llm(
self.fallback_provider,
self.fallback_llm_api_key,
None,
self.decoded_token,
)
except Exception as e:
logger.error(
f"Failed to initialize fallback LLM: {str(e)}", exc_info=True
)
return self._fallback_llm
def _execute_with_fallback(
self, method_name: str, decorators: list, *args, **kwargs
):
"""
Unified method execution with fallback support.
Args:
method_name: Name of the raw method ('_raw_gen' or '_raw_gen_stream')
decorators: List of decorators to apply
*args: Positional arguments
**kwargs: Keyword arguments
"""
def decorated_method():
method = getattr(self, method_name)
for decorator in decorators:
method = decorator(method)
return method(self, *args, **kwargs)
try:
return decorated_method()
except Exception as e:
if not self.fallback_llm:
logger.error(f"Primary LLM failed and no fallback available: {str(e)}")
raise
logger.warning(
f"Falling back to {self.fallback_provider}/{self.fallback_model_name}. Error: {str(e)}"
)
fallback_method = getattr(
self.fallback_llm, method_name.replace("_raw_", "")
)
return fallback_method(*args, **kwargs)
def gen(self, model, messages, stream=False, tools=None, *args, **kwargs):
decorators = [gen_token_usage, gen_cache]
return self._execute_with_fallback(
"_raw_gen",
decorators,
model=model,
messages=messages,
stream=stream,
tools=tools,
*args,
**kwargs,
)
def gen_stream(self, model, messages, stream=True, tools=None, *args, **kwargs):
decorators = [stream_cache, stream_token_usage]
return self._execute_with_fallback(
"_raw_gen_stream",
decorators,
model=model,
messages=messages,
stream=stream,
tools=tools,
*args,
**kwargs,
)
@abstractmethod
def _raw_gen(self, model, messages, stream, tools, *args, **kwargs):
pass
@abstractmethod
def _raw_gen_stream(self, model, messages, stream, *args, **kwargs):
pass
def supports_tools(self):
return hasattr(self, "_supports_tools") and callable(
getattr(self, "_supports_tools")
)
def _supports_tools(self):
raise NotImplementedError("Subclass must implement _supports_tools method")
def get_supported_attachment_types(self):
"""
Return a list of MIME types supported by this LLM for file uploads.
Returns:
list: List of supported MIME types
"""
return [] # Default: no attachments supported
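A minimal concrete subclass, useful for seeing the decorator plumbing above in action. Note the extra leading baseself parameter: _execute_with_fallback decorates an already-bound method and then calls it with self again, so the raw methods receive the instance twice. This is a sketch; gen() is wrapped with caching and usage tracking, so it assumes those backends are reachable or fail gracefully:

from application.llm.base import BaseLLM

class EchoLLM(BaseLLM):
    """Toy LLM that echoes the last user message; not a real provider."""

    def _raw_gen(self, baseself, model, messages, stream=False, tools=None, **kwargs):
        return messages[-1]["content"]

    def _raw_gen_stream(self, baseself, model, messages, stream=True, tools=None, **kwargs):
        for word in messages[-1]["content"].split():
            yield word + " "

llm = EchoLLM(decoded_token={"sub": "local"})
print(llm.gen(model="echo", messages=[{"role": "user", "content": "hi there"}]))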

View File

@@ -0,0 +1,131 @@
import json
from application.core.settings import settings
from application.llm.base import BaseLLM
class DocsGPTAPILLM(BaseLLM):
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
from openai import OpenAI
super().__init__(*args, **kwargs)
self.client = OpenAI(api_key="sk-docsgpt-public", base_url="https://oai.arc53.com")
self.user_api_key = user_api_key
self.api_key = api_key
def _clean_messages_openai(self, messages):
cleaned_messages = []
for message in messages:
role = message.get("role")
content = message.get("content")
if role == "model":
role = "assistant"
if role and content is not None:
if isinstance(content, str):
cleaned_messages.append({"role": role, "content": content})
elif isinstance(content, list):
for item in content:
if "text" in item:
cleaned_messages.append(
{"role": role, "content": item["text"]}
)
elif "function_call" in item:
tool_call = {
"id": item["function_call"]["call_id"],
"type": "function",
"function": {
"name": item["function_call"]["name"],
"arguments": json.dumps(
item["function_call"]["args"]
),
},
}
cleaned_messages.append(
{
"role": "assistant",
"content": None,
"tool_calls": [tool_call],
}
)
elif "function_response" in item:
cleaned_messages.append(
{
"role": "tool",
"tool_call_id": item["function_response"][
"call_id"
],
"content": json.dumps(
item["function_response"]["response"]["result"]
),
}
)
else:
raise ValueError(
f"Unexpected content dictionary format: {item}"
)
else:
raise ValueError(f"Unexpected content type: {type(content)}")
return cleaned_messages
def _raw_gen(
self,
baseself,
model,
messages,
stream=False,
tools=None,
engine=settings.AZURE_DEPLOYMENT_NAME,
**kwargs,
):
messages = self._clean_messages_openai(messages)
if tools:
response = self.client.chat.completions.create(
model="docsgpt",
messages=messages,
stream=stream,
tools=tools,
**kwargs,
)
return response.choices[0]
else:
response = self.client.chat.completions.create(
model="docsgpt", messages=messages, stream=stream, **kwargs
)
return response.choices[0].message.content
def _raw_gen_stream(
self,
baseself,
model,
messages,
stream=True,
tools=None,
engine=settings.AZURE_DEPLOYMENT_NAME,
**kwargs,
):
messages = self._clean_messages_openai(messages)
if tools:
response = self.client.chat.completions.create(
model="docsgpt",
messages=messages,
stream=stream,
tools=tools,
**kwargs,
)
else:
response = self.client.chat.completions.create(
model="docsgpt", messages=messages, stream=stream, **kwargs
)
for line in response:
if len(line.choices) > 0 and line.choices[0].delta.content is not None and len(line.choices[0].delta.content) > 0:
yield line.choices[0].delta.content
elif len(line.choices) > 0:
yield line.choices[0]
def _supports_tools(self):
return True

View File

@@ -0,0 +1,313 @@
from google import genai
from google.genai import types
import logging
import json
from application.llm.base import BaseLLM
from application.storage.storage_creator import StorageCreator
from application.core.settings import settings
class GoogleLLM(BaseLLM):
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
super().__init__(*args, **kwargs)
self.api_key = api_key
self.user_api_key = user_api_key
self.client = genai.Client(api_key=self.api_key)
self.storage = StorageCreator.get_storage()
def get_supported_attachment_types(self):
"""
Return a list of MIME types supported by Google Gemini for file uploads.
Returns:
list: List of supported MIME types
"""
return [
'application/pdf',
'image/png',
'image/jpeg',
'image/jpg',
'image/webp',
'image/gif'
]
def prepare_messages_with_attachments(self, messages, attachments=None):
"""
Process attachments using Google AI's file API for more efficient handling.
Args:
messages (list): List of message dictionaries.
attachments (list): List of attachment dictionaries with content and metadata.
Returns:
list: Messages formatted with file references for Google AI API.
"""
if not attachments:
return messages
prepared_messages = messages.copy()
# Find the last user message so files can be attached to it
user_message_index = None
for i in range(len(prepared_messages) - 1, -1, -1):
if prepared_messages[i].get("role") == "user":
user_message_index = i
break
if user_message_index is None:
user_message = {"role": "user", "content": []}
prepared_messages.append(user_message)
user_message_index = len(prepared_messages) - 1
if isinstance(prepared_messages[user_message_index].get("content"), str):
text_content = prepared_messages[user_message_index]["content"]
prepared_messages[user_message_index]["content"] = [
{"type": "text", "text": text_content}
]
elif not isinstance(prepared_messages[user_message_index].get("content"), list):
prepared_messages[user_message_index]["content"] = []
files = []
for attachment in attachments:
mime_type = attachment.get('mime_type')
if mime_type in self.get_supported_attachment_types():
try:
file_uri = self._upload_file_to_google(attachment)
logging.info(f"GoogleLLM: Successfully uploaded file, got URI: {file_uri}")
files.append({"file_uri": file_uri, "mime_type": mime_type})
except Exception as e:
logging.error(f"GoogleLLM: Error uploading file: {e}", exc_info=True)
if 'content' in attachment:
prepared_messages[user_message_index]["content"].append({
"type": "text",
"text": f"[File could not be processed: {attachment.get('path', 'unknown')}]"
})
if files:
logging.info(f"GoogleLLM: Adding {len(files)} files to message")
prepared_messages[user_message_index]["content"].append({
"files": files
})
return prepared_messages
def _upload_file_to_google(self, attachment):
"""
Upload a file to Google AI and return the file URI.
Args:
attachment (dict): Attachment dictionary with path and metadata.
Returns:
str: Google AI file URI for the uploaded file.
"""
if 'google_file_uri' in attachment:
return attachment['google_file_uri']
file_path = attachment.get('path')
if not file_path:
raise ValueError("No file path provided in attachment")
if not self.storage.file_exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
try:
file_uri = self.storage.process_file(
file_path,
lambda local_path, **kwargs: self.client.files.upload(file=local_path).uri
)
from application.core.mongo_db import MongoDB
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
attachments_collection = db["attachments"]
if '_id' in attachment:
attachments_collection.update_one(
{"_id": attachment['_id']},
{"$set": {"google_file_uri": file_uri}}
)
return file_uri
except Exception as e:
logging.error(f"Error uploading file to Google AI: {e}", exc_info=True)
raise
def _clean_messages_google(self, messages):
cleaned_messages = []
for message in messages:
role = message.get("role")
content = message.get("content")
if role == "assistant":
role = "model"
parts = []
if role and content is not None:
if isinstance(content, str):
parts = [types.Part.from_text(text=content)]
elif isinstance(content, list):
for item in content:
if "text" in item:
parts.append(types.Part.from_text(text=item["text"]))
elif "function_call" in item:
parts.append(
types.Part.from_function_call(
name=item["function_call"]["name"],
args=item["function_call"]["args"],
)
)
elif "function_response" in item:
parts.append(
types.Part.from_function_response(
name=item["function_response"]["name"],
response=item["function_response"]["response"],
)
)
elif "files" in item:
for file_data in item["files"]:
parts.append(
types.Part.from_uri(
file_uri=file_data["file_uri"],
mime_type=file_data["mime_type"]
)
)
else:
raise ValueError(
f"Unexpected content dictionary format:{item}"
)
else:
raise ValueError(f"Unexpected content type: {type(content)}")
cleaned_messages.append(types.Content(role=role, parts=parts))
return cleaned_messages
def _clean_tools_format(self, tools_list):
genai_tools = []
for tool_data in tools_list:
if tool_data["type"] == "function":
function = tool_data["function"]
parameters = function["parameters"]
properties = parameters.get("properties", {})
if properties:
genai_function = dict(
name=function["name"],
description=function["description"],
parameters={
"type": "OBJECT",
"properties": {
k: {
**v,
"type": v["type"].upper() if v["type"] else None,
}
for k, v in properties.items()
},
"required": (
parameters["required"]
if "required" in parameters
else []
),
},
)
else:
genai_function = dict(
name=function["name"],
description=function["description"],
)
genai_tool = types.Tool(function_declarations=[genai_function])
genai_tools.append(genai_tool)
return genai_tools
def _raw_gen(
self,
baseself,
model,
messages,
stream=False,
tools=None,
formatting="openai",
**kwargs,
):
client = genai.Client(api_key=self.api_key)
if formatting == "openai":
messages = self._clean_messages_google(messages)
config = types.GenerateContentConfig()
if messages[0].role == "system":
config.system_instruction = messages[0].parts[0].text
messages = messages[1:]
if tools:
cleaned_tools = self._clean_tools_format(tools)
config.tools = cleaned_tools
response = client.models.generate_content(
model=model,
contents=messages,
config=config,
)
return response
else:
response = client.models.generate_content(
model=model, contents=messages, config=config
)
return response.text
def _raw_gen_stream(
self,
baseself,
model,
messages,
stream=True,
tools=None,
formatting="openai",
**kwargs,
):
client = genai.Client(api_key=self.api_key)
if formatting == "openai":
messages = self._clean_messages_google(messages)
config = types.GenerateContentConfig()
if messages[0].role == "system":
config.system_instruction = messages[0].parts[0].text
messages = messages[1:]
if tools:
cleaned_tools = self._clean_tools_format(tools)
config.tools = cleaned_tools
# Check whether any message carries file attachments
has_attachments = False
for message in messages:
for part in message.parts:
if hasattr(part, 'file_data') and part.file_data is not None:
has_attachments = True
break
if has_attachments:
break
logging.info(f"GoogleLLM: Starting stream generation. Model: {model}, Messages: {json.dumps(messages, default=str)}, Has attachments: {has_attachments}")
response = client.models.generate_content_stream(
model=model,
contents=messages,
config=config,
)
for chunk in response:
if hasattr(chunk, "candidates") and chunk.candidates:
for candidate in chunk.candidates:
if candidate.content and candidate.content.parts:
for part in candidate.content.parts:
if part.function_call:
yield part
elif part.text:
yield part.text
elif hasattr(chunk, "text"):
yield chunk.text
def _supports_tools(self):
return True

32
application/llm/groq.py Normal file
View File

@@ -0,0 +1,32 @@
from application.llm.base import BaseLLM
from openai import OpenAI
class GroqLLM(BaseLLM):
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
super().__init__(*args, **kwargs)
self.client = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1")
self.api_key = api_key
self.user_api_key = user_api_key
def _raw_gen(self, baseself, model, messages, stream=False, tools=None, **kwargs):
if tools:
response = self.client.chat.completions.create(
model=model, messages=messages, stream=stream, tools=tools, **kwargs
)
return response.choices[0]
else:
response = self.client.chat.completions.create(
model=model, messages=messages, stream=stream, **kwargs
)
return response.choices[0].message.content
def _raw_gen_stream(
self, baseself, model, messages, stream=True, tools=None, **kwargs
):
response = self.client.chat.completions.create(
model=model, messages=messages, stream=stream, **kwargs
)
for line in response:
if line.choices[0].delta.content is not None:
yield line.choices[0].delta.content

View File


@@ -0,0 +1,335 @@
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, Generator, List, Optional, Union
from application.logging import build_stack_data
logger = logging.getLogger(__name__)
@dataclass
class ToolCall:
"""Represents a tool/function call from the LLM."""
id: str
name: str
arguments: Union[str, Dict]
index: Optional[int] = None
@classmethod
def from_dict(cls, data: Dict) -> "ToolCall":
"""Create ToolCall from dictionary."""
return cls(
id=data.get("id", ""),
name=data.get("name", ""),
arguments=data.get("arguments", {}),
index=data.get("index"),
)
@dataclass
class LLMResponse:
"""Represents a response from the LLM."""
content: str
tool_calls: List[ToolCall]
finish_reason: str
raw_response: Any
@property
def requires_tool_call(self) -> bool:
"""Check if the response requires tool calls."""
return bool(self.tool_calls) and self.finish_reason == "tool_calls"
class LLMHandler(ABC):
"""Abstract base class for LLM handlers."""
def __init__(self):
self.llm_calls = []
self.tool_calls = []
@abstractmethod
def parse_response(self, response: Any) -> LLMResponse:
"""Parse raw LLM response into standardized format."""
pass
@abstractmethod
def create_tool_message(self, tool_call: ToolCall, result: Any) -> Dict:
"""Create a tool result message for the conversation history."""
pass
@abstractmethod
def _iterate_stream(self, response: Any) -> Generator:
"""Iterate through streaming response chunks."""
pass
def process_message_flow(
self,
agent,
initial_response,
tools_dict: Dict,
messages: List[Dict],
attachments: Optional[List] = None,
stream: bool = False,
) -> Union[str, Generator]:
"""
Main orchestration method for processing LLM message flow.
Args:
agent: The agent instance
initial_response: Initial LLM response
tools_dict: Dictionary of available tools
messages: Conversation history
attachments: Optional attachments
stream: Whether to use streaming
Returns:
Final response or generator for streaming
"""
messages = self.prepare_messages(agent, messages, attachments)
if stream:
return self.handle_streaming(agent, initial_response, tools_dict, messages)
else:
return self.handle_non_streaming(
agent, initial_response, tools_dict, messages
)
def prepare_messages(
self, agent, messages: List[Dict], attachments: Optional[List] = None
) -> List[Dict]:
"""
Prepare messages with attachments and provider-specific formatting.
Args:
agent: The agent instance
messages: Original messages
attachments: List of attachments
Returns:
Prepared messages list
"""
if not attachments:
return messages
logger.info(f"Preparing messages with {len(attachments)} attachments")
supported_types = agent.llm.get_supported_attachment_types()
supported_attachments = [
a for a in attachments if a.get("mime_type") in supported_types
]
unsupported_attachments = [
a for a in attachments if a.get("mime_type") not in supported_types
]
# Process supported attachments with the LLM's custom method
if supported_attachments:
logger.info(
f"Processing {len(supported_attachments)} supported attachments"
)
messages = agent.llm.prepare_messages_with_attachments(
messages, supported_attachments
)
# Process unsupported attachments with default method
if unsupported_attachments:
logger.info(
f"Processing {len(unsupported_attachments)} unsupported attachments"
)
messages = self._append_unsupported_attachments(
messages, unsupported_attachments
)
return messages
def _append_unsupported_attachments(
self, messages: List[Dict], attachments: List[Dict]
) -> List[Dict]:
"""
Default method to append unsupported attachment content to system prompt.
Args:
messages: Current messages
attachments: List of unsupported attachments
Returns:
Updated messages list
"""
prepared_messages = messages.copy()
attachment_texts = []
for attachment in attachments:
logger.info(f"Adding attachment {attachment.get('id')} to context")
if "content" in attachment:
attachment_texts.append(
f"Attached file content:\n\n{attachment['content']}"
)
if attachment_texts:
combined_text = "\n\n".join(attachment_texts)
system_msg = next(
(msg for msg in prepared_messages if msg.get("role") == "system"),
{"role": "system", "content": ""},
)
if system_msg not in prepared_messages:
prepared_messages.insert(0, system_msg)
system_msg["content"] += f"\n\n{combined_text}"
return prepared_messages
def handle_tool_calls(
self, agent, tool_calls: List[ToolCall], tools_dict: Dict, messages: List[Dict]
) -> Generator:
"""
Execute tool calls and update conversation history.
Args:
agent: The agent instance
tool_calls: List of tool calls to execute
tools_dict: Available tools dictionary
messages: Current conversation history
Returns:
Updated messages list
"""
updated_messages = messages.copy()
for call in tool_calls:
try:
self.tool_calls.append(call)
tool_executor_gen = agent._execute_tool_action(tools_dict, call)
while True:
try:
yield next(tool_executor_gen)
except StopIteration as e:
tool_response, call_id = e.value
break
updated_messages.append(
{
"role": "assistant",
"content": [
{
"function_call": {
"name": call.name,
"args": call.arguments,
"call_id": call_id,
}
}
],
}
)
updated_messages.append(self.create_tool_message(call, tool_response))
except Exception as e:
logger.error(f"Error executing tool: {str(e)}", exc_info=True)
updated_messages.append(
{
"role": "tool",
"content": f"Error executing tool: {str(e)}",
"tool_call_id": call.id,
}
)
return updated_messages
def handle_non_streaming(
self, agent, response: Any, tools_dict: Dict, messages: List[Dict]
) -> Generator:
"""
Handle non-streaming response flow.
Args:
agent: The agent instance
response: Current LLM response
tools_dict: Available tools dictionary
messages: Conversation history
Returns:
Final response after processing all tool calls
"""
parsed = self.parse_response(response)
self.llm_calls.append(build_stack_data(agent.llm))
while parsed.requires_tool_call:
tool_handler_gen = self.handle_tool_calls(
agent, parsed.tool_calls, tools_dict, messages
)
while True:
try:
yield next(tool_handler_gen)
except StopIteration as e:
messages = e.value
break
response = agent.llm.gen(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
parsed = self.parse_response(response)
self.llm_calls.append(build_stack_data(agent.llm))
return parsed.content
def handle_streaming(
self, agent, response: Any, tools_dict: Dict, messages: List[Dict]
) -> Generator:
"""
Handle streaming response flow.
Args:
agent: The agent instance
response: Current LLM response
tools_dict: Available tools dictionary
messages: Conversation history
Yields:
Streaming response chunks
"""
buffer = ""
tool_calls = {}
for chunk in self._iterate_stream(response):
if isinstance(chunk, str):
yield chunk
continue
parsed = self.parse_response(chunk)
if parsed.tool_calls:
for call in parsed.tool_calls:
if call.index not in tool_calls:
tool_calls[call.index] = call
else:
existing = tool_calls[call.index]
if call.id:
existing.id = call.id
if call.name:
existing.name = call.name
if call.arguments:
existing.arguments += call.arguments
if parsed.finish_reason == "tool_calls":
tool_handler_gen = self.handle_tool_calls(
agent, list(tool_calls.values()), tools_dict, messages
)
while True:
try:
yield next(tool_handler_gen)
except StopIteration as e:
messages = e.value
break
tool_calls = {}
response = agent.llm.gen_stream(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
self.llm_calls.append(build_stack_data(agent.llm))
yield from self.handle_streaming(agent, response, tools_dict, messages)
return
if parsed.content:
buffer += parsed.content
yield buffer
buffer = ""
if parsed.finish_reason == "stop":
return
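A small sketch of the data classes above:

from application.llm.handlers.base import LLMResponse, ToolCall

call = ToolCall.from_dict({"id": "call_1", "name": "search", "arguments": {"q": "docs"}})
resp = LLMResponse(content="", tool_calls=[call], finish_reason="tool_calls", raw_response=None)
assert resp.requires_tool_call  # has tool calls and finish_reason == "tool_calls"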

View File

@@ -0,0 +1,78 @@
import uuid
from typing import Any, Dict, Generator
from application.llm.handlers.base import LLMHandler, LLMResponse, ToolCall
class GoogleLLMHandler(LLMHandler):
"""Handler for Google's GenAI API."""
def parse_response(self, response: Any) -> LLMResponse:
"""Parse Google response into standardized format."""
if isinstance(response, str):
return LLMResponse(
content=response,
tool_calls=[],
finish_reason="stop",
raw_response=response,
)
if hasattr(response, "candidates"):
parts = response.candidates[0].content.parts if response.candidates else []
tool_calls = [
ToolCall(
id=str(uuid.uuid4()),
name=part.function_call.name,
arguments=part.function_call.args,
)
for part in parts
if hasattr(part, "function_call") and part.function_call is not None
]
content = " ".join(
part.text
for part in parts
if hasattr(part, "text") and part.text is not None
)
return LLMResponse(
content=content,
tool_calls=tool_calls,
finish_reason="tool_calls" if tool_calls else "stop",
raw_response=response,
)
else:
tool_calls = []
if hasattr(response, "function_call"):
tool_calls.append(
ToolCall(
id=str(uuid.uuid4()),
name=response.function_call.name,
arguments=response.function_call.args,
)
)
return LLMResponse(
content=response.text if hasattr(response, "text") else "",
tool_calls=tool_calls,
finish_reason="tool_calls" if tool_calls else "stop",
raw_response=response,
)
def create_tool_message(self, tool_call: ToolCall, result: Any) -> Dict:
"""Create Google-style tool message."""
from google.genai import types
return {
"role": "tool",
"content": [
types.Part.from_function_response(
name=tool_call.name, response={"result": result}
).to_json_dict()
],
}
def _iterate_stream(self, response: Any) -> Generator:
"""Iterate through Google streaming response."""
for chunk in response:
yield chunk

View File

@@ -0,0 +1,18 @@
from application.llm.handlers.base import LLMHandler
from application.llm.handlers.google import GoogleLLMHandler
from application.llm.handlers.openai import OpenAILLMHandler
class LLMHandlerCreator:
handlers = {
"openai": OpenAILLMHandler,
"google": GoogleLLMHandler,
"default": OpenAILLMHandler,
}
@classmethod
def create_handler(cls, llm_type: str, *args, **kwargs) -> LLMHandler:
handler_class = cls.handlers.get(llm_type.lower())
if not handler_class:
raise ValueError(f"No LLM handler class found for type {llm_type}")
return handler_class(*args, **kwargs)
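A usage sketch (the import path is assumed from the factory's location): unknown types raise a ValueError, and a plain-string response parses to a terminal stop:

from application.llm.handlers.llm_handler_creator import LLMHandlerCreator  # assumed path

handler = LLMHandlerCreator.create_handler("openai")
parsed = handler.parse_response("Hello!")
print(parsed.content, parsed.finish_reason)  # Hello! stop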

View File

@@ -0,0 +1,57 @@
from typing import Any, Dict, Generator
from application.llm.handlers.base import LLMHandler, LLMResponse, ToolCall
class OpenAILLMHandler(LLMHandler):
"""Handler for OpenAI API."""
def parse_response(self, response: Any) -> LLMResponse:
"""Parse OpenAI response into standardized format."""
if isinstance(response, str):
return LLMResponse(
content=response,
tool_calls=[],
finish_reason="stop",
raw_response=response,
)
message = getattr(response, "message", None) or getattr(response, "delta", None)
tool_calls = []
if hasattr(message, "tool_calls"):
tool_calls = [
ToolCall(
id=getattr(tc, "id", ""),
name=getattr(tc.function, "name", ""),
arguments=getattr(tc.function, "arguments", ""),
index=getattr(tc, "index", None),
)
for tc in message.tool_calls or []
]
return LLMResponse(
content=getattr(message, "content", ""),
tool_calls=tool_calls,
finish_reason=getattr(response, "finish_reason", ""),
raw_response=response,
)
def create_tool_message(self, tool_call: ToolCall, result: Any) -> Dict:
"""Create OpenAI-style tool message."""
return {
"role": "tool",
"content": [
{
"function_response": {
"name": tool_call.name,
"response": {"result": result},
"call_id": tool_call.id,
}
}
],
}
def _iterate_stream(self, response: Any) -> Generator:
"""Iterate through OpenAI streaming response."""
for chunk in response:
yield chunk

View File

@@ -0,0 +1,68 @@
from application.llm.base import BaseLLM
class HuggingFaceLLM(BaseLLM):
def __init__(
self,
api_key=None,
user_api_key=None,
llm_name="Arc53/DocsGPT-7B",
q=False,
*args,
**kwargs,
):
global hf
from langchain.llms import HuggingFacePipeline
if q:
import torch
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
pipeline,
BitsAndBytesConfig,
)
tokenizer = AutoTokenizer.from_pretrained(llm_name)
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
llm_name, quantization_config=bnb_config
)
else:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
tokenizer = AutoTokenizer.from_pretrained(llm_name)
model = AutoModelForCausalLM.from_pretrained(llm_name)
super().__init__(*args, **kwargs)
self.api_key = api_key
self.user_api_key = user_api_key
pipe = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
max_new_tokens=2000,
device_map="auto",
eos_token_id=tokenizer.eos_token_id,
)
hf = HuggingFacePipeline(pipeline=pipe)
def _raw_gen(self, baseself, model, messages, stream=False, **kwargs):
context = messages[0]["content"]
user_question = messages[-1]["content"]
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
result = hf(prompt)
return result.content
def _raw_gen_stream(self, baseself, model, messages, stream=True, **kwargs):
raise NotImplementedError("HuggingFaceLLM Streaming is not implemented yet.")

View File

@@ -0,0 +1,60 @@
from application.llm.base import BaseLLM
from application.core.settings import settings
import threading
class LlamaSingleton:
_instances = {}
_lock = threading.Lock() # Add a lock for thread synchronization
@classmethod
def get_instance(cls, llm_name):
if llm_name not in cls._instances:
try:
from llama_cpp import Llama
except ImportError:
raise ImportError(
"Please install llama_cpp using pip install llama-cpp-python"
)
cls._instances[llm_name] = Llama(model_path=llm_name, n_ctx=2048)
return cls._instances[llm_name]
@classmethod
def query_model(cls, llm, prompt, **kwargs):
with cls._lock:
return llm(prompt, **kwargs)
class LlamaCpp(BaseLLM):
def __init__(
self,
api_key=None,
user_api_key=None,
llm_name=settings.LLM_PATH,
*args,
**kwargs,
):
super().__init__(*args, **kwargs)
self.api_key = api_key
self.user_api_key = user_api_key
self.llama = LlamaSingleton.get_instance(llm_name)
def _raw_gen(self, baseself, model, messages, stream=False, **kwargs):
context = messages[0]["content"]
user_question = messages[-1]["content"]
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
result = LlamaSingleton.query_model(
self.llama, prompt, max_tokens=150, echo=False
)
return result["choices"][0]["text"].split("### Answer \n")[-1]
def _raw_gen_stream(self, baseself, model, messages, stream=True, **kwargs):
context = messages[0]["content"]
user_question = messages[-1]["content"]
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
result = LlamaSingleton.query_model(
self.llama, prompt, max_tokens=150, echo=False, stream=stream
)
for item in result:
for choice in item["choices"]:
yield choice["text"]

View File

@@ -0,0 +1,35 @@
from application.llm.groq import GroqLLM
from application.llm.openai import OpenAILLM, AzureOpenAILLM
from application.llm.sagemaker import SagemakerAPILLM
from application.llm.huggingface import HuggingFaceLLM
from application.llm.llama_cpp import LlamaCpp
from application.llm.anthropic import AnthropicLLM
from application.llm.docsgpt_provider import DocsGPTAPILLM
from application.llm.premai import PremAILLM
from application.llm.google_ai import GoogleLLM
from application.llm.novita import NovitaLLM
class LLMCreator:
llms = {
"openai": OpenAILLM,
"azure_openai": AzureOpenAILLM,
"sagemaker": SagemakerAPILLM,
"huggingface": HuggingFaceLLM,
"llama.cpp": LlamaCpp,
"anthropic": AnthropicLLM,
"docsgpt": DocsGPTAPILLM,
"premai": PremAILLM,
"groq": GroqLLM,
"google": GoogleLLM,
"novita": NovitaLLM,
}
@classmethod
def create_llm(cls, type, api_key, user_api_key, decoded_token, *args, **kwargs):
llm_class = cls.llms.get(type.lower())
if not llm_class:
raise ValueError(f"No LLM class found for type {type}")
        return llm_class(
            api_key, user_api_key, *args, decoded_token=decoded_token, **kwargs
        )
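# Usage sketch (key values illustrative): the registry maps a provider string
# to its class, so adding a provider is one import plus one entry in `llms`.
#
#     llm = LLMCreator.create_llm(
#         "openai", api_key="sk-...", user_api_key=None, decoded_token={}
#     )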

32
application/llm/novita.py Normal file
View File

@@ -0,0 +1,32 @@
from application.llm.base import BaseLLM
from openai import OpenAI
class NovitaLLM(BaseLLM):
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
super().__init__(*args, **kwargs)
self.client = OpenAI(api_key=api_key, base_url="https://api.novita.ai/v3/openai")
self.api_key = api_key
self.user_api_key = user_api_key
def _raw_gen(self, baseself, model, messages, stream=False, tools=None, **kwargs):
if tools:
response = self.client.chat.completions.create(
model=model, messages=messages, stream=stream, tools=tools, **kwargs
)
return response.choices[0]
else:
response = self.client.chat.completions.create(
model=model, messages=messages, stream=stream, **kwargs
)
return response.choices[0].message.content
def _raw_gen_stream(
self, baseself, model, messages, stream=True, tools=None, **kwargs
):
response = self.client.chat.completions.create(
model=model, messages=messages, stream=stream, **kwargs
)
for line in response:
if line.choices[0].delta.content is not None:
yield line.choices[0].delta.content

325
application/llm/openai.py Normal file
View File

@@ -0,0 +1,325 @@
import json
import base64
import logging
from application.core.settings import settings
from application.llm.base import BaseLLM
from application.storage.storage_creator import StorageCreator
class OpenAILLM(BaseLLM):
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
from openai import OpenAI
super().__init__(*args, **kwargs)
if isinstance(settings.OPENAI_BASE_URL, str) and settings.OPENAI_BASE_URL.strip():
self.client = OpenAI(api_key=api_key, base_url=settings.OPENAI_BASE_URL)
else:
DEFAULT_OPENAI_API_BASE = "https://api.openai.com/v1"
self.client = OpenAI(api_key=api_key, base_url=DEFAULT_OPENAI_API_BASE)
self.api_key = api_key
self.user_api_key = user_api_key
self.storage = StorageCreator.get_storage()
def _clean_messages_openai(self, messages):
cleaned_messages = []
for message in messages:
role = message.get("role")
content = message.get("content")
if role == "model":
role = "assistant"
if role and content is not None:
if isinstance(content, str):
cleaned_messages.append({"role": role, "content": content})
elif isinstance(content, list):
for item in content:
if "text" in item:
cleaned_messages.append(
{"role": role, "content": item["text"]}
)
elif "function_call" in item:
tool_call = {
"id": item["function_call"]["call_id"],
"type": "function",
"function": {
"name": item["function_call"]["name"],
"arguments": json.dumps(
item["function_call"]["args"]
),
},
}
cleaned_messages.append(
{
"role": "assistant",
"content": None,
"tool_calls": [tool_call],
}
)
elif "function_response" in item:
cleaned_messages.append(
{
"role": "tool",
"tool_call_id": item["function_response"][
"call_id"
],
"content": json.dumps(
item["function_response"]["response"]["result"]
),
}
)
elif isinstance(item, dict):
content_parts = []
if "text" in item:
content_parts.append({"type": "text", "text": item["text"]})
elif "type" in item and item["type"] == "text" and "text" in item:
content_parts.append(item)
elif "type" in item and item["type"] == "file" and "file" in item:
content_parts.append(item)
elif "type" in item and item["type"] == "image_url" and "image_url" in item:
content_parts.append(item)
cleaned_messages.append({"role": role, "content": content_parts})
else:
raise ValueError(
f"Unexpected content dictionary format: {item}"
)
else:
raise ValueError(f"Unexpected content type: {type(content)}")
return cleaned_messages
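    # Illustrative mapping performed above (shapes taken from this method,
    # values hypothetical): an internal "function_call" item becomes an
    # assistant message with tool_calls, and a "function_response" item
    # becomes a "tool" role message, e.g.
    #   {"role": "model", "content": [{"function_call":
    #       {"call_id": "c1", "name": "search", "args": {"q": "x"}}}]}
    #   -> {"role": "assistant", "content": None, "tool_calls": [
    #       {"id": "c1", "type": "function",
    #        "function": {"name": "search", "arguments": '{"q": "x"}'}}]}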
def _raw_gen(
self,
baseself,
model,
messages,
stream=False,
tools=None,
engine=settings.AZURE_DEPLOYMENT_NAME,
**kwargs,
):
messages = self._clean_messages_openai(messages)
if tools:
response = self.client.chat.completions.create(
model=model,
messages=messages,
stream=stream,
tools=tools,
**kwargs,
)
return response.choices[0]
else:
response = self.client.chat.completions.create(
model=model, messages=messages, stream=stream, **kwargs
)
return response.choices[0].message.content
def _raw_gen_stream(
self,
baseself,
model,
messages,
stream=True,
tools=None,
engine=settings.AZURE_DEPLOYMENT_NAME,
**kwargs,
):
messages = self._clean_messages_openai(messages)
if tools:
response = self.client.chat.completions.create(
model=model,
messages=messages,
stream=stream,
tools=tools,
**kwargs,
)
else:
response = self.client.chat.completions.create(
model=model, messages=messages, stream=stream, **kwargs
)
        for line in response:
            if not line.choices:
                continue
            delta_content = line.choices[0].delta.content
            if delta_content:
                yield delta_content
            else:
                yield line.choices[0]
def _supports_tools(self):
return True
def get_supported_attachment_types(self):
"""
Return a list of MIME types supported by OpenAI for file uploads.
Returns:
list: List of supported MIME types
"""
return [
'application/pdf',
'image/png',
'image/jpeg',
'image/jpg',
'image/webp',
'image/gif'
]
def prepare_messages_with_attachments(self, messages, attachments=None):
"""
Process attachments using OpenAI's file API for more efficient handling.
Args:
messages (list): List of message dictionaries.
attachments (list): List of attachment dictionaries with content and metadata.
Returns:
list: Messages formatted with file references for OpenAI API.
"""
if not attachments:
return messages
prepared_messages = messages.copy()
# Find the user message to attach file_id to the last one
user_message_index = None
for i in range(len(prepared_messages) - 1, -1, -1):
if prepared_messages[i].get("role") == "user":
user_message_index = i
break
if user_message_index is None:
user_message = {"role": "user", "content": []}
prepared_messages.append(user_message)
user_message_index = len(prepared_messages) - 1
if isinstance(prepared_messages[user_message_index].get("content"), str):
text_content = prepared_messages[user_message_index]["content"]
prepared_messages[user_message_index]["content"] = [
{"type": "text", "text": text_content}
]
elif not isinstance(prepared_messages[user_message_index].get("content"), list):
prepared_messages[user_message_index]["content"] = []
for attachment in attachments:
mime_type = attachment.get('mime_type')
if mime_type and mime_type.startswith('image/'):
try:
base64_image = self._get_base64_image(attachment)
prepared_messages[user_message_index]["content"].append({
"type": "image_url",
"image_url": {
"url": f"data:{mime_type};base64,{base64_image}"
}
})
except Exception as e:
logging.error(f"Error processing image attachment: {e}", exc_info=True)
if 'content' in attachment:
prepared_messages[user_message_index]["content"].append({
"type": "text",
"text": f"[Image could not be processed: {attachment.get('path', 'unknown')}]"
})
# Handle PDFs using the file API
elif mime_type == 'application/pdf':
try:
file_id = self._upload_file_to_openai(attachment)
prepared_messages[user_message_index]["content"].append({
"type": "file",
"file": {"file_id": file_id}
})
except Exception as e:
logging.error(f"Error uploading PDF to OpenAI: {e}", exc_info=True)
if 'content' in attachment:
prepared_messages[user_message_index]["content"].append({
"type": "text",
"text": f"File content:\n\n{attachment['content']}"
})
return prepared_messages
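    # Expected attachment shape (inferred from the accesses above; field
    # values are hypothetical):
    #   {"path": "attachments/report.pdf", "mime_type": "application/pdf",
    #    "content": "optional extracted-text fallback", "_id": ObjectId(...),
    #    "openai_file_id": "file-abc123"}  # present once cached by upload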
def _get_base64_image(self, attachment):
"""
Convert an image file to base64 encoding.
Args:
attachment (dict): Attachment dictionary with path and metadata.
Returns:
str: Base64-encoded image data.
"""
file_path = attachment.get('path')
if not file_path:
raise ValueError("No file path provided in attachment")
try:
with self.storage.get_file(file_path) as image_file:
return base64.b64encode(image_file.read()).decode('utf-8')
except FileNotFoundError:
raise FileNotFoundError(f"File not found: {file_path}")
def _upload_file_to_openai(self, attachment):
"""
Upload a file to OpenAI and return the file_id.
Args:
attachment (dict): Attachment dictionary with path and metadata.
Expected keys:
- path: Path to the file
- id: Optional MongoDB ID for caching
Returns:
str: OpenAI file_id for the uploaded file.
"""
if 'openai_file_id' in attachment:
return attachment['openai_file_id']
file_path = attachment.get('path')
if not self.storage.file_exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
try:
file_id = self.storage.process_file(
file_path,
lambda local_path, **kwargs: self.client.files.create(
file=open(local_path, 'rb'),
purpose="assistants"
).id
)
from application.core.mongo_db import MongoDB
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
attachments_collection = db["attachments"]
if '_id' in attachment:
attachments_collection.update_one(
{"_id": attachment['_id']},
{"$set": {"openai_file_id": file_id}}
)
return file_id
except Exception as e:
logging.error(f"Error uploading file to OpenAI: {e}", exc_info=True)
raise
class AzureOpenAILLM(OpenAILLM):
def __init__(
self, api_key, user_api_key, *args, **kwargs
):
        super().__init__(api_key, user_api_key, *args, **kwargs)
        self.api_base = settings.OPENAI_API_BASE
        self.api_version = settings.OPENAI_API_VERSION
        self.deployment_name = settings.AZURE_DEPLOYMENT_NAME
from openai import AzureOpenAI
self.client = AzureOpenAI(
api_key=api_key,
api_version=settings.OPENAI_API_VERSION,
azure_endpoint=settings.OPENAI_API_BASE
)

38
application/llm/premai.py Normal file
View File

@@ -0,0 +1,38 @@
from application.llm.base import BaseLLM
from application.core.settings import settings
class PremAILLM(BaseLLM):
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
from premai import Prem
super().__init__(*args, **kwargs)
self.client = Prem(api_key=api_key)
self.api_key = api_key
self.user_api_key = user_api_key
self.project_id = settings.PREMAI_PROJECT_ID
def _raw_gen(self, baseself, model, messages, stream=False, **kwargs):
response = self.client.chat.completions.create(
model=model,
project_id=self.project_id,
messages=messages,
stream=stream,
**kwargs
)
return response.choices[0].message["content"]
def _raw_gen_stream(self, baseself, model, messages, stream=True, **kwargs):
response = self.client.chat.completions.create(
model=model,
project_id=self.project_id,
messages=messages,
stream=stream,
**kwargs
)
for line in response:
if line.choices[0].delta["content"] is not None:
yield line.choices[0].delta["content"]

View File

@@ -0,0 +1,140 @@
from application.llm.base import BaseLLM
from application.core.settings import settings
import json
import io
class LineIterator:
"""
A helper class for parsing the byte stream input.
The output of the model will be in the following format:
```
b'{"outputs": [" a"]}\n'
b'{"outputs": [" challenging"]}\n'
b'{"outputs": [" problem"]}\n'
...
```
While usually each PayloadPart event from the event stream will contain a byte array
with a full json, this is not guaranteed and some of the json objects may be split across
PayloadPart events. For example:
```
{'PayloadPart': {'Bytes': b'{"outputs": '}}
{'PayloadPart': {'Bytes': b'[" problem"]}\n'}}
```
This class accounts for this by concatenating bytes written via the 'write' function
and then exposing a method which will return lines (ending with a '\n' character) within
the buffer via the 'scan_lines' function. It maintains the position of the last read
position to ensure that previous bytes are not exposed again.
"""
def __init__(self, stream):
self.byte_iterator = iter(stream)
self.buffer = io.BytesIO()
self.read_pos = 0
def __iter__(self):
return self
def __next__(self):
while True:
self.buffer.seek(self.read_pos)
line = self.buffer.readline()
if line and line[-1] == ord("\n"):
self.read_pos += len(line)
return line[:-1]
try:
chunk = next(self.byte_iterator)
except StopIteration:
if self.read_pos < self.buffer.getbuffer().nbytes:
continue
raise
if "PayloadPart" not in chunk:
print("Unknown event type:" + chunk)
continue
self.buffer.seek(0, io.SEEK_END)
self.buffer.write(chunk["PayloadPart"]["Bytes"])
class SagemakerAPILLM(BaseLLM):
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
import boto3
        runtime = boto3.client(
            "runtime.sagemaker",
            aws_access_key_id="xxx",  # placeholder; supply real credentials or use an IAM role
            aws_secret_access_key="xxx",
            region_name="us-west-2",
        )
super().__init__(*args, **kwargs)
self.api_key = api_key
self.user_api_key = user_api_key
self.endpoint = settings.SAGEMAKER_ENDPOINT
self.runtime = runtime
def _raw_gen(self, baseself, model, messages, stream=False, tools=None, **kwargs):
context = messages[0]["content"]
user_question = messages[-1]["content"]
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
# Construct payload for endpoint
payload = {
"inputs": prompt,
"stream": False,
"parameters": {
"do_sample": True,
"temperature": 0.1,
"max_new_tokens": 30,
"repetition_penalty": 1.03,
"stop": ["</s>", "###"],
},
}
body_bytes = json.dumps(payload).encode("utf-8")
# Invoke the endpoint
response = self.runtime.invoke_endpoint(
EndpointName=self.endpoint, ContentType="application/json", Body=body_bytes
)
result = json.loads(response["Body"].read().decode())
return result[0]["generated_text"][len(prompt) :]
def _raw_gen_stream(self, baseself, model, messages, stream=True, tools=None, **kwargs):
context = messages[0]["content"]
user_question = messages[-1]["content"]
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
# Construct payload for endpoint
payload = {
"inputs": prompt,
"stream": True,
"parameters": {
"do_sample": True,
"temperature": 0.1,
"max_new_tokens": 512,
"repetition_penalty": 1.03,
"stop": ["</s>", "###"],
},
}
body_bytes = json.dumps(payload).encode("utf-8")
# Invoke the endpoint
response = self.runtime.invoke_endpoint_with_response_stream(
EndpointName=self.endpoint, ContentType="application/json", Body=body_bytes
)
        event_stream = response["Body"]
        start_json = b"{"
        for line in LineIterator(event_stream):
            if line != b"" and start_json in line:
                data = json.loads(line[line.find(start_json):].decode("utf-8"))
                if data["token"]["text"] not in ["</s>", "###"]:
                    yield data["token"]["text"]

154
application/logging.py Normal file
View File

@@ -0,0 +1,154 @@
import datetime
import functools
import inspect
import logging
import uuid
from typing import Any, Callable, Dict, Generator, List
from application.core.mongo_db import MongoDB
from application.core.settings import settings
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
class LogContext:
def __init__(self, endpoint, activity_id, user, api_key, query):
self.endpoint = endpoint
self.activity_id = activity_id
self.user = user
self.api_key = api_key
self.query = query
self.stacks = []
def build_stack_data(
obj: Any,
include_attributes: List[str] = None,
exclude_attributes: List[str] = None,
custom_data: Dict = None,
) -> Dict:
if obj is None:
raise ValueError("The 'obj' parameter cannot be None")
data = {}
if include_attributes is None:
include_attributes = []
for name, value in inspect.getmembers(obj):
if (
not name.startswith("_")
and not inspect.ismethod(value)
and not inspect.isfunction(value)
):
include_attributes.append(name)
for attr_name in include_attributes:
if exclude_attributes and attr_name in exclude_attributes:
continue
try:
attr_value = getattr(obj, attr_name)
if attr_value is not None:
if isinstance(attr_value, (int, float, str, bool)):
data[attr_name] = attr_value
elif isinstance(attr_value, list):
if all(isinstance(item, dict) for item in attr_value):
data[attr_name] = attr_value
elif all(hasattr(item, "__dict__") for item in attr_value):
data[attr_name] = [item.__dict__ for item in attr_value]
else:
data[attr_name] = [str(item) for item in attr_value]
elif isinstance(attr_value, dict):
data[attr_name] = {k: str(v) for k, v in attr_value.items()}
        except AttributeError as e:
            logging.warning(f"AttributeError while accessing {attr_name}: {e}")
if custom_data:
data.update(custom_data)
return data
def log_activity() -> Callable:
def decorator(func: Callable) -> Callable:
@functools.wraps(func)
def wrapper(*args: Any, **kwargs: Any) -> Any:
activity_id = str(uuid.uuid4())
data = build_stack_data(args[0])
endpoint = data.get("endpoint", "")
user = data.get("user", "local")
api_key = data.get("user_api_key", "")
query = kwargs.get("query", getattr(args[0], "query", ""))
context = LogContext(endpoint, activity_id, user, api_key, query)
kwargs["log_context"] = context
logging.info(
f"Starting activity: {endpoint} - {activity_id} - User: {user}"
)
generator = func(*args, **kwargs)
yield from _consume_and_log(generator, context)
return wrapper
return decorator
def _consume_and_log(generator: Generator, context: "LogContext"):
try:
for item in generator:
yield item
except Exception as e:
logging.exception(f"Error in {context.endpoint} - {context.activity_id}: {e}")
context.stacks.append({"component": "error", "data": {"message": str(e)}})
_log_to_mongodb(
endpoint=context.endpoint,
activity_id=context.activity_id,
user=context.user,
api_key=context.api_key,
query=context.query,
stacks=context.stacks,
level="error",
)
raise
finally:
_log_to_mongodb(
endpoint=context.endpoint,
activity_id=context.activity_id,
user=context.user,
api_key=context.api_key,
query=context.query,
stacks=context.stacks,
level="info",
)
def _log_to_mongodb(
endpoint: str,
activity_id: str,
user: str,
api_key: str,
query: str,
stacks: List[Dict],
level: str,
) -> None:
try:
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
user_logs_collection = db["stack_logs"]
log_entry = {
"endpoint": endpoint,
"id": activity_id,
"level": level,
"user": user,
"api_key": api_key,
"query": query,
"stacks": stacks,
"timestamp": datetime.datetime.now(datetime.timezone.utc),
}
user_logs_collection.insert_one(log_entry)
logging.debug(f"Logged activity to MongoDB: {activity_id}")
except Exception as e:
logging.error(f"Failed to log to MongoDB: {e}", exc_info=True)

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,118 @@
import re
from typing import List, Tuple
import logging
from application.parser.schema.base import Document
from application.utils import get_encoding
logger = logging.getLogger(__name__)
class Chunker:
def __init__(
self,
chunking_strategy: str = "classic_chunk",
max_tokens: int = 2000,
min_tokens: int = 150,
duplicate_headers: bool = False,
):
if chunking_strategy not in ["classic_chunk"]:
raise ValueError(f"Unsupported chunking strategy: {chunking_strategy}")
self.chunking_strategy = chunking_strategy
self.max_tokens = max_tokens
self.min_tokens = min_tokens
self.duplicate_headers = duplicate_headers
self.encoding = get_encoding()
def separate_header_and_body(self, text: str) -> Tuple[str, str]:
header_pattern = r"^(.*?\n){3}"
match = re.match(header_pattern, text)
if match:
header = match.group(0)
body = text[len(header):]
else:
header, body = "", text # No header, treat entire text as body
return header, body
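    # Example: the header is just the first three lines, e.g.
    #   "Title\nAuthor\nDate\nBody text..." ->
    #   header = "Title\nAuthor\nDate\n", body = "Body text..."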
def combine_documents(self, doc: Document, next_doc: Document) -> Document:
combined_text = doc.text + " " + next_doc.text
combined_token_count = len(self.encoding.encode(combined_text))
new_doc = Document(
text=combined_text,
doc_id=doc.doc_id,
embedding=doc.embedding,
extra_info={**(doc.extra_info or {}), "token_count": combined_token_count}
)
return new_doc
def split_document(self, doc: Document) -> List[Document]:
split_docs = []
header, body = self.separate_header_and_body(doc.text)
header_tokens = self.encoding.encode(header) if header else []
body_tokens = self.encoding.encode(body)
current_position = 0
part_index = 0
while current_position < len(body_tokens):
end_position = current_position + self.max_tokens - len(header_tokens)
chunk_tokens = (header_tokens + body_tokens[current_position:end_position]
if self.duplicate_headers or part_index == 0 else body_tokens[current_position:end_position])
chunk_text = self.encoding.decode(chunk_tokens)
new_doc = Document(
text=chunk_text,
doc_id=f"{doc.doc_id}-{part_index}",
embedding=doc.embedding,
extra_info={**(doc.extra_info or {}), "token_count": len(chunk_tokens)}
)
split_docs.append(new_doc)
current_position = end_position
part_index += 1
header_tokens = []
return split_docs
def classic_chunk(self, documents: List[Document]) -> List[Document]:
processed_docs = []
i = 0
while i < len(documents):
doc = documents[i]
tokens = self.encoding.encode(doc.text)
token_count = len(tokens)
if self.min_tokens <= token_count <= self.max_tokens:
doc.extra_info = doc.extra_info or {}
doc.extra_info["token_count"] = token_count
processed_docs.append(doc)
i += 1
elif token_count < self.min_tokens:
if i + 1 < len(documents):
next_doc = documents[i + 1]
next_tokens = self.encoding.encode(next_doc.text)
if token_count + len(next_tokens) <= self.max_tokens:
# Combine small documents
combined_doc = self.combine_documents(doc, next_doc)
processed_docs.append(combined_doc)
i += 2
else:
# Keep the small document as is if adding next_doc would exceed max_tokens
doc.extra_info = doc.extra_info or {}
doc.extra_info["token_count"] = token_count
processed_docs.append(doc)
i += 1
else:
# No next document to combine with; add the small document as is
doc.extra_info = doc.extra_info or {}
doc.extra_info["token_count"] = token_count
processed_docs.append(doc)
i += 1
else:
# Split large documents
processed_docs.extend(self.split_document(doc))
i += 1
return processed_docs
def chunk(
self,
documents: List[Document]
) -> List[Document]:
if self.chunking_strategy == "classic_chunk":
return self.classic_chunk(documents)
else:
raise ValueError("Unsupported chunking strategy")

View File

@@ -0,0 +1,86 @@
import os
import logging
from retry import retry
from tqdm import tqdm
from application.core.settings import settings
from application.vectorstore.vector_creator import VectorCreator
@retry(tries=10, delay=60)
def add_text_to_store_with_retry(store, doc, source_id):
"""
Add a document's text and metadata to the vector store with retry logic.
Args:
store: The vector store object.
doc: The document to be added.
source_id: Unique identifier for the source.
"""
try:
doc.metadata["source_id"] = str(source_id)
store.add_texts([doc.page_content], metadatas=[doc.metadata])
except Exception as e:
logging.error(f"Failed to add document with retry: {e}", exc_info=True)
raise
def embed_and_store_documents(docs, folder_name, source_id, task_status):
"""
Embeds documents and stores them in a vector store.
Args:
docs (list): List of documents to be embedded and stored.
folder_name (str): Directory to save the vector store.
source_id (str): Unique identifier for the source.
task_status: Task state manager for progress updates.
Returns:
None
"""
# Ensure the folder exists
if not os.path.exists(folder_name):
os.makedirs(folder_name)
# Initialize vector store
if settings.VECTOR_STORE == "faiss":
docs_init = [docs.pop(0)]
store = VectorCreator.create_vectorstore(
settings.VECTOR_STORE,
docs_init=docs_init,
source_id=folder_name,
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
)
else:
store = VectorCreator.create_vectorstore(
settings.VECTOR_STORE,
source_id=source_id,
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
)
store.delete_index()
total_docs = len(docs)
# Process and embed documents
for idx, doc in tqdm(
enumerate(docs),
desc="Embedding 🦖",
unit="docs",
total=total_docs,
bar_format="{l_bar}{bar}| Time Left: {remaining}",
):
try:
# Update task status for progress tracking
progress = int(((idx + 1) / total_docs) * 100)
task_status.update_state(state="PROGRESS", meta={"current": progress})
# Add document to vector store
add_text_to_store_with_retry(store, doc, source_id)
except Exception as e:
logging.error(f"Error embedding document {idx}: {e}", exc_info=True)
logging.info(f"Saving progress at document {idx} out of {total_docs}")
store.save_local(folder_name)
break
# Save the vector store
if settings.VECTOR_STORE == "faiss":
store.save_local(folder_name)
logging.info("Vector store saved successfully.")

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,19 @@
"""Base reader class."""
from abc import abstractmethod
from typing import Any, List
from langchain.docstore.document import Document as LCDocument
from application.parser.schema.base import Document
class BaseReader:
"""Utilities for loading data from a directory."""
@abstractmethod
def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
"""Load data from the input directory."""
def load_langchain_documents(self, **load_kwargs: Any) -> List[LCDocument]:
"""Load data in LangChain document format."""
docs = self.load_data(**load_kwargs)
return [d.to_langchain_format() for d in docs]

View File

@@ -0,0 +1,38 @@
"""Base parser and config class."""
from abc import abstractmethod
from pathlib import Path
from typing import Dict, List, Optional, Union
class BaseParser:
"""Base class for all parsers."""
def __init__(self, parser_config: Optional[Dict] = None):
"""Init params."""
self._parser_config = parser_config
def init_parser(self) -> None:
"""Init parser and store it."""
parser_config = self._init_parser()
self._parser_config = parser_config
@property
def parser_config_set(self) -> bool:
"""Check if parser config is set."""
return self._parser_config is not None
@property
def parser_config(self) -> Dict:
"""Check if parser config is set."""
if self._parser_config is None:
raise ValueError("Parser config not set.")
return self._parser_config
@abstractmethod
def _init_parser(self) -> Dict:
"""Initialize the parser with the config."""
@abstractmethod
def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
"""Parse file."""

View File

@@ -0,0 +1,183 @@
"""Simple reader that reads files of different formats from a directory."""
import logging
from pathlib import Path
from typing import Callable, Dict, List, Optional, Union
from application.parser.file.base import BaseReader
from application.parser.file.base_parser import BaseParser
from application.parser.file.docs_parser import DocxParser, PDFParser
from application.parser.file.epub_parser import EpubParser
from application.parser.file.html_parser import HTMLParser
from application.parser.file.markdown_parser import MarkdownParser
from application.parser.file.rst_parser import RstParser
from application.parser.file.tabular_parser import PandasCSVParser, ExcelParser
from application.parser.file.json_parser import JSONParser
from application.parser.file.pptx_parser import PPTXParser
from application.parser.file.image_parser import ImageParser
from application.parser.schema.base import Document
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
".pdf": PDFParser(),
".docx": DocxParser(),
".csv": PandasCSVParser(),
".xlsx":ExcelParser(),
".epub": EpubParser(),
".md": MarkdownParser(),
".rst": RstParser(),
".html": HTMLParser(),
".mdx": MarkdownParser(),
".json":JSONParser(),
".pptx":PPTXParser(),
".png": ImageParser(),
".jpg": ImageParser(),
".jpeg": ImageParser(),
}
class SimpleDirectoryReader(BaseReader):
"""Simple directory reader.
Can read files into separate documents, or concatenates
files into one document text.
Args:
input_dir (str): Path to the directory.
input_files (List): List of file paths to read (Optional; overrides input_dir)
exclude_hidden (bool): Whether to exclude hidden files (dotfiles).
errors (str): how encoding and decoding errors are to be handled,
see https://docs.python.org/3/library/functions.html#open
recursive (bool): Whether to recursively search in subdirectories.
False by default.
required_exts (Optional[List[str]]): List of required extensions.
Default is None.
file_extractor (Optional[Dict[str, BaseParser]]): A mapping of file
extension to a BaseParser class that specifies how to convert that file
to text. See DEFAULT_FILE_EXTRACTOR.
num_files_limit (Optional[int]): Maximum number of files to read.
Default is None.
        file_metadata (Optional[Callable[[str], Dict]]): A function that takes
in a filename and returns a Dict of metadata for the Document.
Default is None.
"""
def __init__(
self,
input_dir: Optional[str] = None,
input_files: Optional[List] = None,
exclude_hidden: bool = True,
errors: str = "ignore",
recursive: bool = True,
required_exts: Optional[List[str]] = None,
file_extractor: Optional[Dict[str, BaseParser]] = None,
num_files_limit: Optional[int] = None,
file_metadata: Optional[Callable[[str], Dict]] = None,
) -> None:
"""Initialize with parameters."""
super().__init__()
if not input_dir and not input_files:
raise ValueError("Must provide either `input_dir` or `input_files`.")
self.errors = errors
self.recursive = recursive
self.exclude_hidden = exclude_hidden
self.required_exts = required_exts
self.num_files_limit = num_files_limit
if input_files:
self.input_files = []
for path in input_files:
input_file = Path(path)
self.input_files.append(input_file)
elif input_dir:
self.input_dir = Path(input_dir)
self.input_files = self._add_files(self.input_dir)
self.file_extractor = file_extractor or DEFAULT_FILE_EXTRACTOR
self.file_metadata = file_metadata
def _add_files(self, input_dir: Path) -> List[Path]:
"""Add files."""
input_files = sorted(input_dir.iterdir())
new_input_files = []
dirs_to_explore = []
for input_file in input_files:
if input_file.is_dir():
if self.recursive:
dirs_to_explore.append(input_file)
elif self.exclude_hidden and input_file.name.startswith("."):
continue
elif (
self.required_exts is not None
and input_file.suffix not in self.required_exts
):
continue
else:
new_input_files.append(input_file)
for dir_to_explore in dirs_to_explore:
sub_input_files = self._add_files(dir_to_explore)
new_input_files.extend(sub_input_files)
if self.num_files_limit is not None and self.num_files_limit > 0:
new_input_files = new_input_files[0: self.num_files_limit]
# print total number of files added
logging.debug(
f"> [SimpleDirectoryReader] Total files added: {len(new_input_files)}"
)
return new_input_files
def load_data(self, concatenate: bool = False) -> List[Document]:
"""Load data from the input directory.
Args:
concatenate (bool): whether to concatenate all files into one document.
If set to True, file metadata is ignored.
False by default.
Returns:
List[Document]: A list of documents.
"""
data: Union[str, List[str]] = ""
data_list: List[str] = []
metadata_list = []
for input_file in self.input_files:
if input_file.suffix in self.file_extractor:
parser = self.file_extractor[input_file.suffix]
if not parser.parser_config_set:
parser.init_parser()
data = parser.parse_file(input_file, errors=self.errors)
else:
# do standard read
with open(input_file, "r", errors=self.errors) as f:
data = f.read()
# Prepare metadata for this file
if self.file_metadata is not None:
file_metadata = self.file_metadata(input_file.name)
else:
# Provide a default empty metadata
file_metadata = {'title': '', 'store': ''}
# TODO: Find a case with no metadata and check if breaks anything
if isinstance(data, List):
# Extend data_list with each item in the data list
data_list.extend([str(d) for d in data])
# For each item in the data list, add the file's metadata to metadata_list
metadata_list.extend([file_metadata for _ in data])
else:
# Add the single piece of data to data_list
data_list.append(str(data))
# Add the file's metadata to metadata_list
metadata_list.append(file_metadata)
if concatenate:
return [Document("\n".join(data_list))]
elif self.file_metadata is not None:
return [Document(d, extra_info=m) for d, m in zip(data_list, metadata_list)]
else:
return [Document(d) for d in data_list]
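# Usage sketch (paths illustrative):
#
#     reader = SimpleDirectoryReader(input_dir="./docs", required_exts=[".md"])
#     documents = reader.load_data()               # one Document per file/item
#     merged = reader.load_data(concatenate=True)  # single concatenated Document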

View File

@@ -0,0 +1,70 @@
"""Docs parser.
Contains parsers for docx, pdf files.
"""
from pathlib import Path
from typing import Dict
from application.parser.file.base_parser import BaseParser
from application.core.settings import settings
import requests
class PDFParser(BaseParser):
"""PDF parser."""
def _init_parser(self) -> Dict:
"""Init parser."""
return {}
def parse_file(self, file: Path, errors: str = "ignore") -> str:
"""Parse file."""
if settings.PARSE_PDF_AS_IMAGE:
doc2md_service = "https://llm.arc53.com/doc2md"
# alternatively you can use local vision capable LLM
with open(file, "rb") as file_loaded:
files = {'file': file_loaded}
response = requests.post(doc2md_service, files=files)
data = response.json()["markdown"]
return data
try:
from pypdf import PdfReader
except ImportError:
raise ValueError("pypdf is required to read PDF files.")
text_list = []
with open(file, "rb") as fp:
# Create a PDF object
pdf = PdfReader(fp)
# Get the number of pages in the PDF document
num_pages = len(pdf.pages)
# Iterate over every page
for page_index in range(num_pages):
# Extract the text from the page
page = pdf.pages[page_index]
page_text = page.extract_text()
text_list.append(page_text)
text = "\n".join(text_list)
return text
class DocxParser(BaseParser):
"""Docx parser."""
def _init_parser(self) -> Dict:
"""Init parser."""
return {}
def parse_file(self, file: Path, errors: str = "ignore") -> str:
"""Parse file."""
try:
import docx2txt
except ImportError:
raise ValueError("docx2txt is required to read Microsoft Word files.")
text = docx2txt.process(file)
return text

View File

@@ -0,0 +1,43 @@
"""Epub parser.
Contains parsers for epub files.
"""
from pathlib import Path
from typing import Dict
from application.parser.file.base_parser import BaseParser
class EpubParser(BaseParser):
"""Epub Parser."""
def _init_parser(self) -> Dict:
"""Init parser."""
return {}
def parse_file(self, file: Path, errors: str = "ignore") -> str:
"""Parse file."""
try:
import ebooklib
from ebooklib import epub
except ImportError:
raise ValueError("`EbookLib` is required to read Epub files.")
try:
import html2text
except ImportError:
raise ValueError("`html2text` is required to parse Epub files.")
text_list = []
book = epub.read_epub(file, options={"ignore_ncx": True})
# Iterate through all chapters.
for item in book.get_items():
# Chapters are typically located in epub documents items.
if item.get_type() == ebooklib.ITEM_DOCUMENT:
text_list.append(
html2text.html2text(item.get_content().decode("utf-8"))
)
text = "\n".join(text_list)
return text

View File

@@ -0,0 +1,24 @@
"""HTML parser.
Contains parser for html files.
"""
from pathlib import Path
from typing import Dict, Union
from application.parser.file.base_parser import BaseParser
class HTMLParser(BaseParser):
"""HTML parser."""
def _init_parser(self) -> Dict:
"""Init parser."""
return {}
def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, list[str]]:
from langchain_community.document_loaders import BSHTMLLoader
loader = BSHTMLLoader(file)
data = loader.load()
return data

Some files were not shown because too many files have changed in this diff.