Compare commits

..

190 Commits

Author SHA1 Message Date
Alex
5a9d512679 feat: enhance prompt variable handling and add system variable options in prompts modal 2025-11-03 13:48:07 +00:00
Manish Madan
6dd32fd4ca Merge pull request #2111 from arc53/dependabot/npm_and_yarn/frontend/mermaid-11.12.1
chore(deps): bump mermaid from 11.12.0 to 11.12.1 in /frontend
2025-11-03 19:14:00 +05:30
dependabot[bot]
b17b1c70b5 chore(deps): bump mermaid from 11.12.0 to 11.12.1 in /frontend
Bumps [mermaid](https://github.com/mermaid-js/mermaid) from 11.12.0 to 11.12.1.
- [Release notes](https://github.com/mermaid-js/mermaid/releases)
- [Commits](https://github.com/mermaid-js/mermaid/compare/mermaid@11.12.0...mermaid@11.12.1)

---
updated-dependencies:
- dependency-name: mermaid
  dependency-version: 11.12.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-03 19:07:58 +05:30
Manish Madan
3f5b31fb5f Merge pull request #2084 from arc53/dependabot/npm_and_yarn/frontend/typescript-eslint/eslint-plugin-8.46.2
chore(deps-dev): bump @typescript-eslint/eslint-plugin from 5.51.0 to 8.46.2 in /frontend
2025-11-03 18:51:23 +05:30
ManishMadan2882
06bda6bd55 fix: resolve peer dependency conflicts in eslint and typescript-eslint packages
- Update @typescript-eslint/eslint-plugin from ^8.46.2 to ^6.21.0
- Update @typescript-eslint/parser from ^5.62.0 to ^6.21.0
- Update eslint-config-standard-with-typescript from ^34.0.0 to ^43.0.1
- Ensure all dependencies are compatible without requiring --legacy-peer-deps
- Verified with successful npm install and vite build
2025-11-03 18:47:34 +05:30
Christine Belzie
7dd97821a8 feat: Installing vale(redo) (#2104)
* docs: setup Vale

Signed-off-by: Christine Belzie <shecoder30@gmail.com>

* adding more content

* chore: add Vale configuration and custom dictionary

* chore: clean up spelling configuration and remove unused vocabularies

* fix: correct file path format for Vale linter configuration

---------

Signed-off-by: Christine Belzie <shecoder30@gmail.com>
Co-authored-by: Alex <a@tushynski.me>
2025-10-31 18:00:09 +02:00
Harshit Ranjan
695191d888 added error saving vector store (#2081)
* added error saving vector store

* fixed code formating

* added tests for embedding pipeline
2025-10-31 16:29:35 +02:00
dependabot[bot]
1dbcef24c7 chore(deps-dev): bump @typescript-eslint/eslint-plugin in /frontend
Bumps [@typescript-eslint/eslint-plugin](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin) from 5.51.0 to 8.46.2.
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/eslint-plugin/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.46.2/packages/eslint-plugin)

---
updated-dependencies:
- dependency-name: "@typescript-eslint/eslint-plugin"
  dependency-version: 8.46.2
  dependency-type: direct:development
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-31 14:04:19 +00:00
Manish Madan
e086c79da0 Merge pull request #1933 from arc53/dependabot/npm_and_yarn/frontend/npm_and_yarn-d56b2ef021
chore(deps): bump mermaid from 11.6.0 to 11.10.0 in /frontend in the npm_and_yarn group
2025-10-31 19:32:57 +05:30
dependabot[bot]
6ae8d34b27 chore(deps): bump mermaid in /frontend in the npm_and_yarn group
Bumps the npm_and_yarn group in /frontend with 1 update: [mermaid](https://github.com/mermaid-js/mermaid).

Updates `mermaid` from 11.6.0 to 11.10.0
- [Release notes](https://github.com/mermaid-js/mermaid/releases)
- [Commits](https://github.com/mermaid-js/mermaid/compare/mermaid@11.6.0...mermaid@11.10.0)

---
updated-dependencies:
- dependency-name: mermaid
  dependency-version: 11.10.0
  dependency-type: direct:production
  dependency-group: npm_and_yarn
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-31 19:27:31 +05:30
Manish Madan
2e23e547d3 Merge pull request #1916 from arc53/dependabot/npm_and_yarn/frontend/eslint-plugin-prettier-5.5.4
build(deps-dev): bump eslint-plugin-prettier from 5.2.1 to 5.5.4 in /frontend
2025-10-31 19:24:38 +05:30
dependabot[bot]
fa11dc9828 build(deps-dev): bump eslint-plugin-prettier in /frontend
Bumps [eslint-plugin-prettier](https://github.com/prettier/eslint-plugin-prettier) from 5.2.1 to 5.5.4.
- [Release notes](https://github.com/prettier/eslint-plugin-prettier/releases)
- [Changelog](https://github.com/prettier/eslint-plugin-prettier/blob/main/CHANGELOG.md)
- [Commits](https://github.com/prettier/eslint-plugin-prettier/compare/v5.2.1...v5.5.4)

---
updated-dependencies:
- dependency-name: eslint-plugin-prettier
  dependency-version: 5.5.4
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-31 19:18:44 +05:30
Manish Madan
673fa70bc5 Merge pull request #1903 from arc53/dependabot/npm_and_yarn/frontend/multi-6fb5dc7d23
build(deps): bump react-dom and @types/react-dom in /frontend
2025-10-31 19:16:17 +05:30
dependabot[bot]
a0660a54c1 build(deps): bump react-dom and @types/react-dom in /frontend
Bumps [react-dom](https://github.com/facebook/react/tree/HEAD/packages/react-dom) and [@types/react-dom](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/react-dom). These dependencies needed to be updated together.

Updates `react-dom` from 19.1.0 to 19.1.1
- [Release notes](https://github.com/facebook/react/releases)
- [Changelog](https://github.com/facebook/react/blob/main/CHANGELOG.md)
- [Commits](https://github.com/facebook/react/commits/v19.1.1/packages/react-dom)

Updates `@types/react-dom` from 19.1.6 to 19.1.7
- [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases)
- [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/react-dom)

---
updated-dependencies:
- dependency-name: react-dom
  dependency-version: 19.1.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
- dependency-name: "@types/react-dom"
  dependency-version: 19.1.7
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-31 19:10:51 +05:30
Manish Madan
1137bf4280 Merge pull request #1864 from arc53/dependabot/npm_and_yarn/frontend/i18next-browser-languagedetector-8.2.0
build(deps): bump i18next-browser-languagedetector from 8.0.2 to 8.2.0 in /frontend
2025-10-31 19:10:26 +05:30
dependabot[bot]
da41c898d8 build(deps): bump i18next-browser-languagedetector in /frontend
Bumps [i18next-browser-languagedetector](https://github.com/i18next/i18next-browser-languageDetector) from 8.0.2 to 8.2.0.
- [Changelog](https://github.com/i18next/i18next-browser-languageDetector/blob/master/CHANGELOG.md)
- [Commits](https://github.com/i18next/i18next-browser-languageDetector/compare/v8.0.2...v8.2.0)

---
updated-dependencies:
- dependency-name: i18next-browser-languagedetector
  dependency-version: 8.2.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-31 18:59:21 +05:30
Siddhant Rai
21e5c261ef feat: template-based prompt rendering with dynamic namespace injection (#2091)
* feat: template-based prompt rendering with dynamic namespace injection

* refactor: improve template engine initialization with clearer formatting

* refactor: streamline ReActAgent methods and improve content extraction logic

feat: enhance error handling in NamespaceManager and TemplateEngine

fix: update NewAgent component to ensure consistent form data submission

test: modify tests for ReActAgent and prompt renderer to reflect method changes and improve coverage

* feat: tools namespace + three-tier token budget

* refactor: remove unused variable assignment in message building tests

* Enhance prompt customization and tool pre-fetching functionality

* ruff lint fix

* refactor: cleaner error handling and reduce code clutter

---------

Co-authored-by: Alex <a@tushynski.me>
2025-10-31 12:47:44 +00:00
Aqsa Aqeel
a7d61b9d59 feat: implementing the new custom modal design (#2090)
* feat: implementing the new custom modal design

* feat: added tool variable dropdown

* fix: ui fixes and link fixes

* feat: implemented redisgn for edit prompt modal

* (feat:prompts) matching figma

* (fix:prompts) tool vars

* (fix:promptsModal) responsive; disable save on text

---------

Co-authored-by: Aqsa Aqeel <aqsa.aqeel17@example.com>
Co-authored-by: ManishMadan2882 <manishmadan321@gmail.com>
2025-10-31 12:18:13 +02:00
dorkdiaries9
c5fe25c149 Enhance migration script with logging and error handling (#2103)
Added logging for migration steps and error handling.
2025-10-29 01:49:47 +02:00
Manish Madan
6a4cb617f9 Frontend audit: Bug fixes and refinements (#2112)
* (fix:attachements) sep id for redux ops

* (fix:ui) popups, toast, share modal

* (feat:agentsPreview) stable preview, ui fixes

* (fix:ui) light theme icon, sleek scroll

* (chore:i18n) missin keys

* (chore:i18n) missing keys

* (feat:preferrenceSlice) autoclear invalid source from storage

* (fix:general) delete all conv close btn

* (fix:tts) play one at a time

* (fix:tts) gracefully unmount

* (feat:tts) audio LRU cache

* (feat:tts) pointer on hovered area

* (feat:tts) clean text for speach

---------

Co-authored-by: GH Action - Upstream Sync <action@github.com>
2025-10-29 01:47:26 +02:00
Alex
94f70e6de5 refactor(tests): update todo tool tests to use simplified action names and improve key handling 2025-10-28 10:39:35 +00:00
Hanzalah Waheed
ab4ebf9a9d feat: add bg blur for modals (#2110)
* feat: add bg blur for modals

* feat: adjust darkness for lightmode
2025-10-28 12:14:37 +02:00
Nikunj Kohli
9f7945fcf5 [UI/UX] Improve image upload experience — add preview & drag-to-reord… (#2095)
* [UI/UX] Improve image upload experience — add preview & drag-to-reorder in chat section

* chore(chat): remove image previews, keep drag-to-reorder

* chore(chat): prevent attachment drag from triggering upload dropzone

* Revert "chore(chat): prevent attachment drag from triggering upload dropzone"

This reverts commit dd4b96256c.

* (feat:conv) rmv drag-drop on sources

* (feat:msg-input) drop attachments

---------

Co-authored-by: ManishMadan2882 <manishmadan321@gmail.com>
2025-10-27 21:53:18 +02:00
Gayatri K
d8ec3c008c todo tool feature added to tools (#1977)
* todo tool feature added to tools

* removed configs

* fix: require user_id on TodoListTool, normalize timestamps, add tests

* lint and tests fixes

* refactor: support multiple todos per user/tool by indexing with todo_id

* modified todo_id to use auto-increamenting integer instead of UUID

* test-case fixes

* feat: fix todos

---------

Co-authored-by: Alex <a@tushynski.me>
2025-10-27 19:09:32 +02:00
Pavel
2f00691246 Merge pull request #2096 from arc53/hacktoberfest-t-shirt-image
Update HACKTOBERFEST.md with T-shirt image
2025-10-24 13:15:30 +01:00
Pavel
9b2383b074 Update HACKTOBERFEST.md with T-shirt image
Added t-shirt image.
2025-10-24 13:09:23 +01:00
Ritoban Dutta
e4e9910575 fix(ui): use dedicated sidebar open/close icons for better visual feedback of actions (#2088)
* fix(ui): use dedicated icons for sidebar toggle (panel-left-open/close)

* fix: update sidebar toggle icon colors
2025-10-24 00:25:17 +03:00
Nihar
f448e4a615 add configurable provider in settings.py and update ElevenLabs Api (#2065) (#2074) 2025-10-22 19:07:21 +03:00
Manish Madan
c4e8daf50e Frontend audit: refinements (#2083)
* (fix:attachements) sep id for redux ops

* (fix:ui) popups, toast, share modal

* (feat:agentsPreview) stable preview, ui fixes

* (fix:ui) light theme icon, sleek scroll

---------

Co-authored-by: GH Action - Upstream Sync <action@github.com>
2025-10-22 12:12:05 +03:00
Hanzalah Waheed
5aa4ec1b9f fix: cleanup ConversationBubble and fix CopyButton (#2073)
* fix: rm states for hovering. use tailwind classes instead

* fix: use group hover css intead of states

* chore: no point in having separate defaults if cant be customised

* fix: move default bg colors into conditionals
2025-10-18 21:59:15 +03:00
Siddhant Rai
125ce0aad3 test: implement full API test suite with mongomock and centralized fixtures (#2068) 2025-10-17 12:01:14 +03:00
TinTin
ababc9ae04 fix: reduce large margins between list items in answer rendering (#2058) 2025-10-16 13:59:45 +03:00
Alex
62ac90746e fix: improve error handling and loading state in fetchChunks function 2025-10-15 17:33:13 +01:00
Alex
096f6d91a2 fix: handle potential undefined value for selectedDocs in fetchAnswer 2025-10-15 17:12:52 +01:00
jay98
d28ef6b094 Refactor: use async/await in fetchChunks for correct error handling (typescript:S4822) (#2066) 2025-10-15 15:52:55 +03:00
Anurag Yadav
8fb945ab09 feedback button to show after message (#2064) 2025-10-14 18:52:31 +03:00
jay98
835d71727c fix: remove redundant conditional operator for file assignment (#2060) 2025-10-14 17:35:41 +03:00
Ali Arda Fincan
ce32dd2907 Feat: Agent Token or Request Limiting (#2041)
* Update routes.py, added token and request limits to create/update agent operations

* added usage limit check to api endpoints

cannot create agents with usage limit right now that will be implemented

* implemented api limiting as either token limiting or request limiting modes

* minor typo & bug fix
2025-10-13 21:32:46 +03:00
Manish Madan
72bc24a490 Chore: deleted unused files, dead code; minor fixes (#2059)
* (feat:pause-stream) generator exit

* (feat:pause-stream) close request

* (feat:pause-stream) finally close; google anthropic

* (feat:task_status)communicate failure

* (clean:connector) unused routes

* (feat:file-table) missing skeletons

* (chore:fe) purge unused

* (fix:apiKeys) build err

* (chore:settings) clean unused

* merge from main

* (chore:purge) unused fe assets

* (clean:check_docs) unused logic

* (feat:selectedDocs) replace null type

---------

Co-authored-by: GH Action - Upstream Sync <action@github.com>
2025-10-13 19:11:24 +03:00
Siddhant Rai
d6c49bdbf0 test: add agent test coverage and standardize test suite (#2051)
- Add 104 comprehensive tests for agent system
- Integrate agent tests into CI/CD pipeline
- Standardize tests with @pytest.mark.unit markers
- Fix cross-platform path compatibility
- Clean up unused imports and dependencies
2025-10-13 14:43:35 +03:00
beKool.sh
1805292528 Update README.md (#2057) 2025-10-13 13:00:13 +03:00
Nirjas Jakilim
d09ce7e1f7 fixed broken link (#2054) 2025-10-12 15:26:05 +03:00
Marco Ponce
a8d2024791 Windows deployment powershell and renamed LLM_PROVIDER and runtime (#2050)
* Windows deployment powershell and  renamed LLM_PROVIDER and runtime

* added LLM_NAME back

* revert changes on docker-compose-hub.yaml
2025-10-12 15:25:42 +03:00
Manish Madan
f0b954dbfb Upload: communicate failure, minor frontend updates (#2048)
* (feat:pause-stream) generator exit

* (feat:pause-stream) close request

* (feat:pause-stream) finally close; google anthropic

* (feat:task_status)communicate failure

* (clean:connector) unused routes

* (feat:file-table) missing skeletons

* (fix:apiKeys) build err

---------

Co-authored-by: GH Action - Upstream Sync <action@github.com>
2025-10-10 17:34:02 +03:00
Rahul
50bee7c2b0 feat: Add button to cancel LLM response (#1978)
* feat: Add button to cancel LLM response
- Replace text area with cancel button when loading.
- Add useEffect to change elipsis in cancel button text.
- Add new SVG icon for cancel response.
- Button colors match Figma designs.

* fix: Cancel button UI matches new design
- Delete cancel-response svg.
- Change previous cancel button to match the new Figma design.
- Remove console log in handleCancel function.

* fix: Adjust cancel button rounding

* feat: Update UI for send button
- Add SendArrowIcon component, enables dynamic svg color changes
- Replace original icon
- Update colors and hover effects

* (fix:send-button) minor blink in transition

---------

Co-authored-by: Manish Madan <manishmadan321@gmail.com>
2025-10-09 12:01:25 +03:00
Mariam Saeed
e7b15b316e Feat: Notification section (#2033)
* Feature/Notification-section

* fix notification ui and add local storage variable to save the state

* add notification component to app.tsx
2025-10-09 01:26:10 +03:00
Manish Madan
a4507008c1 complete_stream: Stop response streaming (#2031)
* (feat:pause-stream) generator exit

* (feat:pause-stream) close request

* (feat:pause-stream) finally close; google anthropic

---------

Co-authored-by: GH Action - Upstream Sync <action@github.com>
2025-10-08 20:37:30 +03:00
Hanzalah Waheed
c5ba85f929 fix(ui): create a var to check for shared metadata obj (#2040) 2025-10-08 18:18:54 +03:00
Manish Madan
2e636bd67e Merge pull request #1970 from arc53/dependabot/npm_and_yarn/frontend/mermaid-11.12.0
chore(deps): bump mermaid from 11.6.0 to 11.12.0 in /frontend
2025-10-08 20:42:32 +05:30
dependabot[bot]
4a039f1abf chore(deps): bump mermaid from 11.6.0 to 11.12.0 in /frontend
Bumps [mermaid](https://github.com/mermaid-js/mermaid) from 11.6.0 to 11.12.0.
- [Release notes](https://github.com/mermaid-js/mermaid/releases)
- [Commits](https://github.com/mermaid-js/mermaid/compare/mermaid@11.6.0...mermaid@11.12.0)

---
updated-dependencies:
- dependency-name: mermaid
  dependency-version: 11.12.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-08 15:06:36 +00:00
JeevaRamanathan
434d8e2070 fix spinner to match theme (dark/light) in conversation (#2044) 2025-10-08 15:16:44 +03:00
Nihar
160ad2dc79 correct path for storing embeddings model (#2035) 2025-10-07 23:01:39 +03:00
Anshuman Payasi
0ec86c2c71 fix: adjust share modal size and spacing (#2027)
* fix/share-modal-spacing

* fix(share-modal): spacing adjusted for mobile view
2025-10-07 17:11:03 +03:00
Alex
03452ffd9f feat: add GitHub access token support and fix file content fetching logic (#2032) 2025-10-07 16:53:14 +03:00
Siddhant Rai
da6317a242 feat: agent templates and seeding premade agents (#1910)
* feat: agent templates and seeding premade agents

* fix: ensure ObjectId is used for source reference in agent configuration

* fix: improve source handling in DatabaseSeeder and update tool config processing

* feat: add prompt handling in DatabaseSeeder for agent configuration

* Docs premade agents

* link to prescraped docs

* feat: add template agent retrieval and adopt agent functionality

* feat: simplify agent descriptions in premade_agents.yaml  added docs

---------

Co-authored-by: Pavel <pabin@yandex.ru>
Co-authored-by: Alex <a@tushynski.me>
2025-10-07 13:00:14 +03:00
Nihar
8b8e616557 fix: handle missing kwargs in local save_file (#2017)
Previously, the local save_file function didn’t accept kwargs, causing
a crash when passed extra params. Added support to maintain consistency
with AWS version.

Fixes #2009
2025-10-06 23:55:49 +03:00
Marco Ponce
d260f1a1a6 Made changes to how the documentation is represented including the new 5th option when forking and launching DocsGPT on the inviduals device, as well as updated README.md which has miswording issues saying only 4 options, but now includes the 5th option, detailing and giving an explanation for what that option does and documentation provided. (#2020) 2025-10-06 23:50:16 +03:00
Alex
9d452e3b04 feat: enhance MemoryTool and NotesTool with tool_id management and directory renaming tests (#2026) 2025-10-06 23:45:47 +03:00
Alex
e012189672 feat: implement MemoryTool with CRUD operations and integrate with MongoDB 2025-10-06 21:09:22 +01:00
Dhairya Parikh
4c31e9a8b1 Add MongoDB-backed NotesTool with CRUD actions and unit tests (#1982)
* Add MongoDB-backed NotesTool with CRUD actions and unit tests

* NotesTool: enforce single note per user, require decoded_token['sub'] user_id, fix tests

* chore: remove accidentally committed results.txt and ignore it

* fix: remove results.txt, enforce single note per user, add tests

* refactor: update NotesTool actions and tests for clarity and consistency

* refactor: update NotesTool docstring for clarity

* refactor: simplify MCPTool docstring and remove redundant import in test_notes_tool

* lint: fix test

* refactor: remove unused import from test_notes_tool.py

---------

Co-authored-by: Alex <a@tushynski.me>
2025-10-06 19:24:03 +03:00
Alex
7cfc230316 Update README.md 2025-10-06 18:30:47 +03:00
Alex
9605e85f1c Merge pull request #2004 from Lanthoiba2022/AgentImageFix1
Fix #1983: Agent image fallback added
2025-10-06 14:47:26 +01:00
Alex
498e2b772c fix: update save_file method to accept additional keyword arguments; enhance AgentImage component with useEffect for dynamic source updates 2025-10-06 14:41:51 +01:00
Manish Madan
dad897da51 Merge pull request #1997 from arc53/dependabot/npm_and_yarn/frontend/i18next-25.5.3
build(deps): bump i18next from 24.2.0 to 25.5.3 in /frontend
2025-10-06 16:44:15 +05:30
dependabot[bot]
02ad5f062e build(deps): bump i18next from 24.2.0 to 25.5.3 in /frontend
Bumps [i18next](https://github.com/i18next/i18next) from 24.2.0 to 25.5.3.
- [Release notes](https://github.com/i18next/i18next/releases)
- [Changelog](https://github.com/i18next/i18next/blob/master/CHANGELOG.md)
- [Commits](https://github.com/i18next/i18next/compare/v24.2.0...v25.5.3)

---
updated-dependencies:
- dependency-name: i18next
  dependency-version: 25.5.3
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-06 11:03:35 +00:00
Manish Madan
4eb9471b4f Merge pull request #2021 from M4cr0Chen/enhance-edit-button-and-response-ui
adjusted edit button padding and response box padding & roundness
2025-10-06 13:13:00 +05:30
Zhenghong Chen
b505d207d7 adjusted edit button padding and response box padding & roundness 2025-10-05 16:34:17 -04:00
Alex
3c954bd07f Merge pull request #2007 from ManishMadan2882/upload-toast
Upload Sources: Tasks are notified with UI toasts
2025-10-05 13:49:11 +01:00
ManishMadan2882
c00b6459dc (fix:upload) set docs 2025-10-05 18:07:21 +05:30
ManishMadan2882
eb4d776784 (feat:upload) wording, icons and rmv clear button 2025-10-05 16:12:58 +05:30
Alex
5d7a890533 Merge pull request #2015 from JeevaRamanathan/fix/twitter-navigation-link
chore: update twitter.com to x.com
2025-10-04 21:50:47 +01:00
JeevaRamanathan
9c6aefef1e chore: update twitter.com to x.com for avoiding redirection
Signed-off-by: JeevaRamanathan <jeevaramanathan.m@infosys.com>
2025-10-05 02:00:57 +05:30
ManishMadan2882
e4554d6c09 Merge branch 'main' of https://github.com/arc53/DocsGPT into upload-toast 2025-10-03 23:26:32 +05:30
ManishMadan2882
c184b63df8 (feat:upload) i18n 2025-10-03 20:32:43 +05:30
ManishMadan2882
6bb4195393 (feat:upload) dismiss, but notify on completion 2025-10-03 19:55:41 +05:30
Alex
7827a4d40d Merge pull request #1960 from jayamrutiya/letmecheck/bug-return-value-not-be-ignored
Fix: replace map with for...of loop to avoid ignoring return value (S2201)
2025-10-03 14:15:39 +01:00
ManishMadan2882
f09fa8231a (feat:upload) new toast UI 2025-10-03 17:08:39 +05:30
Alex
96ff10000d Merge pull request #1999 from hanzalahwaheed/feat/ui-enhancements
Feat: UI enhancements
2025-10-03 10:15:54 +01:00
Alex
9460636867 Merge pull request #2005 from siiddhantt/feat/routes-refactor
refactor: modularize user API routes into domain-driven structure
2025-10-03 10:03:33 +01:00
Siddhant Rai
6c43245295 refactor: organize import statements for clarity and consistency 2025-10-03 12:41:03 +05:30
Pavel
266b6cf638 Fix typo in HACKTOBERFEST.md 2025-10-03 08:09:22 +01:00
Siddhant Rai
70183e234a refactor: break up monolithic routes.py into modular domain structure 2025-10-03 12:30:04 +05:30
Hanzalah Waheed
17b9c359ca fix: use hexcode for purple taupe and rm extra props 2025-10-03 00:22:36 +04:00
Lanthoiba22
045630b8a5 Agent image fallback added 2025-10-02 21:53:38 +05:30
Alex
55ff7dd640 Merge pull request #2003 from arc53/hacktoberfest-rules
Update HACKTOBERFEST.md
2025-10-02 16:58:22 +01:00
Alex
e6d64f71f2 Update HACKTOBERFEST.md 2025-10-02 16:58:01 +01:00
Alex
e72313ebdd Merge pull request #2002 from ManishMadan2882/main
Frontend Lint
2025-10-02 12:35:59 +01:00
ManishMadan2882
65d5bd72cd Merge branch 'main' of https://github.com/manishmadan2882/docsgpt 2025-10-02 16:59:13 +05:30
ManishMadan2882
dc0cbb41f0 (fix:table) remove display name 2025-10-02 16:50:59 +05:30
ManishMadan2882
c4a54a85be (lint) fe 2025-10-02 16:30:08 +05:30
Hanzalah Waheed
5b2738aec9 fix (ui): is_copied true, disable hover state 2025-10-02 13:44:05 +04:00
Hanzalah Waheed
892312fc08 fix: hover bg color change fixed using correct css var 2025-10-02 12:23:17 +04:00
Alex
c2ccf2c72c Merge pull request #2000 from ManishMadan2882/main
Test coverage:  TTS, Security and Storage layers
2025-10-02 00:11:22 +01:00
ManishMadan2882
80aaecb5f0 ruff-fix 2025-10-02 02:48:16 +05:30
ManishMadan2882
946865a335 test:TTS 2025-10-02 02:40:30 +05:30
ManishMadan2882
5de15c8413 (feat:11Labs) just functional part 2025-10-02 02:39:53 +05:30
ManishMadan2882
67268fd35a tests:security 2025-10-02 02:34:05 +05:30
ManishMadan2882
42fc771833 tests:storage 2025-10-02 02:33:12 +05:30
Hanzalah Waheed
444b1a0b65 feat(ui): add transition animation to navigation sidebar 2025-10-01 23:16:24 +04:00
Hanzalah Waheed
814ea1c016 fix(ui): add text-32px for smaller than lg view for agent headings 2025-10-01 21:21:23 +04:00
Alex
4d34dc4234 Merge pull request #1996 from arc53/pr/1988
Pr/1988
2025-10-01 10:29:20 +01:00
Alex
d567399f2b Merge pull request #1995 from siiddhantt/fix/agent-sources
fix: agent sources and other issues
2025-10-01 10:19:47 +01:00
Alex
77f4f8d8b0 (refactor) update launch configurations and remove obsolete tasks.json 2025-10-01 10:17:52 +01:00
Alex
a2d04beaa1 (refactor) remove redundant source validation in CreateAgent 2025-10-01 10:17:26 +01:00
Siddhant Rai
ba49eea23d Refactor agent creation and update logic to improve error handling and default values; enhance logging for better traceability 2025-10-01 13:56:31 +05:30
Alex
82beafc086 Merge pull request #1991 from ManishMadan2882/tester
Tests: coverage for application/llm/* and application/llm/handlers/* ; fix on parsers/test_markdown
2025-09-30 23:19:38 +01:00
ManishMadan2882
7d8ed2d102 ruff-fix 2025-10-01 02:23:53 +05:30
ManishMadan2882
aab8d3a4f1 Merge branch 'tester' of https://github.com/manishmadan2882/docsgpt into tester 2025-10-01 01:58:52 +05:30
Alex
76658d50a0 Update HACKTOBERFEST.md 2025-09-30 21:46:09 +03:00
Alex
88ba22342c Update README with Hacktoberfest details and demo
Added Hacktoberfest information and a demo GIF.
2025-09-30 21:45:55 +03:00
Alex
11a1460af9 Add Hacktoberfest participation details to HACKTOBERFEST.md
This document outlines the participation of DocsGPT in Hacktoberfest, encouraging contributors to submit meaningful pull requests for a chance to win a T-shirt. It includes guidelines for contributions and links to resources.
2025-09-30 21:42:13 +03:00
Alex
2cd4c41316 Merge pull request #1992 from arc53/fix-api-answer-tool-call
fix: api answer tool call event
2025-09-30 14:49:57 +01:00
Alex
b910f308f2 fix: api answer tool call event 2025-09-30 14:42:54 +01:00
ManishMadan2882
763aa73ea4 (tests:llm) llms, handlers 2025-09-30 16:03:02 +05:30
Manish Madan
30c79e92d4 Merge branch 'arc53:main' into tester 2025-09-30 15:52:23 +05:30
ManishMadan2882
402d5e054b (fix:test_markdown) patch tokenizer 2025-09-30 13:35:03 +05:30
Alex
0e211df206 Merge pull request #1988 from ManishMadan2882/tester
Test coverage for parsers
2025-09-29 17:42:27 +01:00
ManishMadan2882
e24a0ac686 (test:parsers) github, reddit 2025-09-29 20:33:05 +05:30
ManishMadan2882
8c91b1c527 (tests:parsers) remote 2025-09-29 19:39:24 +05:30
ManishMadan2882
2b38f80d04 (test:files) epub, image, rst 2025-09-29 17:39:20 +05:30
ManishMadan2882
282bd35f52 (test:files) html, md, json 2025-09-29 17:23:15 +05:30
Alex
cc9b4c2bcb Merge pull request #1964 from siiddhantt/refine/mcp-tool
refactor: oauth + use fastmcp client for handling SSE and different transports in remote mcp
2025-09-26 13:54:40 +01:00
Alex
068ce4970a Merge branch 'refine/mcp-tool' of https://github.com/siiddhantt/DocsGPT into pr/1964 2025-09-26 13:42:34 +01:00
Alex
cf19165ad8 fix(lint): ruff 2025-09-26 13:42:08 +01:00
Alex
68c479f3a5 Merge branch 'main' into refine/mcp-tool 2025-09-26 13:40:12 +01:00
ManishMadan2882
ba496a772b (test) doc parsers coverage 2025-09-26 16:07:12 +05:30
Siddhant Rai
3b27db36f2 feat: Implement OAuth flow for MCP server integration
- Added MCPOAuthManager to handle OAuth authorization.
- Updated MCPServerSave resource to manage OAuth status and callback.
- Introduced new endpoints for OAuth status and callback handling.
- Enhanced user interface to support OAuth authentication type.
- Implemented polling mechanism for OAuth status in MCPServerModal.
- Updated frontend services and endpoints to accommodate new OAuth features.
- Improved error handling and user feedback for OAuth processes.
2025-09-26 02:44:08 +05:30
Alex
f803def69b Merge pull request #1976 from ManishMadan2882/main
OAuth fix for connector
2025-09-25 10:15:13 +01:00
ManishMadan2882
52065e69a4 ruff 2025-09-25 05:05:59 +05:30
ManishMadan2882
50f5e8a955 Merge branch 'main' of https://github.com/manishmadan2882/docsgpt 2025-09-25 04:58:05 +05:30
ManishMadan2882
2d0e97b66d (feat:oauth) provider as state, not args 2025-09-25 04:55:25 +05:30
Manish Madan
5f3cc5a392 Merge branch 'arc53:main' into main 2025-09-25 03:50:50 +05:30
ManishMadan2882
ac66d77512 (fix:oauth) handle access denied 2025-09-25 03:45:12 +05:30
Alex
50cf653d4a Merge pull request #1975 from arc53/chunk-fix
fix: chunking
2025-09-24 23:05:41 +01:00
Alex
56256051d2 fix: chunking 2025-09-24 22:59:53 +01:00
ManishMadan2882
c0361ff03d (fix:oauth) user field null 2025-09-25 03:27:16 +05:30
Alex
f153435c08 Merge pull request #1974 from ManishMadan2882/tester
Test app fix
2025-09-24 10:12:29 +01:00
Alex
9aa7f22fa6 Merge pull request #1961 from NewAi25/feature-letmecheck-fix-Identica-Sub-Expression
Added fix in frontend/src/conversation/ConversationMessages.tsx line…
2025-09-24 09:35:12 +01:00
ManishMadan2882
52b7bda5f8 Merge branch 'main' of https://github.com/manishmadan2882/docsgpt into tester 2025-09-24 02:37:20 +05:30
ManishMadan2882
21aefa2778 (fix:tests.application) attr err 2025-09-23 23:43:50 +05:30
Alex
a89ff71c9e Update README.md 2025-09-23 12:48:49 +03:00
Alex
4c275816be Merge pull request #1954 from ManishMadan2882/main
Refinements: Files uploaded via Connectors
2025-09-22 21:54:48 +01:00
ManishMadan2882
f8dfbcfc80 (docs) guide gdrive 2025-09-22 20:20:51 +05:30
ManishMadan2882
d317f6473d (feat:gdrive) upload files only 2025-09-22 20:19:56 +05:30
Siddhant Rai
00b4e133d4 feat: implement OAuth 2.1 integration with custom handlers for fastmcp 2025-09-22 01:31:09 +05:30
ManishMadan2882
b6349e4efb (fix:gdrive) no api keyneded 2025-09-20 19:55:18 +05:30
ManishMadan2882
6ca3d9585c (fix:connector) db entry 2025-09-20 19:52:12 +05:30
ManishMadan2882
5935a0283a (fix:ui)adjust 2025-09-20 19:50:58 +05:30
ManishMadan2882
5400a6ec06 (feat:googlePicker) frontend envars 2025-09-20 00:13:03 +05:30
ManishMadan2882
6574d9cc84 (feat:pickers) ux, code refactor 2025-09-20 00:04:27 +05:30
ManishMadan2882
42b83c5994 (refactor:upload) single schema with validation 2025-09-19 23:55:40 +05:30
ManishMadan2882
896612a5a3 (fix:auth) refresh drive token 2025-09-19 22:57:09 +05:30
ManishMadan2882
0ee875bee4 (fix:gdrive) missing appId in picker 2025-09-18 20:25:20 +05:30
Siddhant Rai
8ce345cd94 feat: refactor MCPTool to use FastMCP client and improve async handling, update transport and authentication configurations 2025-09-17 20:51:32 +05:30
ManishMadan2882
da2f8477e6 (feat:drive) oauth for drive.file scope, picker 2025-09-17 19:37:01 +05:30
jane
82b47b5673 Added fix in frontend/src/conversation/ConversationMessages.tsx line 213 2025-09-16 23:53:06 +05:30
jayamrutiya
7c15a4c7ff Fix: replace map with forEach to avoid ignoring return value (S2201) 2025-09-16 22:11:55 +05:30
Siddhant Rai
3369b910b4 feat: update MCP request ID generation and enhance response handling for event streams 2025-09-16 20:43:04 +05:30
ManishMadan2882
ec0c4c3b84 Merge branch 'main' of https://github.com/arc53/DocsGPT 2025-09-16 14:59:18 +05:30
ManishMadan2882
f74e2c9da1 (feat:filepciker) load inside table, ui 2025-09-16 14:50:58 +05:30
Alex
e26ad3c475 Merge pull request #1947 from siiddhantt/feat/remote-mcp
feat: remote mcp
2025-09-15 09:31:36 +01:00
ManishMadan2882
145c3b8ad0 (feat:file picker) table ui 2025-09-15 12:58:45 +05:30
Siddhant Rai
0ff6c6a154 Merge branch 'main' into feat/remote-mcp 2025-09-15 09:53:58 +05:30
Siddhant Rai
641cf5a4c1 feat: skip empty fields in mcp tool call + improve error handling and response 2025-09-11 19:04:10 +05:30
Siddhant Rai
09b9576eef feat: enhance message and schema cleaning for Google AI integration 2025-09-11 17:54:46 +05:30
ManishMadan2882
18b71ca2f2 (feat:upload) cards for upload types 2025-09-11 13:27:55 +05:30
Alex
e0eb7f456e Merge pull request #1930 from Ankit-Matth/feature/multi-select-sources
Added support for Multi select sources
2025-09-10 17:48:02 +01:00
Siddhant Rai
188d118fc0 refactor: remove unused logging import from routes.py 2025-09-10 22:14:31 +05:30
Siddhant Rai
adcdce8d76 fix: handle invalid chunks value in StreamProcessor and ClassicRAG 2025-09-10 22:10:11 +05:30
Siddhant Rai
b865a7aec1 Merge branch 'main' of https://github.com/siiddhantt/DocsGPT into pr/1930 2025-09-10 20:15:20 +05:30
ManishMadan2882
cec8c72b46 (refactor:uploads) YAGNI 2025-09-10 19:19:40 +05:30
Siddhant Rai
b052e32805 feat: enhance MCP tool configuration handling and authentication logic 2025-09-10 14:15:51 +05:30
Siddhant Rai
816f660be3 fix: enhance MCPTool request handling and tool discovery logic 2025-09-10 13:08:14 +05:30
ManishMadan2882
fc8be45d5a (feat:sync) confirmation check 2025-09-09 13:08:38 +05:30
ManishMadan2882
e749c936c9 (refactor:uploads) for new ui 2025-09-09 13:07:26 +05:30
ManishMadan2882
b2b9670a23 (feat:connectors) type as connector:file 2025-09-09 00:00:58 +05:30
Siddhant Rai
2f88890c94 feat: add support for multiple sources in agent configuration and update related components 2025-09-08 22:10:08 +05:30
ManishMadan2882
6366663f03 (refactor:uploads) separate file picker 2025-09-08 19:09:19 +05:30
Manish Madan
20fe7dc6d1 Merge branch 'main' into main 2025-09-08 14:19:32 +05:30
Alex
4b9153069e Merge pull request #1928 from Ankit-Matth/fix/ui-ux-improvements
bugfix(ui): several UI/UX updates and fixes
2025-09-05 13:54:09 +01:00
ManishMadan2882
80406d0753 (feat:drive) debounce search, ui/ux 2025-09-05 13:25:36 +05:30
ManishMadan2882
35f4c11784 Merge branch 'main' of https://github.com/manishmadan2882/docsgpt 2025-09-05 11:43:18 +05:30
ManishMadan2882
7896526f19 (feat:load_files) search feature 2025-09-05 10:35:23 +05:30
Siddhant Rai
0e4196f036 fix: remove unused tool labels from localization file 2025-09-04 15:19:58 +05:30
Siddhant Rai
1bf6af6eeb feat: finalize remote mcp 2025-09-04 15:10:12 +05:30
Siddhant Rai
7c23f43c63 feat: Add MCP Server management functionality
- Implemented encryption utility for securely storing sensitive credentials.
- Added MCPServerModal component for managing MCP server configurations.
- Updated AddToolModal to include MCP server management.
- Enhanced localization with new strings for MCP server features.
- Introduced SVG icons for MCP tools in the frontend.
- Created a new settings section for MCP server configurations in the application.
2025-09-03 15:41:59 +05:30
Ankit Matth
07d59b6640 refactor: use list instead of string parsing 2025-08-23 20:25:29 +05:30
Siddhant Rai
bd73fa9ae7 refactor: remove unused abstract method and improve retrievers 2025-08-20 22:25:31 +05:30
Ankit Matth
6f47aa802b added support for multi select sources 2025-08-16 15:19:19 +05:30
Ankit Matth
3417c73011 fix(ui): resolve multiple UI/UX issues 2025-08-15 10:14:31 +05:30
290 changed files with 31795 additions and 11430 deletions

11
.github/styles/DocsGPT/Spelling.yml vendored Normal file
View File

@@ -0,0 +1,11 @@
extends: spelling
level: warning
message: "Did you really mean '%s'?"
ignore:
- "**/node_modules/**"
- "**/dist/**"
- "**/build/**"
- "**/coverage/**"
- "**/public/**"
- "**/static/**"
vocab: DocsGPT

View File

@@ -0,0 +1,46 @@
Ollama
Qdrant
Milvus
Chatwoot
Nextra
VSCode
npm
LLMs
APIs
Groq
SGLang
LMDeploy
OAuth
Vite
LLM
JSONPath
UIs
configs
uncomment
qdrant
vectorstore
docsgpt
llm
GPUs
kubectl
Lightsail
enqueues
chatbot
VSCode's
Shareability
feedbacks
automations
Premade
Signup
Repo
repo
env
URl
agentic
llama_cpp
parsable
SDKs
boolean
bool
hardcode
EOL

View File

@@ -21,7 +21,7 @@ jobs:
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install dependencies

View File

@@ -10,21 +10,21 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest pytest-cov
cd application
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
cd ../tests
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Test with pytest and generate coverage report
run: |
python -m pytest --cov=application --cov-report=xml
python -m pytest --cov=application --cov-report=xml --cov-report=term-missing
- name: Upload coverage reports to Codecov
if: github.event_name == 'pull_request' && matrix.python-version == '3.12'
uses: codecov/codecov-action@v5
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

26
.github/workflows/vale.yml vendored Normal file
View File

@@ -0,0 +1,26 @@
name: Vale Documentation Linter
on:
pull_request:
paths:
- 'docs/**/*.md'
- 'docs/**/*.mdx'
- '**/*.md'
- '.vale.ini'
- '.github/styles/**'
jobs:
vale:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Vale linter
uses: errata-ai/vale-action@v2
with:
files: docs
fail_on_error: false
version: 3.0.5
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

1
.gitignore vendored
View File

@@ -3,6 +3,7 @@ __pycache__/
*.py[cod]
*$py.class
experiments
# C extensions
*.so
*.next

5
.vale.ini Normal file
View File

@@ -0,0 +1,5 @@
MinAlertLevel = warning
StylesPath = .github/styles
[*.{md,mdx}]
BasedOnStyles = DocsGPT

33
.vscode/launch.json vendored
View File

@@ -2,15 +2,11 @@
"version": "0.2.0",
"configurations": [
{
"name": "Docker Debug Frontend",
"name": "Frontend Debug (npm)",
"type": "node-terminal",
"request": "launch",
"type": "chrome",
"preLaunchTask": "docker-compose: debug:frontend",
"url": "http://127.0.0.1:5173",
"webRoot": "${workspaceFolder}/frontend",
"skipFiles": [
"<node_internals>/**"
]
"command": "npm run dev",
"cwd": "${workspaceFolder}/frontend"
},
{
"name": "Flask Debugger",
@@ -49,6 +45,27 @@
"--pool=solo"
],
"cwd": "${workspaceFolder}"
},
{
"name": "Dev Containers (Mongo + Redis)",
"type": "node-terminal",
"request": "launch",
"command": "docker compose -f deployment/docker-compose-dev.yaml up --build",
"cwd": "${workspaceFolder}"
}
],
"compounds": [
{
"name": "DocsGPT: Full Stack",
"configurations": [
"Frontend Debug (npm)",
"Flask Debugger",
"Celery Debugger"
],
"presentation": {
"group": "DocsGPT",
"order": 1
}
}
]
}

21
.vscode/tasks.json vendored
View File

@@ -1,21 +0,0 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "docker-compose",
"label": "docker-compose: debug:frontend",
"dockerCompose": {
"up": {
"detached": true,
"services": [
"frontend"
],
"build": true
},
"files": [
"${workspaceFolder}/docker-compose.yaml"
]
}
}
]
}

39
HACKTOBERFEST.md Normal file
View File

@@ -0,0 +1,39 @@
# **🎉 Join the Hacktoberfest with DocsGPT and win a Free T-shirt for a meaningful PR! 🎉**
Welcome, contributors! We're excited to announce that DocsGPT is participating in Hacktoberfest. Get involved by submitting meaningful pull requests.
All Meaningful contributors with accepted PRs that were created for issues with the `hacktoberfest` label (set by our maintainer team: dartpain, siiddhantt, pabik, ManishMadan2882) will receive a cool T-shirt! 🤩.
<img width="1331" height="678" alt="hacktoberfest-mocks-preview" src="https://github.com/user-attachments/assets/633f6377-38db-48f5-b519-a8b3855a9eb4" />
Fill in [this form](https://forms.gle/Npaba4n9Epfyx56S8
) after your PR was merged please
If you are in doubt don't hesitate to ping us on discord, ping me - Alex (dartpain).
## 📜 Here's How to Contribute:
```text
🛠️ Code: This is the golden ticket! Make meaningful contributions through PRs.
🧩 API extension: Build an app utilising DocsGPT API. We prefer submissions that showcase original ideas and turn the API into an AI agent.
They can be a completely separate repos.
For example:
https://github.com/arc53/tg-bot-docsgpt-extenstion or
https://github.com/arc53/DocsGPT-cli
Non-Code Contributions:
📚 Wiki: Improve our documentation, create a guide.
🖥️ Design: Improve the UI/UX or design a new feature.
```
### 📝 Guidelines for Pull Requests:
- Familiarize yourself with the current contributions and our [Roadmap](https://github.com/orgs/arc53/projects/2).
- Before contributing check existing [issues](https://github.com/arc53/DocsGPT/issues) or [create](https://github.com/arc53/DocsGPT/issues/new/choose) an issue and wait to get assigned.
- Once you are finished with your contribution, please fill in this [form](https://forms.gle/Npaba4n9Epfyx56S8).
- Refer to the [Documentation](https://docs.docsgpt.cloud/).
- Feel free to join our [Discord](https://discord.gg/n5BX8dh8rU) server. We're here to help newcomers, so don't hesitate to jump in! Join us [here](https://discord.gg/n5BX8dh8rU).
Thank you very much for considering contributing to DocsGPT during Hacktoberfest! 🙏 Your contributions (not just simple typos) could earn you a stylish new t-shirt.
We will publish a t-shirt design later into the October.

View File

@@ -17,7 +17,7 @@
<a href="https://github.com/arc53/DocsGPT/blob/main/LICENSE">![link to license file](https://img.shields.io/github/license/arc53/docsgpt)</a>
<a href="https://www.bestpractices.dev/projects/9907"><img src="https://www.bestpractices.dev/projects/9907/badge"></a>
<a href="https://discord.gg/n5BX8dh8rU">![link to discord](https://img.shields.io/discord/1070046503302877216)</a>
<a href="https://twitter.com/docsgptai">![X (formerly Twitter) URL](https://img.shields.io/twitter/follow/docsgptai)</a>
<a href="https://x.com/docsgptai">![X (formerly Twitter) URL](https://img.shields.io/twitter/follow/docsgptai)</a>
<a href="https://docs.docsgpt.cloud/quickstart">⚡️ Quickstart</a><a href="https://app.docsgpt.cloud/">☁️ Cloud Version</a><a href="https://discord.gg/n5BX8dh8rU">💬 Discord</a>
<br>
@@ -25,7 +25,17 @@
<br>
</div>
<div align="center">
<br>
🎃 <a href="https://github.com/arc53/DocsGPT/blob/main/HACKTOBERFEST.md"> Hacktoberfest Prizes, Rules & Q&A </a> 🎃
<br>
<br>
</div>
<div align="center">
<br>
<img src="https://d3dg1063dc54p9.cloudfront.net/videos/demov7.gif" alt="video-example-of-docs-gpt" width="800" height="450">
</div>
<h3 align="left">
@@ -55,9 +65,11 @@
- [x] Agent optimisations (May 2025)
- [x] Filesystem sources update (July 2025)
- [x] Json Responses (August 2025)
- [ ] Sharepoint integration (August 2025)
- [ ] MCP support (August 2025)
- [ ] Add OAuth 2.0 authentication for tools and sources (August 2025)
- [x] MCP support (August 2025)
- [x] Google Drive integration (September 2025)
- [x] Add OAuth 2.0 authentication for MCP (September 2025)
- [ ] SharePoint integration (October 2025)
- [ ] Deep Agents (October 2025)
- [ ] Agent scheduling
You can find our full roadmap [here](https://github.com/orgs/arc53/projects/2). Please don't hesitate to contribute or create issues, it helps us improve DocsGPT!
@@ -106,7 +118,7 @@ A more detailed [Quickstart](https://docs.docsgpt.cloud/quickstart) is available
PowerShell -ExecutionPolicy Bypass -File .\setup.ps1
```
Either script will guide you through setting up DocsGPT. Four options available: using the public API, running locally, connecting to a local inference engine, or using a cloud API provider. Scripts will automatically configure your `.env` file and handle necessary downloads and installations based on your chosen option.
Either script will guide you through setting up DocsGPT. Five options available: using the public API, running locally, connecting to a local inference engine, using a cloud API provider, or build the docker image locally. Scripts will automatically configure your `.env` file and handle necessary downloads and installations based on your chosen option.
**Navigate to http://localhost:5173/**

View File

@@ -12,7 +12,6 @@ from application.core.settings import settings
from application.llm.handlers.handler_creator import LLMHandlerCreator
from application.llm.llm_creator import LLMCreator
from application.logging import build_stack_data, log_activity, LogContext
from application.retriever.base import BaseRetriever
logger = logging.getLogger(__name__)
@@ -27,9 +26,14 @@ class BaseAgent(ABC):
user_api_key: Optional[str] = None,
prompt: str = "",
chat_history: Optional[List[Dict]] = None,
retrieved_docs: Optional[List[Dict]] = None,
decoded_token: Optional[Dict] = None,
attachments: Optional[List[Dict]] = None,
json_schema: Optional[Dict] = None,
limited_token_mode: Optional[bool] = False,
token_limit: Optional[int] = settings.DEFAULT_AGENT_LIMITS["token_limit"],
limited_request_mode: Optional[bool] = False,
request_limit: Optional[int] = settings.DEFAULT_AGENT_LIMITS["request_limit"],
):
self.endpoint = endpoint
self.llm_name = llm_name
@@ -38,7 +42,7 @@ class BaseAgent(ABC):
self.user_api_key = user_api_key
self.prompt = prompt
self.decoded_token = decoded_token or {}
self.user: str = decoded_token.get("sub")
self.user: str = self.decoded_token.get("sub")
self.tool_config: Dict = {}
self.tools: List[Dict] = []
self.tool_calls: List[Dict] = []
@@ -49,21 +53,26 @@ class BaseAgent(ABC):
user_api_key=user_api_key,
decoded_token=decoded_token,
)
self.retrieved_docs = retrieved_docs or []
self.llm_handler = LLMHandlerCreator.create_handler(
llm_name if llm_name else "default"
)
self.attachments = attachments or []
self.json_schema = json_schema
self.limited_token_mode = limited_token_mode
self.token_limit = token_limit
self.limited_request_mode = limited_request_mode
self.request_limit = request_limit
@log_activity()
def gen(
self, query: str, retriever: BaseRetriever, log_context: LogContext = None
self, query: str, log_context: LogContext = None
) -> Generator[Dict, None, None]:
yield from self._gen_inner(query, retriever, log_context)
yield from self._gen_inner(query, log_context)
@abstractmethod
def _gen_inner(
self, query: str, retriever: BaseRetriever, log_context: LogContext
self, query: str, log_context: LogContext
) -> Generator[Dict, None, None]:
pass
@@ -140,29 +149,31 @@ class BaseAgent(ABC):
tool_id, action_name, call_args = parser.parse_args(call)
call_id = getattr(call, "id", None) or str(uuid.uuid4())
# Check if parsing failed
if tool_id is None or action_name is None:
error_message = f"Error: Failed to parse LLM tool call. Tool name: {getattr(call, 'name', 'unknown')}"
logger.error(error_message)
tool_call_data = {
"tool_name": "unknown",
"call_id": call_id,
"action_name": getattr(call, 'name', 'unknown'),
"action_name": getattr(call, "name", "unknown"),
"arguments": call_args or {},
"result": f"Failed to parse tool call. Invalid tool name format: {getattr(call, 'name', 'unknown')}",
}
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
self.tool_calls.append(tool_call_data)
return "Failed to parse tool call.", call_id
# Check if tool_id exists in available tools
if tool_id not in tools_dict:
error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
logger.error(error_message)
# Return error result
tool_call_data = {
"tool_name": "unknown",
"call_id": call_id,
@@ -173,7 +184,6 @@ class BaseAgent(ABC):
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
self.tool_calls.append(tool_call_data)
return f"Tool with ID {tool_id} not found.", call_id
tool_call_data = {
"tool_name": tools_dict[tool_id]["name"],
"call_id": call_id,
@@ -213,18 +223,26 @@ class BaseAgent(ABC):
):
target_dict[param] = value
tm = ToolManager(config={})
# Prepare tool_config and add tool_id for memory tools
if tool_data["name"] == "api_tool":
tool_config = {
"url": tool_data["config"]["actions"][action_name]["url"],
"method": tool_data["config"]["actions"][action_name]["method"],
"headers": headers,
"query_params": query_params,
}
else:
tool_config = tool_data["config"].copy() if tool_data["config"] else {}
# Add tool_id from MongoDB _id for tools that need instance isolation (like memory tool)
# Use MongoDB _id if available, otherwise fall back to enumerated tool_id
tool_config["tool_id"] = str(tool_data.get("_id", tool_id))
tool = tm.load_tool(
tool_data["name"],
tool_config=(
{
"url": tool_data["config"]["actions"][action_name]["url"],
"method": tool_data["config"]["actions"][action_name]["method"],
"headers": headers,
"query_params": query_params,
}
if tool_data["name"] == "api_tool"
else tool_data["config"]
),
tool_config=tool_config,
user_id=self.user, # Pass user ID for MCP tools credential decryption
)
if tool_data["name"] == "api_tool":
print(
@@ -261,16 +279,14 @@ class BaseAgent(ABC):
self,
system_prompt: str,
query: str,
retrieved_data: List[Dict],
) -> List[Dict]:
docs_together = "\n".join([doc["text"] for doc in retrieved_data])
p_chat_combine = system_prompt.replace("{summaries}", docs_together)
messages_combine = [{"role": "system", "content": p_chat_combine}]
"""Build messages using pre-rendered system prompt"""
messages = [{"role": "system", "content": system_prompt}]
for i in self.chat_history:
if "prompt" in i and "response" in i:
messages_combine.append({"role": "user", "content": i["prompt"]})
messages_combine.append({"role": "assistant", "content": i["response"]})
messages.append({"role": "user", "content": i["prompt"]})
messages.append({"role": "assistant", "content": i["response"]})
if "tool_calls" in i:
for tool_call in i["tool_calls"]:
call_id = tool_call.get("call_id") or str(uuid.uuid4())
@@ -290,26 +306,14 @@ class BaseAgent(ABC):
}
}
messages_combine.append(
messages.append(
{"role": "assistant", "content": [function_call_dict]}
)
messages_combine.append(
messages.append(
{"role": "tool", "content": [function_response_dict]}
)
messages_combine.append({"role": "user", "content": query})
return messages_combine
def _retriever_search(
self,
retriever: BaseRetriever,
query: str,
log_context: Optional[LogContext] = None,
) -> List[Dict]:
retrieved_data = retriever.search(query)
if log_context:
data = build_stack_data(retriever, exclude_attributes=["llm"])
log_context.stacks.append({"component": "retriever", "data": data})
return retrieved_data
messages.append({"role": "user", "content": query})
return messages
def _llm_gen(self, messages: List[Dict], log_context: Optional[LogContext] = None):
gen_kwargs = {"model": self.gpt_model, "messages": messages}
@@ -320,7 +324,6 @@ class BaseAgent(ABC):
and self.tools
):
gen_kwargs["tools"] = self.tools
if (
self.json_schema
and hasattr(self.llm, "_supports_structured_output")
@@ -334,7 +337,6 @@ class BaseAgent(ABC):
gen_kwargs["response_format"] = structured_format
elif self.llm_name == "google":
gen_kwargs["response_schema"] = structured_format
resp = self.llm.gen_stream(**gen_kwargs)
if log_context:

View File

@@ -1,32 +1,20 @@
import logging
from typing import Dict, Generator
from application.agents.base import BaseAgent
from application.logging import LogContext
from application.retriever.base import BaseRetriever
import logging
logger = logging.getLogger(__name__)
class ClassicAgent(BaseAgent):
"""A simplified agent with clear execution flow.
Usage:
1. Processes a query through retrieval
2. Sets up available tools
3. Generates responses using LLM
4. Handles tool interactions if needed
5. Returns standardized outputs
Easy to extend by overriding specific steps.
"""
"""A simplified agent with clear execution flow"""
def _gen_inner(
self, query: str, retriever: BaseRetriever, log_context: LogContext
self, query: str, log_context: LogContext
) -> Generator[Dict, None, None]:
# Step 1: Retrieve relevant data
retrieved_data = self._retriever_search(retriever, query, log_context)
"""Core generator function for ClassicAgent execution flow"""
# Step 2: Prepare tools
tools_dict = (
self._get_user_tools(self.user)
if not self.user_api_key
@@ -34,20 +22,16 @@ class ClassicAgent(BaseAgent):
)
self._prepare_tools(tools_dict)
# Step 3: Build and process messages
messages = self._build_messages(self.prompt, query, retrieved_data)
messages = self._build_messages(self.prompt, query)
llm_response = self._llm_gen(messages, log_context)
# Step 4: Handle the response
yield from self._handle_response(
llm_response, tools_dict, messages, log_context
)
# Step 5: Return metadata
yield {"sources": retrieved_data}
yield {"sources": self.retrieved_docs}
yield {"tool_calls": self._get_truncated_tool_calls()}
# Log tool calls for debugging
log_context.stacks.append(
{"component": "agent", "data": {"tool_calls": self.tool_calls.copy()}}
)

View File

@@ -1,229 +1,238 @@
import os
from typing import Dict, Generator, List, Any
import logging
import os
from typing import Any, Dict, Generator, List
from application.agents.base import BaseAgent
from application.logging import build_stack_data, LogContext
from application.retriever.base import BaseRetriever
logger = logging.getLogger(__name__)
MAX_ITERATIONS_REASONING = 10
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
with open(
os.path.join(current_dir, "application/prompts", "react_planning_prompt.txt"), "r"
) as f:
planning_prompt_template = f.read()
PLANNING_PROMPT_TEMPLATE = f.read()
with open(
os.path.join(current_dir, "application/prompts", "react_final_prompt.txt"),
"r",
os.path.join(current_dir, "application/prompts", "react_final_prompt.txt"), "r"
) as f:
final_prompt_template = f.read()
MAX_ITERATIONS_REASONING = 10
FINAL_PROMPT_TEMPLATE = f.read()
class ReActAgent(BaseAgent):
"""
Research and Action (ReAct) Agent - Advanced reasoning agent with iterative planning.
Implements a think-act-observe loop for complex problem-solving:
1. Creates a strategic plan based on the query
2. Executes tools and gathers observations
3. Iteratively refines approach until satisfied
4. Synthesizes final answer from all observations
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.plan: str = ""
self.observations: List[str] = []
def _extract_content_from_llm_response(self, resp: Any) -> str:
"""
Helper to extract string content from various LLM response types.
Handles strings, message objects (OpenAI-like), and streams.
Adapt stream handling for your specific LLM client if not OpenAI.
"""
collected_content = []
if isinstance(resp, str):
collected_content.append(resp)
elif ( # OpenAI non-streaming or Anthropic non-streaming (older SDK style)
hasattr(resp, "message")
and hasattr(resp.message, "content")
and resp.message.content is not None
):
collected_content.append(resp.message.content)
elif ( # OpenAI non-streaming (Pydantic model), Anthropic new SDK non-streaming
hasattr(resp, "choices") and resp.choices and
hasattr(resp.choices[0], "message") and
hasattr(resp.choices[0].message, "content") and
resp.choices[0].message.content is not None
):
collected_content.append(resp.choices[0].message.content) # OpenAI
elif ( # Anthropic new SDK non-streaming content block
hasattr(resp, "content") and isinstance(resp.content, list) and resp.content and
hasattr(resp.content[0], "text")
):
collected_content.append(resp.content[0].text) # Anthropic
else:
# Assume resp is a stream if not a recognized object
try:
for chunk in resp: # This will fail if resp is not iterable (e.g. a non-streaming response object)
content_piece = ""
# OpenAI-like stream
if hasattr(chunk, 'choices') and len(chunk.choices) > 0 and \
hasattr(chunk.choices[0], 'delta') and \
hasattr(chunk.choices[0].delta, 'content') and \
chunk.choices[0].delta.content is not None:
content_piece = chunk.choices[0].delta.content
# Anthropic-like stream (ContentBlockDelta)
elif hasattr(chunk, 'type') and chunk.type == 'content_block_delta' and \
hasattr(chunk, 'delta') and hasattr(chunk.delta, 'text'):
content_piece = chunk.delta.text
elif isinstance(chunk, str): # Simplest case: stream of strings
content_piece = chunk
if content_piece:
collected_content.append(content_piece)
except TypeError: # If resp is not iterable (e.g. a final response object that wasn't caught above)
logger.debug(f"Response type {type(resp)} could not be iterated as a stream. It might be a non-streaming object not handled by specific checks.")
except Exception as e:
logger.error(f"Error processing potential stream chunk: {e}, chunk was: {getattr(chunk, '__dict__', chunk)}")
return "".join(collected_content)
def _gen_inner(
self, query: str, retriever: BaseRetriever, log_context: LogContext
self, query: str, log_context: LogContext
) -> Generator[Dict, None, None]:
# Reset state for this generation call
self.plan = ""
self.observations = []
retrieved_data = self._retriever_search(retriever, query, log_context)
"""Execute ReAct reasoning loop with planning, action, and observation cycles"""
if self.user_api_key:
tools_dict = self._get_tools(self.user_api_key)
else:
tools_dict = self._get_user_tools(self.user)
self._reset_state()
tools_dict = (
self._get_tools(self.user_api_key)
if self.user_api_key
else self._get_user_tools(self.user)
)
self._prepare_tools(tools_dict)
docs_together = "\n".join([doc["text"] for doc in retrieved_data])
iterating_reasoning = 0
while iterating_reasoning < MAX_ITERATIONS_REASONING:
iterating_reasoning += 1
# 1. Create Plan
logger.info("ReActAgent: Creating plan...")
plan_stream = self._create_plan(query, docs_together, log_context)
current_plan_parts = []
yield {"thought": f"Reasoning... (iteration {iterating_reasoning})\n\n"}
for line_chunk in plan_stream:
current_plan_parts.append(line_chunk)
yield {"thought": line_chunk}
self.plan = "".join(current_plan_parts)
if self.plan:
self.observations.append(f"Plan: {self.plan} Iteration: {iterating_reasoning}")
for iteration in range(1, MAX_ITERATIONS_REASONING + 1):
yield {"thought": f"Reasoning... (iteration {iteration})\n\n"}
yield from self._planning_phase(query, log_context)
max_obs_len = 20000
obs_str = "\n".join(self.observations)
if len(obs_str) > max_obs_len:
obs_str = obs_str[:max_obs_len] + "\n...[observations truncated]"
execution_prompt_str = (
(self.prompt or "")
+ f"\n\nFollow this plan:\n{self.plan}"
+ f"\n\nObservations:\n{obs_str}"
+ f"\n\nIf there is enough data to complete user query '{query}', Respond with 'SATISFIED' only. Otherwise, continue. Dont Menstion 'SATISFIED' in your response if you are not ready. "
)
messages = self._build_messages(execution_prompt_str, query, retrieved_data)
resp_from_llm_gen = self._llm_gen(messages, log_context)
initial_llm_thought_content = self._extract_content_from_llm_response(resp_from_llm_gen)
if initial_llm_thought_content:
self.observations.append(f"Initial thought/response: {initial_llm_thought_content}")
else:
logger.info("ReActAgent: Initial LLM response (before handler) had no textual content (might be only tool calls).")
resp_after_handler = self._llm_handler(resp_from_llm_gen, tools_dict, messages, log_context)
for tool_call_info in self.tool_calls: # Iterate over self.tool_calls populated by _llm_handler
observation_string = (
f"Executed Action: Tool '{tool_call_info.get('tool_name', 'N/A')}' "
f"with arguments '{tool_call_info.get('arguments', '{}')}'. Result: '{str(tool_call_info.get('result', ''))[:200]}...'"
if not self.plan:
logger.warning(
f"ReActAgent: No plan generated in iteration {iteration}"
)
self.observations.append(observation_string)
content_after_handler = self._extract_content_from_llm_response(resp_after_handler)
if content_after_handler:
self.observations.append(f"Response after tool execution: {content_after_handler}")
else:
logger.info("ReActAgent: LLM response after handler had no textual content.")
if log_context:
log_context.stacks.append(
{"component": "agent_tool_calls", "data": {"tool_calls": self.tool_calls.copy()}}
)
yield {"sources": retrieved_data}
display_tool_calls = []
for tc in self.tool_calls:
cleaned_tc = tc.copy()
if len(str(cleaned_tc.get("result", ""))) > 50:
cleaned_tc["result"] = str(cleaned_tc["result"])[:50] + "..."
display_tool_calls.append(cleaned_tc)
if display_tool_calls:
yield {"tool_calls": display_tool_calls}
if "SATISFIED" in content_after_handler:
logger.info("ReActAgent: LLM satisfied with the plan and data. Stopping reasoning.")
break
self.observations.append(f"Plan (iteration {iteration}): {self.plan}")
# 3. Create Final Answer based on all observations
final_answer_stream = self._create_final_answer(query, self.observations, log_context)
for answer_chunk in final_answer_stream:
yield {"answer": answer_chunk}
logger.info("ReActAgent: Finished generating final answer.")
satisfied = yield from self._execution_phase(query, tools_dict, log_context)
def _create_plan(
self, query: str, docs_data: str, log_context: LogContext = None
) -> Generator[str, None, None]:
plan_prompt_filled = planning_prompt_template.replace("{query}", query)
if "{summaries}" in plan_prompt_filled:
summaries = docs_data if docs_data else "No documents retrieved."
plan_prompt_filled = plan_prompt_filled.replace("{summaries}", summaries)
plan_prompt_filled = plan_prompt_filled.replace("{prompt}", self.prompt or "")
plan_prompt_filled = plan_prompt_filled.replace("{observations}", "\n".join(self.observations))
if satisfied:
logger.info("ReActAgent: Goal satisfied, stopping reasoning loop")
break
yield from self._synthesis_phase(query, log_context)
messages = [{"role": "user", "content": plan_prompt_filled}]
def _reset_state(self):
"""Reset agent state for new query"""
self.plan = ""
self.observations = []
plan_stream_from_llm = self.llm.gen_stream(
model=self.gpt_model, messages=messages, tools=getattr(self, 'tools', None) # Use self.tools
def _planning_phase(
self, query: str, log_context: LogContext
) -> Generator[Dict, None, None]:
"""Generate strategic plan for query"""
logger.info("ReActAgent: Creating plan...")
plan_prompt = self._build_planning_prompt(query)
messages = [{"role": "user", "content": plan_prompt}]
plan_stream = self.llm.gen_stream(
model=self.gpt_model,
messages=messages,
tools=self.tools if self.tools else None,
)
if log_context:
data = build_stack_data(self.llm)
log_context.stacks.append({"component": "planning_llm", "data": data})
log_context.stacks.append(
{"component": "planning_llm", "data": build_stack_data(self.llm)}
)
plan_parts = []
for chunk in plan_stream:
content = self._extract_content(chunk)
if content:
plan_parts.append(content)
yield {"thought": content}
self.plan = "".join(plan_parts)
for chunk in plan_stream_from_llm:
content_piece = self._extract_content_from_llm_response(chunk)
if content_piece:
yield content_piece
def _execution_phase(
self, query: str, tools_dict: Dict, log_context: LogContext
) -> Generator[bool, None, None]:
"""Execute plan with tool calls and observations"""
execution_prompt = self._build_execution_prompt(query)
messages = self._build_messages(execution_prompt, query)
def _create_final_answer(
self, query: str, observations: List[str], log_context: LogContext = None
) -> Generator[str, None, None]:
observation_string = "\n".join(observations)
max_obs_len = 10000
if len(observation_string) > max_obs_len:
observation_string = observation_string[:max_obs_len] + "\n...[observations truncated]"
logger.warning("ReActAgent: Truncated observations for final answer prompt due to length.")
llm_response = self._llm_gen(messages, log_context)
initial_content = self._extract_content(llm_response)
final_answer_prompt_filled = final_prompt_template.format(
query=query, observations=observation_string
if initial_content:
self.observations.append(f"Initial response: {initial_content}")
processed_response = self._llm_handler(
llm_response, tools_dict, messages, log_context
)
messages = [{"role": "user", "content": final_answer_prompt_filled}]
for tool_call in self.tool_calls:
observation = (
f"Executed: {tool_call.get('tool_name', 'Unknown')} "
f"with args {tool_call.get('arguments', {})}. "
f"Result: {str(tool_call.get('result', ''))[:200]}"
)
self.observations.append(observation)
final_content = self._extract_content(processed_response)
if final_content:
self.observations.append(f"Response after tools: {final_content}")
if log_context:
log_context.stacks.append(
{
"component": "agent_tool_calls",
"data": {"tool_calls": self.tool_calls.copy()},
}
)
yield {"sources": self.retrieved_docs}
yield {"tool_calls": self._get_truncated_tool_calls()}
# Final answer should synthesize, not call tools.
final_answer_stream_from_llm = self.llm.gen_stream(
return "SATISFIED" in (final_content or "")
def _synthesis_phase(
self, query: str, log_context: LogContext
) -> Generator[Dict, None, None]:
"""Synthesize final answer from all observations"""
logger.info("ReActAgent: Generating final answer...")
final_prompt = self._build_final_answer_prompt(query)
messages = [{"role": "user", "content": final_prompt}]
final_stream = self.llm.gen_stream(
model=self.gpt_model, messages=messages, tools=None
)
if log_context:
data = build_stack_data(self.llm)
log_context.stacks.append({"component": "final_answer_llm", "data": data})
for chunk in final_answer_stream_from_llm:
content_piece = self._extract_content_from_llm_response(chunk)
if content_piece:
yield content_piece
if log_context:
log_context.stacks.append(
{"component": "final_answer_llm", "data": build_stack_data(self.llm)}
)
for chunk in final_stream:
content = self._extract_content(chunk)
if content:
yield {"answer": content}
def _build_planning_prompt(self, query: str) -> str:
"""Build planning phase prompt"""
prompt = PLANNING_PROMPT_TEMPLATE.replace("{query}", query)
prompt = prompt.replace("{prompt}", self.prompt or "")
prompt = prompt.replace("{summaries}", "")
prompt = prompt.replace("{observations}", "\n".join(self.observations))
return prompt
def _build_execution_prompt(self, query: str) -> str:
"""Build execution phase prompt with plan and observations"""
observations_str = "\n".join(self.observations)
if len(observations_str) > 20000:
observations_str = observations_str[:20000] + "\n...[truncated]"
return (
f"{self.prompt or ''}\n\n"
f"Follow this plan:\n{self.plan}\n\n"
f"Observations:\n{observations_str}\n\n"
f"If sufficient data exists to answer '{query}', respond with 'SATISFIED'. "
f"Otherwise, continue executing the plan."
)
def _build_final_answer_prompt(self, query: str) -> str:
"""Build final synthesis prompt"""
observations_str = "\n".join(self.observations)
if len(observations_str) > 10000:
observations_str = observations_str[:10000] + "\n...[truncated]"
logger.warning("ReActAgent: Observations truncated for final answer")
return FINAL_PROMPT_TEMPLATE.format(query=query, observations=observations_str)
def _extract_content(self, response: Any) -> str:
"""Extract text content from various LLM response formats"""
if not response:
return ""
collected = []
if isinstance(response, str):
return response
if hasattr(response, "message") and hasattr(response.message, "content"):
if response.message.content:
return response.message.content
if hasattr(response, "choices") and response.choices:
if hasattr(response.choices[0], "message"):
content = response.choices[0].message.content
if content:
return content
if hasattr(response, "content") and isinstance(response.content, list):
if response.content and hasattr(response.content[0], "text"):
return response.content[0].text
try:
for chunk in response:
content_piece = ""
if hasattr(chunk, "choices") and chunk.choices:
if hasattr(chunk.choices[0], "delta"):
delta_content = chunk.choices[0].delta.content
if delta_content:
content_piece = delta_content
elif hasattr(chunk, "type") and chunk.type == "content_block_delta":
if hasattr(chunk, "delta") and hasattr(chunk.delta, "text"):
content_piece = chunk.delta.text
elif isinstance(chunk, str):
content_piece = chunk
if content_piece:
collected.append(content_piece)
except (TypeError, AttributeError):
logger.debug(
f"Response not iterable or unexpected format: {type(response)}"
)
except Exception as e:
logger.error(f"Error extracting content: {e}")
return "".join(collected)

View File

@@ -0,0 +1,861 @@
import asyncio
import base64
import json
import logging
import time
from typing import Any, Dict, List, Optional
from urllib.parse import parse_qs, urlparse
from application.agents.tools.base import Tool
from application.api.user.tasks import mcp_oauth_status_task, mcp_oauth_task
from application.cache import get_redis_instance
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.security.encryption import decrypt_credentials
from fastmcp import Client
from fastmcp.client.auth import BearerAuth
from fastmcp.client.transports import (
SSETransport,
StdioTransport,
StreamableHttpTransport,
)
from mcp.client.auth import OAuthClientProvider, TokenStorage
from mcp.shared.auth import OAuthClientInformationFull, OAuthClientMetadata, OAuthToken
from pydantic import AnyHttpUrl, ValidationError
from redis import Redis
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
_mcp_clients_cache = {}
class MCPTool(Tool):
"""
MCP Tool
Connect to remote Model Context Protocol (MCP) servers to access dynamic tools and resources.
"""
def __init__(self, config: Dict[str, Any], user_id: Optional[str] = None):
"""
Initialize the MCP Tool with configuration.
Args:
config: Dictionary containing MCP server configuration:
- server_url: URL of the remote MCP server
- transport_type: Transport type (auto, sse, http, stdio)
- auth_type: Type of authentication (bearer, oauth, api_key, basic, none)
- encrypted_credentials: Encrypted credentials (if available)
- timeout: Request timeout in seconds (default: 30)
- headers: Custom headers for requests
- command: Command for STDIO transport
- args: Arguments for STDIO transport
- oauth_scopes: OAuth scopes for oauth auth type
- oauth_client_name: OAuth client name for oauth auth type
user_id: User ID for decrypting credentials (required if encrypted_credentials exist)
"""
self.config = config
self.user_id = user_id
self.server_url = config.get("server_url", "")
self.transport_type = config.get("transport_type", "auto")
self.auth_type = config.get("auth_type", "none")
self.timeout = config.get("timeout", 30)
self.custom_headers = config.get("headers", {})
self.auth_credentials = {}
if config.get("encrypted_credentials") and user_id:
self.auth_credentials = decrypt_credentials(
config["encrypted_credentials"], user_id
)
else:
self.auth_credentials = config.get("auth_credentials", {})
self.oauth_scopes = config.get("oauth_scopes", [])
self.oauth_task_id = config.get("oauth_task_id", None)
self.oauth_client_name = config.get("oauth_client_name", "DocsGPT-MCP")
self.redirect_uri = f"{settings.API_URL}/api/mcp_server/callback"
self.available_tools = []
self._cache_key = self._generate_cache_key()
self._client = None
# Only validate and setup if server_url is provided and not OAuth
if self.server_url and self.auth_type != "oauth":
self._setup_client()
def _generate_cache_key(self) -> str:
"""Generate a unique cache key for this MCP server configuration."""
auth_key = ""
if self.auth_type == "oauth":
scopes_str = ",".join(self.oauth_scopes) if self.oauth_scopes else "none"
auth_key = f"oauth:{self.oauth_client_name}:{scopes_str}"
elif self.auth_type in ["bearer"]:
token = self.auth_credentials.get(
"bearer_token", ""
) or self.auth_credentials.get("access_token", "")
auth_key = f"bearer:{token[:10]}..." if token else "bearer:none"
elif self.auth_type == "api_key":
api_key = self.auth_credentials.get("api_key", "")
auth_key = f"apikey:{api_key[:10]}..." if api_key else "apikey:none"
elif self.auth_type == "basic":
username = self.auth_credentials.get("username", "")
auth_key = f"basic:{username}"
else:
auth_key = "none"
return f"{self.server_url}#{self.transport_type}#{auth_key}"
def _setup_client(self):
"""Setup FastMCP client with proper transport and authentication."""
global _mcp_clients_cache
if self._cache_key in _mcp_clients_cache:
cached_data = _mcp_clients_cache[self._cache_key]
if time.time() - cached_data["created_at"] < 1800:
self._client = cached_data["client"]
return
else:
del _mcp_clients_cache[self._cache_key]
transport = self._create_transport()
auth = None
if self.auth_type == "oauth":
redis_client = get_redis_instance()
auth = DocsGPTOAuth(
mcp_url=self.server_url,
scopes=self.oauth_scopes,
redis_client=redis_client,
redirect_uri=self.redirect_uri,
task_id=self.oauth_task_id,
db=db,
user_id=self.user_id,
)
elif self.auth_type == "bearer":
token = self.auth_credentials.get(
"bearer_token", ""
) or self.auth_credentials.get("access_token", "")
if token:
auth = BearerAuth(token)
self._client = Client(transport, auth=auth)
_mcp_clients_cache[self._cache_key] = {
"client": self._client,
"created_at": time.time(),
}
def _create_transport(self):
"""Create appropriate transport based on configuration."""
headers = {"Content-Type": "application/json", "User-Agent": "DocsGPT-MCP/1.0"}
headers.update(self.custom_headers)
if self.auth_type == "api_key":
api_key = self.auth_credentials.get("api_key", "")
header_name = self.auth_credentials.get("api_key_header", "X-API-Key")
if api_key:
headers[header_name] = api_key
elif self.auth_type == "basic":
username = self.auth_credentials.get("username", "")
password = self.auth_credentials.get("password", "")
if username and password:
credentials = base64.b64encode(
f"{username}:{password}".encode()
).decode()
headers["Authorization"] = f"Basic {credentials}"
if self.transport_type == "auto":
if "sse" in self.server_url.lower() or self.server_url.endswith("/sse"):
transport_type = "sse"
else:
transport_type = "http"
else:
transport_type = self.transport_type
if transport_type == "sse":
headers.update({"Accept": "text/event-stream", "Cache-Control": "no-cache"})
return SSETransport(url=self.server_url, headers=headers)
elif transport_type == "http":
return StreamableHttpTransport(url=self.server_url, headers=headers)
elif transport_type == "stdio":
command = self.config.get("command", "python")
args = self.config.get("args", [])
env = self.auth_credentials if self.auth_credentials else None
return StdioTransport(command=command, args=args, env=env)
else:
return StreamableHttpTransport(url=self.server_url, headers=headers)
def _format_tools(self, tools_response) -> List[Dict]:
"""Format tools response to match expected format."""
if hasattr(tools_response, "tools"):
tools = tools_response.tools
elif isinstance(tools_response, list):
tools = tools_response
else:
tools = []
tools_dict = []
for tool in tools:
if hasattr(tool, "name"):
tool_dict = {
"name": tool.name,
"description": tool.description,
}
if hasattr(tool, "inputSchema"):
tool_dict["inputSchema"] = tool.inputSchema
tools_dict.append(tool_dict)
elif isinstance(tool, dict):
tools_dict.append(tool)
else:
if hasattr(tool, "model_dump"):
tools_dict.append(tool.model_dump())
else:
tools_dict.append({"name": str(tool), "description": ""})
return tools_dict
async def _execute_with_client(self, operation: str, *args, **kwargs):
"""Execute operation with FastMCP client."""
if not self._client:
raise Exception("FastMCP client not initialized")
async with self._client:
if operation == "ping":
return await self._client.ping()
elif operation == "list_tools":
tools_response = await self._client.list_tools()
self.available_tools = self._format_tools(tools_response)
return self.available_tools
elif operation == "call_tool":
tool_name = args[0]
tool_args = kwargs
return await self._client.call_tool(tool_name, tool_args)
elif operation == "list_resources":
return await self._client.list_resources()
elif operation == "list_prompts":
return await self._client.list_prompts()
else:
raise Exception(f"Unknown operation: {operation}")
def _run_async_operation(self, operation: str, *args, **kwargs):
"""Run async operation in sync context."""
try:
try:
loop = asyncio.get_running_loop()
import concurrent.futures
def run_in_thread():
new_loop = asyncio.new_event_loop()
asyncio.set_event_loop(new_loop)
try:
return new_loop.run_until_complete(
self._execute_with_client(operation, *args, **kwargs)
)
finally:
new_loop.close()
with concurrent.futures.ThreadPoolExecutor() as executor:
future = executor.submit(run_in_thread)
return future.result(timeout=self.timeout)
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
return loop.run_until_complete(
self._execute_with_client(operation, *args, **kwargs)
)
finally:
loop.close()
except Exception as e:
print(f"Error occurred while running async operation: {e}")
raise
def discover_tools(self) -> List[Dict]:
"""
Discover available tools from the MCP server using FastMCP.
Returns:
List of tool definitions from the server
"""
if not self.server_url:
return []
if not self._client:
self._setup_client()
try:
tools = self._run_async_operation("list_tools")
self.available_tools = tools
return self.available_tools
except Exception as e:
raise Exception(f"Failed to discover tools from MCP server: {str(e)}")
def execute_action(self, action_name: str, **kwargs) -> Any:
"""
Execute an action on the remote MCP server using FastMCP.
Args:
action_name: Name of the action to execute
**kwargs: Parameters for the action
Returns:
Result from the MCP server
"""
if not self.server_url:
raise Exception("No MCP server configured")
if not self._client:
self._setup_client()
cleaned_kwargs = {}
for key, value in kwargs.items():
if value == "" or value is None:
continue
cleaned_kwargs[key] = value
try:
result = self._run_async_operation(
"call_tool", action_name, **cleaned_kwargs
)
return self._format_result(result)
except Exception as e:
raise Exception(f"Failed to execute action '{action_name}': {str(e)}")
def _format_result(self, result) -> Dict:
"""Format FastMCP result to match expected format."""
if hasattr(result, "content"):
content_list = []
for content_item in result.content:
if hasattr(content_item, "text"):
content_list.append({"type": "text", "text": content_item.text})
elif hasattr(content_item, "data"):
content_list.append({"type": "data", "data": content_item.data})
else:
content_list.append(
{"type": "unknown", "content": str(content_item)}
)
return {
"content": content_list,
"isError": getattr(result, "isError", False),
}
else:
return result
def test_connection(self) -> Dict:
"""
Test the connection to the MCP server and validate functionality.
Returns:
Dictionary with connection test results including tool count
"""
if not self.server_url:
return {
"success": False,
"message": "No MCP server URL configured",
"tools_count": 0,
"transport_type": self.transport_type,
"auth_type": self.auth_type,
"error_type": "ConfigurationError",
}
if not self._client:
self._setup_client()
try:
if self.auth_type == "oauth":
return self._test_oauth_connection()
else:
return self._test_regular_connection()
except Exception as e:
return {
"success": False,
"message": f"Connection failed: {str(e)}",
"tools_count": 0,
"transport_type": self.transport_type,
"auth_type": self.auth_type,
"error_type": type(e).__name__,
}
def _test_regular_connection(self) -> Dict:
"""Test connection for non-OAuth auth types."""
try:
self._run_async_operation("ping")
ping_success = True
except Exception:
ping_success = False
tools = self.discover_tools()
message = f"Successfully connected to MCP server. Found {len(tools)} tools."
if not ping_success:
message += " (Ping not supported, but tool discovery worked)"
return {
"success": True,
"message": message,
"tools_count": len(tools),
"transport_type": self.transport_type,
"auth_type": self.auth_type,
"ping_supported": ping_success,
"tools": [tool.get("name", "unknown") for tool in tools],
}
def _test_oauth_connection(self) -> Dict:
"""Test connection for OAuth auth type with proper async handling."""
try:
task = mcp_oauth_task.delay(config=self.config, user=self.user_id)
if not task:
raise Exception("Failed to start OAuth authentication")
return {
"success": True,
"requires_oauth": True,
"task_id": task.id,
"status": "pending",
"message": "OAuth flow started",
}
except Exception as e:
return {
"success": False,
"message": f"OAuth connection failed: {str(e)}",
"tools_count": 0,
"transport_type": self.transport_type,
"auth_type": self.auth_type,
"error_type": type(e).__name__,
}
def get_actions_metadata(self) -> List[Dict]:
"""
Get metadata for all available actions.
Returns:
List of action metadata dictionaries
"""
actions = []
for tool in self.available_tools:
input_schema = (
tool.get("inputSchema")
or tool.get("input_schema")
or tool.get("schema")
or tool.get("parameters")
)
parameters_schema = {
"type": "object",
"properties": {},
"required": [],
}
if input_schema:
if isinstance(input_schema, dict):
if "properties" in input_schema:
parameters_schema = {
"type": input_schema.get("type", "object"),
"properties": input_schema.get("properties", {}),
"required": input_schema.get("required", []),
}
for key in ["additionalProperties", "description"]:
if key in input_schema:
parameters_schema[key] = input_schema[key]
else:
parameters_schema["properties"] = input_schema
action = {
"name": tool.get("name", ""),
"description": tool.get("description", ""),
"parameters": parameters_schema,
}
actions.append(action)
return actions
def get_config_requirements(self) -> Dict:
"""Get configuration requirements for the MCP tool."""
return {
"server_url": {
"type": "string",
"description": "URL of the remote MCP server (e.g., https://api.example.com/mcp or https://docs.mcp.cloudflare.com/sse)",
"required": True,
},
"transport_type": {
"type": "string",
"description": "Transport type for connection",
"enum": ["auto", "sse", "http", "stdio"],
"default": "auto",
"required": False,
"help": {
"auto": "Automatically detect best transport",
"sse": "Server-Sent Events (for real-time streaming)",
"http": "HTTP streaming (recommended for production)",
"stdio": "Standard I/O (for local servers)",
},
},
"auth_type": {
"type": "string",
"description": "Authentication type",
"enum": ["none", "bearer", "oauth", "api_key", "basic"],
"default": "none",
"required": True,
"help": {
"none": "No authentication",
"bearer": "Bearer token authentication",
"oauth": "OAuth 2.1 authentication (with frontend integration)",
"api_key": "API key authentication",
"basic": "Basic authentication",
},
},
"auth_credentials": {
"type": "object",
"description": "Authentication credentials (varies by auth_type)",
"required": False,
"properties": {
"bearer_token": {
"type": "string",
"description": "Bearer token for bearer auth",
},
"access_token": {
"type": "string",
"description": "Access token for OAuth (if pre-obtained)",
},
"api_key": {
"type": "string",
"description": "API key for api_key auth",
},
"api_key_header": {
"type": "string",
"description": "Header name for API key (default: X-API-Key)",
},
"username": {
"type": "string",
"description": "Username for basic auth",
},
"password": {
"type": "string",
"description": "Password for basic auth",
},
},
},
"oauth_scopes": {
"type": "array",
"description": "OAuth scopes to request (for oauth auth_type)",
"items": {"type": "string"},
"required": False,
"default": [],
},
"oauth_client_name": {
"type": "string",
"description": "Client name for OAuth registration (for oauth auth_type)",
"default": "DocsGPT-MCP",
"required": False,
},
"headers": {
"type": "object",
"description": "Custom headers to send with requests",
"required": False,
},
"timeout": {
"type": "integer",
"description": "Request timeout in seconds",
"default": 30,
"minimum": 1,
"maximum": 300,
"required": False,
},
"command": {
"type": "string",
"description": "Command to run for STDIO transport (e.g., 'python')",
"required": False,
},
"args": {
"type": "array",
"description": "Arguments for STDIO command",
"items": {"type": "string"},
"required": False,
},
}
class DocsGPTOAuth(OAuthClientProvider):
"""
Custom OAuth handler for DocsGPT that uses frontend redirect instead of browser.
"""
def __init__(
self,
mcp_url: str,
redirect_uri: str,
redis_client: Redis | None = None,
redis_prefix: str = "mcp_oauth:",
task_id: str = None,
scopes: str | list[str] | None = None,
client_name: str = "DocsGPT-MCP",
user_id=None,
db=None,
additional_client_metadata: dict[str, Any] | None = None,
):
"""
Initialize custom OAuth client provider for DocsGPT.
Args:
mcp_url: Full URL to the MCP endpoint
redirect_uri: Custom redirect URI for DocsGPT frontend
redis_client: Redis client for storing auth state
redis_prefix: Prefix for Redis keys
task_id: Task ID for tracking auth status
scopes: OAuth scopes to request
client_name: Name for this client during registration
user_id: User ID for token storage
db: Database instance for token storage
additional_client_metadata: Extra fields for OAuthClientMetadata
"""
self.redirect_uri = redirect_uri
self.redis_client = redis_client
self.redis_prefix = redis_prefix
self.task_id = task_id
self.user_id = user_id
self.db = db
parsed_url = urlparse(mcp_url)
self.server_base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
if isinstance(scopes, list):
scopes = " ".join(scopes)
client_metadata = OAuthClientMetadata(
client_name=client_name,
redirect_uris=[AnyHttpUrl(redirect_uri)],
grant_types=["authorization_code", "refresh_token"],
response_types=["code"],
scope=scopes,
**(additional_client_metadata or {}),
)
storage = DBTokenStorage(
server_url=self.server_base_url, user_id=self.user_id, db_client=self.db
)
super().__init__(
server_url=self.server_base_url,
client_metadata=client_metadata,
storage=storage,
redirect_handler=self.redirect_handler,
callback_handler=self.callback_handler,
)
self.auth_url = None
self.extracted_state = None
def _process_auth_url(self, authorization_url: str) -> tuple[str, str]:
"""Process authorization URL to extract state"""
try:
parsed_url = urlparse(authorization_url)
query_params = parse_qs(parsed_url.query)
state_params = query_params.get("state", [])
if state_params:
state = state_params[0]
else:
raise ValueError("No state in auth URL")
return authorization_url, state
except Exception as e:
raise Exception(f"Failed to process auth URL: {e}")
async def redirect_handler(self, authorization_url: str) -> None:
"""Store auth URL and state in Redis for frontend to use."""
auth_url, state = self._process_auth_url(authorization_url)
logging.info(
"[DocsGPTOAuth] Processed auth_url: %s, state: %s", auth_url, state
)
self.auth_url = auth_url
self.extracted_state = state
if self.redis_client and self.extracted_state:
key = f"{self.redis_prefix}auth_url:{self.extracted_state}"
self.redis_client.setex(key, 600, auth_url)
logging.info("[DocsGPTOAuth] Stored auth_url in Redis: %s", key)
if self.task_id:
status_key = f"mcp_oauth_status:{self.task_id}"
status_data = {
"status": "requires_redirect",
"message": "OAuth authorization required",
"authorization_url": self.auth_url,
"state": self.extracted_state,
"requires_oauth": True,
"task_id": self.task_id,
}
self.redis_client.setex(status_key, 600, json.dumps(status_data))
async def callback_handler(self) -> tuple[str, str | None]:
"""Wait for auth code from Redis using the state value."""
if not self.redis_client or not self.extracted_state:
raise Exception("Redis client or state not configured for OAuth")
poll_interval = 1
max_wait_time = 300
code_key = f"{self.redis_prefix}code:{self.extracted_state}"
if self.task_id:
status_key = f"mcp_oauth_status:{self.task_id}"
status_data = {
"status": "awaiting_callback",
"message": "Waiting for OAuth callback...",
"authorization_url": self.auth_url,
"state": self.extracted_state,
"requires_oauth": True,
"task_id": self.task_id,
}
self.redis_client.setex(status_key, 600, json.dumps(status_data))
start_time = time.time()
while time.time() - start_time < max_wait_time:
code_data = self.redis_client.get(code_key)
if code_data:
code = code_data.decode()
returned_state = self.extracted_state
self.redis_client.delete(code_key)
self.redis_client.delete(
f"{self.redis_prefix}auth_url:{self.extracted_state}"
)
self.redis_client.delete(
f"{self.redis_prefix}state:{self.extracted_state}"
)
if self.task_id:
status_data = {
"status": "callback_received",
"message": "OAuth callback received, completing authentication...",
"task_id": self.task_id,
}
self.redis_client.setex(status_key, 600, json.dumps(status_data))
return code, returned_state
error_key = f"{self.redis_prefix}error:{self.extracted_state}"
error_data = self.redis_client.get(error_key)
if error_data:
error_msg = error_data.decode()
self.redis_client.delete(error_key)
self.redis_client.delete(
f"{self.redis_prefix}auth_url:{self.extracted_state}"
)
self.redis_client.delete(
f"{self.redis_prefix}state:{self.extracted_state}"
)
raise Exception(f"OAuth error: {error_msg}")
await asyncio.sleep(poll_interval)
self.redis_client.delete(f"{self.redis_prefix}auth_url:{self.extracted_state}")
self.redis_client.delete(f"{self.redis_prefix}state:{self.extracted_state}")
raise Exception("OAuth callback timeout: no code received within 5 minutes")
class DBTokenStorage(TokenStorage):
def __init__(self, server_url: str, user_id: str, db_client):
self.server_url = server_url
self.user_id = user_id
self.db_client = db_client
self.collection = db_client["connector_sessions"]
@staticmethod
def get_base_url(url: str) -> str:
parsed = urlparse(url)
return f"{parsed.scheme}://{parsed.netloc}"
def get_db_key(self) -> dict:
return {
"server_url": self.get_base_url(self.server_url),
"user_id": self.user_id,
}
async def get_tokens(self) -> OAuthToken | None:
doc = await asyncio.to_thread(self.collection.find_one, self.get_db_key())
if not doc or "tokens" not in doc:
return None
try:
tokens = OAuthToken.model_validate(doc["tokens"])
return tokens
except ValidationError as e:
logging.error(f"Could not load tokens: {e}")
return None
async def set_tokens(self, tokens: OAuthToken) -> None:
await asyncio.to_thread(
self.collection.update_one,
self.get_db_key(),
{"$set": {"tokens": tokens.model_dump()}},
True,
)
logging.info(f"Saved tokens for {self.get_base_url(self.server_url)}")
async def get_client_info(self) -> OAuthClientInformationFull | None:
doc = await asyncio.to_thread(self.collection.find_one, self.get_db_key())
if not doc or "client_info" not in doc:
return None
try:
client_info = OAuthClientInformationFull.model_validate(doc["client_info"])
tokens = await self.get_tokens()
if tokens is None:
logging.debug(
"No tokens found, clearing client info to force fresh registration."
)
await asyncio.to_thread(
self.collection.update_one,
self.get_db_key(),
{"$unset": {"client_info": ""}},
)
return None
return client_info
except ValidationError as e:
logging.error(f"Could not load client info: {e}")
return None
def _serialize_client_info(self, info: dict) -> dict:
if "redirect_uris" in info and isinstance(info["redirect_uris"], list):
info["redirect_uris"] = [str(u) for u in info["redirect_uris"]]
return info
async def set_client_info(self, client_info: OAuthClientInformationFull) -> None:
serialized_info = self._serialize_client_info(client_info.model_dump())
await asyncio.to_thread(
self.collection.update_one,
self.get_db_key(),
{"$set": {"client_info": serialized_info}},
True,
)
logging.info(f"Saved client info for {self.get_base_url(self.server_url)}")
async def clear(self) -> None:
await asyncio.to_thread(self.collection.delete_one, self.get_db_key())
logging.info(f"Cleared OAuth cache for {self.get_base_url(self.server_url)}")
@classmethod
async def clear_all(cls, db_client) -> None:
collection = db_client["connector_sessions"]
await asyncio.to_thread(collection.delete_many, {})
logging.info("Cleared all OAuth client cache data.")
class MCPOAuthManager:
"""Manager for handling MCP OAuth callbacks."""
def __init__(self, redis_client: Redis | None, redis_prefix: str = "mcp_oauth:"):
self.redis_client = redis_client
self.redis_prefix = redis_prefix
def handle_oauth_callback(
self, state: str, code: str, error: Optional[str] = None
) -> bool:
"""
Handle OAuth callback from provider.
Args:
state: The state parameter from OAuth callback
code: The authorization code from OAuth callback
error: Error message if OAuth failed
Returns:
True if successful, False otherwise
"""
try:
if not self.redis_client or not state:
raise Exception("Redis client or state not provided")
if error:
error_key = f"{self.redis_prefix}error:{state}"
self.redis_client.setex(error_key, 300, error)
raise Exception(f"OAuth error received: {error}")
code_key = f"{self.redis_prefix}code:{state}"
self.redis_client.setex(code_key, 300, code)
state_key = f"{self.redis_prefix}state:{state}"
self.redis_client.setex(state_key, 300, "completed")
return True
except Exception as e:
logging.error(f"Error handling OAuth callback: {e}")
return False
def get_oauth_status(self, task_id: str) -> Dict[str, Any]:
"""Get current status of OAuth flow using provided task_id."""
if not task_id:
return {"status": "not_started", "message": "OAuth flow not started"}
return mcp_oauth_status_task(task_id)

View File

@@ -0,0 +1,546 @@
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
import re
import uuid
from .base import Tool
from application.core.mongo_db import MongoDB
from application.core.settings import settings
class MemoryTool(Tool):
"""Memory
Stores and retrieves information across conversations through a memory file directory.
"""
def __init__(self, tool_config: Optional[Dict[str, Any]] = None, user_id: Optional[str] = None) -> None:
"""Initialize the tool.
Args:
tool_config: Optional tool configuration. Should include:
- tool_id: Unique identifier for this memory tool instance (from user_tools._id)
This ensures each user's tool configuration has isolated memories
user_id: The authenticated user's id (should come from decoded_token["sub"]).
"""
self.user_id: Optional[str] = user_id
# Get tool_id from configuration (passed from user_tools._id in production)
# In production, tool_id is the MongoDB ObjectId string from user_tools collection
if tool_config and "tool_id" in tool_config:
self.tool_id = tool_config["tool_id"]
elif user_id:
# Fallback for backward compatibility or testing
self.tool_id = f"default_{user_id}"
else:
# Last resort fallback (shouldn't happen in normal use)
self.tool_id = str(uuid.uuid4())
db = MongoDB.get_client()[settings.MONGO_DB_NAME]
self.collection = db["memories"]
# -----------------------------
# Action implementations
# -----------------------------
def execute_action(self, action_name: str, **kwargs: Any) -> str:
"""Execute an action by name.
Args:
action_name: One of view, create, str_replace, insert, delete, rename.
**kwargs: Parameters for the action.
Returns:
A human-readable string result.
"""
if not self.user_id:
return "Error: MemoryTool requires a valid user_id."
if action_name == "view":
return self._view(
kwargs.get("path", "/"),
kwargs.get("view_range")
)
if action_name == "create":
return self._create(
kwargs.get("path", ""),
kwargs.get("file_text", "")
)
if action_name == "str_replace":
return self._str_replace(
kwargs.get("path", ""),
kwargs.get("old_str", ""),
kwargs.get("new_str", "")
)
if action_name == "insert":
return self._insert(
kwargs.get("path", ""),
kwargs.get("insert_line", 1),
kwargs.get("insert_text", "")
)
if action_name == "delete":
return self._delete(kwargs.get("path", ""))
if action_name == "rename":
return self._rename(
kwargs.get("old_path", ""),
kwargs.get("new_path", "")
)
return f"Unknown action: {action_name}"
def get_actions_metadata(self) -> List[Dict[str, Any]]:
"""Return JSON metadata describing supported actions for tool schemas."""
return [
{
"name": "view",
"description": "Shows directory contents or file contents with optional line ranges.",
"parameters": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Path to file or directory (e.g., /notes.txt or /project/ or /)."
},
"view_range": {
"type": "array",
"items": {"type": "integer"},
"description": "Optional [start_line, end_line] to view specific lines (1-indexed)."
}
},
"required": ["path"]
},
},
{
"name": "create",
"description": "Create or overwrite a file.",
"parameters": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "File path to create (e.g., /notes.txt or /project/task.txt)."
},
"file_text": {
"type": "string",
"description": "Content to write to the file."
}
},
"required": ["path", "file_text"]
},
},
{
"name": "str_replace",
"description": "Replace text in a file.",
"parameters": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "File path (e.g., /notes.txt)."
},
"old_str": {
"type": "string",
"description": "String to find."
},
"new_str": {
"type": "string",
"description": "String to replace with."
}
},
"required": ["path", "old_str", "new_str"]
},
},
{
"name": "insert",
"description": "Insert text at a specific line in a file.",
"parameters": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "File path (e.g., /notes.txt)."
},
"insert_line": {
"type": "integer",
"description": "Line number to insert at (1-indexed)."
},
"insert_text": {
"type": "string",
"description": "Text to insert."
}
},
"required": ["path", "insert_line", "insert_text"]
},
},
{
"name": "delete",
"description": "Delete a file or directory.",
"parameters": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Path to delete (e.g., /notes.txt or /project/)."
}
},
"required": ["path"]
},
},
{
"name": "rename",
"description": "Rename or move a file/directory.",
"parameters": {
"type": "object",
"properties": {
"old_path": {
"type": "string",
"description": "Current path (e.g., /old.txt)."
},
"new_path": {
"type": "string",
"description": "New path (e.g., /new.txt)."
}
},
"required": ["old_path", "new_path"]
},
},
]
def get_config_requirements(self) -> Dict[str, Any]:
"""Return configuration requirements."""
return {}
# -----------------------------
# Path validation
# -----------------------------
def _validate_path(self, path: str) -> Optional[str]:
"""Validate and normalize path.
Args:
path: User-provided path.
Returns:
Normalized path or None if invalid.
"""
if not path:
return None
# Remove any leading/trailing whitespace
path = path.strip()
# Preserve whether path ends with / (indicates directory)
is_directory = path.endswith("/")
# Ensure path starts with / for consistency
if not path.startswith("/"):
path = "/" + path
# Check for directory traversal patterns
if ".." in path or path.count("//") > 0:
return None
# Normalize the path
try:
# Convert to Path object and resolve to canonical form
normalized = str(Path(path).as_posix())
# Ensure it still starts with /
if not normalized.startswith("/"):
return None
# Preserve trailing slash for directories
if is_directory and not normalized.endswith("/") and normalized != "/":
normalized = normalized + "/"
return normalized
except Exception:
return None
# -----------------------------
# Internal helpers
# -----------------------------
def _view(self, path: str, view_range: Optional[List[int]] = None) -> str:
"""View directory contents or file contents."""
validated_path = self._validate_path(path)
if not validated_path:
return "Error: Invalid path."
# Check if viewing directory (ends with / or is root)
if validated_path == "/" or validated_path.endswith("/"):
return self._view_directory(validated_path)
# Otherwise view file
return self._view_file(validated_path, view_range)
def _view_directory(self, path: str) -> str:
"""List files in a directory."""
# Ensure path ends with / for proper prefix matching
search_path = path if path.endswith("/") else path + "/"
# Find all files that start with this directory path
query = {
"user_id": self.user_id,
"tool_id": self.tool_id,
"path": {"$regex": f"^{re.escape(search_path)}"}
}
docs = list(self.collection.find(query, {"path": 1}))
if not docs:
return f"Directory: {path}\n(empty)"
# Extract filenames relative to the directory
files = []
for doc in docs:
file_path = doc["path"]
# Remove the directory prefix
if file_path.startswith(search_path):
relative = file_path[len(search_path):]
if relative:
files.append(relative)
files.sort()
file_list = "\n".join(f"- {f}" for f in files)
return f"Directory: {path}\n{file_list}"
def _view_file(self, path: str, view_range: Optional[List[int]] = None) -> str:
"""View file contents with optional line range."""
doc = self.collection.find_one({"user_id": self.user_id, "tool_id": self.tool_id, "path": path})
if not doc or not doc.get("content"):
return f"Error: File not found: {path}"
content = str(doc["content"])
# Apply view_range if specified
if view_range and len(view_range) == 2:
lines = content.split("\n")
start, end = view_range
# Convert to 0-indexed
start_idx = max(0, start - 1)
end_idx = min(len(lines), end)
if start_idx >= len(lines):
return f"Error: Line range out of bounds. File has {len(lines)} lines."
selected_lines = lines[start_idx:end_idx]
# Add line numbers (enumerate with 1-based start)
numbered_lines = [f"{i}: {line}" for i, line in enumerate(selected_lines, start=start)]
return "\n".join(numbered_lines)
return content
def _create(self, path: str, file_text: str) -> str:
"""Create or overwrite a file."""
validated_path = self._validate_path(path)
if not validated_path:
return "Error: Invalid path."
if validated_path == "/" or validated_path.endswith("/"):
return "Error: Cannot create a file at directory path."
self.collection.update_one(
{"user_id": self.user_id, "tool_id": self.tool_id, "path": validated_path},
{
"$set": {
"content": file_text,
"updated_at": datetime.now()
}
},
upsert=True
)
return f"File created: {validated_path}"
def _str_replace(self, path: str, old_str: str, new_str: str) -> str:
"""Replace text in a file."""
validated_path = self._validate_path(path)
if not validated_path:
return "Error: Invalid path."
if not old_str:
return "Error: old_str is required."
doc = self.collection.find_one({"user_id": self.user_id, "tool_id": self.tool_id, "path": validated_path})
if not doc or not doc.get("content"):
return f"Error: File not found: {validated_path}"
current_content = str(doc["content"])
# Check if old_str exists (case-insensitive)
if old_str.lower() not in current_content.lower():
return f"Error: String '{old_str}' not found in file."
# Replace the string (case-insensitive)
import re as regex_module
updated_content = regex_module.sub(regex_module.escape(old_str), new_str, current_content, flags=regex_module.IGNORECASE)
self.collection.update_one(
{"user_id": self.user_id, "tool_id": self.tool_id, "path": validated_path},
{
"$set": {
"content": updated_content,
"updated_at": datetime.now()
}
}
)
return f"File updated: {validated_path}"
def _insert(self, path: str, insert_line: int, insert_text: str) -> str:
"""Insert text at a specific line."""
validated_path = self._validate_path(path)
if not validated_path:
return "Error: Invalid path."
if not insert_text:
return "Error: insert_text is required."
doc = self.collection.find_one({"user_id": self.user_id, "tool_id": self.tool_id, "path": validated_path})
if not doc or not doc.get("content"):
return f"Error: File not found: {validated_path}"
current_content = str(doc["content"])
lines = current_content.split("\n")
# Convert to 0-indexed
index = insert_line - 1
if index < 0 or index > len(lines):
return f"Error: Invalid line number. File has {len(lines)} lines."
lines.insert(index, insert_text)
updated_content = "\n".join(lines)
self.collection.update_one(
{"user_id": self.user_id, "tool_id": self.tool_id, "path": validated_path},
{
"$set": {
"content": updated_content,
"updated_at": datetime.now()
}
}
)
return f"Text inserted at line {insert_line} in {validated_path}"
def _delete(self, path: str) -> str:
"""Delete a file or directory."""
validated_path = self._validate_path(path)
if not validated_path:
return "Error: Invalid path."
if validated_path == "/":
# Delete all files for this user and tool
result = self.collection.delete_many({"user_id": self.user_id, "tool_id": self.tool_id})
return f"Deleted {result.deleted_count} file(s) from memory."
# Check if it's a directory (ends with /)
if validated_path.endswith("/"):
# Delete all files in directory
result = self.collection.delete_many({
"user_id": self.user_id,
"tool_id": self.tool_id,
"path": {"$regex": f"^{re.escape(validated_path)}"}
})
return f"Deleted directory and {result.deleted_count} file(s)."
# Try to delete as directory first (without trailing slash)
# Check if any files start with this path + /
search_path = validated_path + "/"
directory_result = self.collection.delete_many({
"user_id": self.user_id,
"tool_id": self.tool_id,
"path": {"$regex": f"^{re.escape(search_path)}"}
})
if directory_result.deleted_count > 0:
return f"Deleted directory and {directory_result.deleted_count} file(s)."
# Delete single file
result = self.collection.delete_one({
"user_id": self.user_id,
"tool_id": self.tool_id,
"path": validated_path
})
if result.deleted_count:
return f"Deleted: {validated_path}"
return f"Error: File not found: {validated_path}"
def _rename(self, old_path: str, new_path: str) -> str:
"""Rename or move a file/directory."""
validated_old = self._validate_path(old_path)
validated_new = self._validate_path(new_path)
if not validated_old or not validated_new:
return "Error: Invalid path."
if validated_old == "/" or validated_new == "/":
return "Error: Cannot rename root directory."
# Check if renaming a directory
if validated_old.endswith("/"):
# Ensure validated_new also ends with / for proper path replacement
if not validated_new.endswith("/"):
validated_new = validated_new + "/"
# Find all files in the old directory
docs = list(self.collection.find({
"user_id": self.user_id,
"tool_id": self.tool_id,
"path": {"$regex": f"^{re.escape(validated_old)}"}
}))
if not docs:
return f"Error: Directory not found: {validated_old}"
# Update paths for all files
for doc in docs:
old_file_path = doc["path"]
new_file_path = old_file_path.replace(validated_old, validated_new, 1)
self.collection.update_one(
{"_id": doc["_id"]},
{"$set": {"path": new_file_path, "updated_at": datetime.now()}}
)
return f"Renamed directory: {validated_old} -> {validated_new} ({len(docs)} files)"
# Rename single file
doc = self.collection.find_one({
"user_id": self.user_id,
"tool_id": self.tool_id,
"path": validated_old
})
if not doc:
return f"Error: File not found: {validated_old}"
# Check if new path already exists
existing = self.collection.find_one({
"user_id": self.user_id,
"tool_id": self.tool_id,
"path": validated_new
})
if existing:
return f"Error: File already exists at {validated_new}"
# Delete the old document and create a new one with the new path
self.collection.delete_one({"user_id": self.user_id, "tool_id": self.tool_id, "path": validated_old})
self.collection.insert_one({
"user_id": self.user_id,
"tool_id": self.tool_id,
"path": validated_new,
"content": doc.get("content", ""),
"updated_at": datetime.now()
})
return f"Renamed: {validated_old} -> {validated_new}"

View File

@@ -0,0 +1,199 @@
from datetime import datetime
from typing import Any, Dict, List, Optional
import uuid
from .base import Tool
from application.core.mongo_db import MongoDB
from application.core.settings import settings
class NotesTool(Tool):
"""Notepad
Single note. Supports viewing, overwriting, string replacement.
"""
def __init__(self, tool_config: Optional[Dict[str, Any]] = None, user_id: Optional[str] = None) -> None:
"""Initialize the tool.
Args:
tool_config: Optional tool configuration. Should include:
- tool_id: Unique identifier for this notes tool instance (from user_tools._id)
This ensures each user's tool configuration has isolated notes
user_id: The authenticated user's id (should come from decoded_token["sub"]).
"""
self.user_id: Optional[str] = user_id
# Get tool_id from configuration (passed from user_tools._id in production)
# In production, tool_id is the MongoDB ObjectId string from user_tools collection
if tool_config and "tool_id" in tool_config:
self.tool_id = tool_config["tool_id"]
elif user_id:
# Fallback for backward compatibility or testing
self.tool_id = f"default_{user_id}"
else:
# Last resort fallback (shouldn't happen in normal use)
self.tool_id = str(uuid.uuid4())
db = MongoDB.get_client()[settings.MONGO_DB_NAME]
self.collection = db["notes"]
# -----------------------------
# Action implementations
# -----------------------------
def execute_action(self, action_name: str, **kwargs: Any) -> str:
"""Execute an action by name.
Args:
action_name: One of view, overwrite, str_replace, insert, delete.
**kwargs: Parameters for the action.
Returns:
A human-readable string result.
"""
if not self.user_id:
return "Error: NotesTool requires a valid user_id."
if action_name == "view":
return self._get_note()
if action_name == "overwrite":
return self._overwrite_note(kwargs.get("text", ""))
if action_name == "str_replace":
return self._str_replace(kwargs.get("old_str", ""), kwargs.get("new_str", ""))
if action_name == "insert":
return self._insert(kwargs.get("line_number", 1), kwargs.get("text", ""))
if action_name == "delete":
return self._delete_note()
return f"Unknown action: {action_name}"
def get_actions_metadata(self) -> List[Dict[str, Any]]:
"""Return JSON metadata describing supported actions for tool schemas."""
return [
{
"name": "view",
"description": "Retrieve the user's note.",
"parameters": {"type": "object", "properties": {}},
},
{
"name": "overwrite",
"description": "Replace the entire note content (creates if doesn't exist).",
"parameters": {
"type": "object",
"properties": {
"text": {"type": "string", "description": "New note content."}
},
"required": ["text"],
},
},
{
"name": "str_replace",
"description": "Replace occurrences of old_str with new_str in the note.",
"parameters": {
"type": "object",
"properties": {
"old_str": {"type": "string", "description": "String to find."},
"new_str": {"type": "string", "description": "String to replace with."}
},
"required": ["old_str", "new_str"],
},
},
{
"name": "insert",
"description": "Insert text at the specified line number (1-indexed).",
"parameters": {
"type": "object",
"properties": {
"line_number": {"type": "integer", "description": "Line number to insert at (1-indexed)."},
"text": {"type": "string", "description": "Text to insert."}
},
"required": ["line_number", "text"],
},
},
{
"name": "delete",
"description": "Delete the user's note.",
"parameters": {"type": "object", "properties": {}},
},
]
def get_config_requirements(self) -> Dict[str, Any]:
"""Return configuration requirements (none for now)."""
return {}
# -----------------------------
# Internal helpers (single-note)
# -----------------------------
def _get_note(self) -> str:
doc = self.collection.find_one({"user_id": self.user_id, "tool_id": self.tool_id})
if not doc or not doc.get("note"):
return "No note found."
return str(doc["note"])
def _overwrite_note(self, content: str) -> str:
content = (content or "").strip()
if not content:
return "Note content required."
self.collection.update_one(
{"user_id": self.user_id, "tool_id": self.tool_id},
{"$set": {"note": content, "updated_at": datetime.utcnow()}},
upsert=True, # ✅ create if missing
)
return "Note saved."
def _str_replace(self, old_str: str, new_str: str) -> str:
if not old_str:
return "old_str is required."
doc = self.collection.find_one({"user_id": self.user_id, "tool_id": self.tool_id})
if not doc or not doc.get("note"):
return "No note found."
current_note = str(doc["note"])
# Case-insensitive search
if old_str.lower() not in current_note.lower():
return f"String '{old_str}' not found in note."
# Case-insensitive replacement
import re
updated_note = re.sub(re.escape(old_str), new_str, current_note, flags=re.IGNORECASE)
self.collection.update_one(
{"user_id": self.user_id, "tool_id": self.tool_id},
{"$set": {"note": updated_note, "updated_at": datetime.utcnow()}},
)
return "Note updated."
def _insert(self, line_number: int, text: str) -> str:
if not text:
return "Text is required."
doc = self.collection.find_one({"user_id": self.user_id, "tool_id": self.tool_id})
if not doc or not doc.get("note"):
return "No note found."
current_note = str(doc["note"])
lines = current_note.split("\n")
# Convert to 0-indexed and validate
index = line_number - 1
if index < 0 or index > len(lines):
return f"Invalid line number. Note has {len(lines)} lines."
lines.insert(index, text)
updated_note = "\n".join(lines)
self.collection.update_one(
{"user_id": self.user_id, "tool_id": self.tool_id},
{"$set": {"note": updated_note, "updated_at": datetime.utcnow()}},
)
return "Text inserted."
def _delete_note(self) -> str:
res = self.collection.delete_one({"user_id": self.user_id, "tool_id": self.tool_id})
return "Note deleted." if res.deleted_count else "No note found to delete."

View File

@@ -0,0 +1,321 @@
from datetime import datetime
from typing import Any, Dict, List, Optional
import uuid
from .base import Tool
from application.core.mongo_db import MongoDB
from application.core.settings import settings
class TodoListTool(Tool):
"""Todo List
Manages todo items for users. Supports creating, viewing, updating, and deleting todos.
"""
def __init__(self, tool_config: Optional[Dict[str, Any]] = None, user_id: Optional[str] = None) -> None:
"""Initialize the tool.
Args:
tool_config: Optional tool configuration. Should include:
- tool_id: Unique identifier for this todo list tool instance (from user_tools._id)
This ensures each user's tool configuration has isolated todos
user_id: The authenticated user's id (should come from decoded_token["sub"]).
"""
self.user_id: Optional[str] = user_id
# Get tool_id from configuration (passed from user_tools._id in production)
# In production, tool_id is the MongoDB ObjectId string from user_tools collection
if tool_config and "tool_id" in tool_config:
self.tool_id = tool_config["tool_id"]
elif user_id:
# Fallback for backward compatibility or testing
self.tool_id = f"default_{user_id}"
else:
# Last resort fallback (shouldn't happen in normal use)
self.tool_id = str(uuid.uuid4())
db = MongoDB.get_client()[settings.MONGO_DB_NAME]
self.collection = db["todos"]
# -----------------------------
# Action implementations
# -----------------------------
def execute_action(self, action_name: str, **kwargs: Any) -> str:
"""Execute an action by name.
Args:
action_name: One of list, create, get, update, complete, delete.
**kwargs: Parameters for the action.
Returns:
A human-readable string result.
"""
if not self.user_id:
return "Error: TodoListTool requires a valid user_id."
if action_name == "list":
return self._list()
if action_name == "create":
return self._create(kwargs.get("title", ""))
if action_name == "get":
return self._get(kwargs.get("todo_id"))
if action_name == "update":
return self._update(
kwargs.get("todo_id"),
kwargs.get("title", "")
)
if action_name == "complete":
return self._complete(kwargs.get("todo_id"))
if action_name == "delete":
return self._delete(kwargs.get("todo_id"))
return f"Unknown action: {action_name}"
def get_actions_metadata(self) -> List[Dict[str, Any]]:
"""Return JSON metadata describing supported actions for tool schemas."""
return [
{
"name": "list",
"description": "List all todos for the user.",
"parameters": {"type": "object", "properties": {}},
},
{
"name": "create",
"description": "Create a new todo item.",
"parameters": {
"type": "object",
"properties": {
"title": {
"type": "string",
"description": "Title of the todo item."
}
},
"required": ["title"],
},
},
{
"name": "get",
"description": "Get a specific todo by ID.",
"parameters": {
"type": "object",
"properties": {
"todo_id": {
"type": "integer",
"description": "The ID of the todo to retrieve."
}
},
"required": ["todo_id"],
},
},
{
"name": "update",
"description": "Update a todo's title by ID.",
"parameters": {
"type": "object",
"properties": {
"todo_id": {
"type": "integer",
"description": "The ID of the todo to update."
},
"title": {
"type": "string",
"description": "The new title for the todo."
}
},
"required": ["todo_id", "title"],
},
},
{
"name": "complete",
"description": "Mark a todo as completed.",
"parameters": {
"type": "object",
"properties": {
"todo_id": {
"type": "integer",
"description": "The ID of the todo to mark as completed."
}
},
"required": ["todo_id"],
},
},
{
"name": "delete",
"description": "Delete a specific todo by ID.",
"parameters": {
"type": "object",
"properties": {
"todo_id": {
"type": "integer",
"description": "The ID of the todo to delete."
}
},
"required": ["todo_id"],
},
},
]
def get_config_requirements(self) -> Dict[str, Any]:
"""Return configuration requirements."""
return {}
# -----------------------------
# Internal helpers
# -----------------------------
def _coerce_todo_id(self, value: Optional[Any]) -> Optional[int]:
"""Convert todo identifiers to sequential integers."""
if value is None:
return None
if isinstance(value, int):
return value if value > 0 else None
if isinstance(value, str):
stripped = value.strip()
if stripped.isdigit():
numeric_value = int(stripped)
return numeric_value if numeric_value > 0 else None
return None
def _get_next_todo_id(self) -> int:
"""Get the next sequential todo_id for this user and tool.
Returns a simple integer (1, 2, 3, ...) scoped to this user/tool.
With 5-10 todos max, scanning is negligible.
"""
# Find all todos for this user/tool and get their IDs
todos = list(self.collection.find(
{"user_id": self.user_id, "tool_id": self.tool_id},
{"todo_id": 1}
))
# Find the maximum todo_id
max_id = 0
for todo in todos:
todo_id = self._coerce_todo_id(todo.get("todo_id"))
if todo_id is not None:
max_id = max(max_id, todo_id)
return max_id + 1
def _list(self) -> str:
"""List all todos for the user."""
cursor = self.collection.find({"user_id": self.user_id, "tool_id": self.tool_id})
todos = list(cursor)
if not todos:
return "No todos found."
result_lines = ["Todos:"]
for doc in todos:
todo_id = doc.get("todo_id")
title = doc.get("title", "Untitled")
status = doc.get("status", "open")
line = f"[{todo_id}] {title} ({status})"
result_lines.append(line)
return "\n".join(result_lines)
def _create(self, title: str) -> str:
"""Create a new todo item."""
title = (title or "").strip()
if not title:
return "Error: Title is required."
now = datetime.now()
todo_id = self._get_next_todo_id()
doc = {
"todo_id": todo_id,
"user_id": self.user_id,
"tool_id": self.tool_id,
"title": title,
"status": "open",
"created_at": now,
"updated_at": now,
}
self.collection.insert_one(doc)
return f"Todo created with ID {todo_id}: {title}"
def _get(self, todo_id: Optional[Any]) -> str:
"""Get a specific todo by ID."""
parsed_todo_id = self._coerce_todo_id(todo_id)
if parsed_todo_id is None:
return "Error: todo_id must be a positive integer."
doc = self.collection.find_one({
"user_id": self.user_id,
"tool_id": self.tool_id,
"todo_id": parsed_todo_id
})
if not doc:
return f"Error: Todo with ID {parsed_todo_id} not found."
title = doc.get("title", "Untitled")
status = doc.get("status", "open")
result = f"Todo [{parsed_todo_id}]:\nTitle: {title}\nStatus: {status}"
return result
def _update(self, todo_id: Optional[Any], title: str) -> str:
"""Update a todo's title by ID."""
parsed_todo_id = self._coerce_todo_id(todo_id)
if parsed_todo_id is None:
return "Error: todo_id must be a positive integer."
title = (title or "").strip()
if not title:
return "Error: Title is required."
result = self.collection.update_one(
{"user_id": self.user_id, "tool_id": self.tool_id, "todo_id": parsed_todo_id},
{"$set": {"title": title, "updated_at": datetime.now()}}
)
if result.matched_count == 0:
return f"Error: Todo with ID {parsed_todo_id} not found."
return f"Todo {parsed_todo_id} updated to: {title}"
def _complete(self, todo_id: Optional[Any]) -> str:
"""Mark a todo as completed."""
parsed_todo_id = self._coerce_todo_id(todo_id)
if parsed_todo_id is None:
return "Error: todo_id must be a positive integer."
result = self.collection.update_one(
{"user_id": self.user_id, "tool_id": self.tool_id, "todo_id": parsed_todo_id},
{"$set": {"status": "completed", "updated_at": datetime.now()}}
)
if result.matched_count == 0:
return f"Error: Todo with ID {parsed_todo_id} not found."
return f"Todo {parsed_todo_id} marked as completed."
def _delete(self, todo_id: Optional[Any]) -> str:
"""Delete a specific todo by ID."""
parsed_todo_id = self._coerce_todo_id(todo_id)
if parsed_todo_id is None:
return "Error: todo_id must be a positive integer."
result = self.collection.delete_one({
"user_id": self.user_id,
"tool_id": self.tool_id,
"todo_id": parsed_todo_id
})
if result.deleted_count == 0:
return f"Error: Todo with ID {parsed_todo_id} not found."
return f"Todo {parsed_todo_id} deleted."

View File

@@ -20,20 +20,24 @@ class ToolActionParser:
try:
call_args = json.loads(call.arguments)
tool_parts = call.name.split("_")
# If the tool name doesn't contain an underscore, it's likely a hallucinated tool
if len(tool_parts) < 2:
logger.warning(f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id")
logger.warning(
f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id"
)
return None, None, None
tool_id = tool_parts[-1]
action_name = "_".join(tool_parts[:-1])
# Validate that tool_id looks like a numerical ID
if not tool_id.isdigit():
logger.warning(f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call.")
except (AttributeError, TypeError) as e:
logger.warning(
f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call."
)
except (AttributeError, TypeError, json.JSONDecodeError) as e:
logger.error(f"Error parsing OpenAI LLM call: {e}")
return None, None, None
return tool_id, action_name, call_args
@@ -42,19 +46,23 @@ class ToolActionParser:
try:
call_args = call.arguments
tool_parts = call.name.split("_")
# If the tool name doesn't contain an underscore, it's likely a hallucinated tool
if len(tool_parts) < 2:
logger.warning(f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id")
logger.warning(
f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id"
)
return None, None, None
tool_id = tool_parts[-1]
action_name = "_".join(tool_parts[:-1])
# Validate that tool_id looks like a numerical ID
if not tool_id.isdigit():
logger.warning(f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call.")
logger.warning(
f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call."
)
except (AttributeError, TypeError) as e:
logger.error(f"Error parsing Google LLM call: {e}")
return None, None, None

View File

@@ -23,16 +23,23 @@ class ToolManager:
tool_config = self.config.get(name, {})
self.tools[name] = obj(tool_config)
def load_tool(self, tool_name, tool_config):
def load_tool(self, tool_name, tool_config, user_id=None):
self.config[tool_name] = tool_config
module = importlib.import_module(f"application.agents.tools.{tool_name}")
for member_name, obj in inspect.getmembers(module, inspect.isclass):
if issubclass(obj, Tool) and obj is not Tool:
return obj(tool_config)
if tool_name in {"mcp_tool", "notes", "memory", "todo_list"} and user_id:
return obj(tool_config, user_id)
else:
return obj(tool_config)
def execute_action(self, tool_name, action_name, **kwargs):
def execute_action(self, tool_name, action_name, user_id=None, **kwargs):
if tool_name not in self.tools:
raise ValueError(f"Tool '{tool_name}' not loaded")
if tool_name in {"mcp_tool", "memory", "todo_list"} and user_id:
tool_config = self.config.get(tool_name, {})
tool = self.load_tool(tool_name, tool_config, user_id)
return tool.execute_action(action_name, **kwargs)
return self.tools[tool_name].execute_action(action_name, **kwargs)
def get_all_actions_metadata(self):

View File

@@ -54,6 +54,10 @@ class AnswerResource(Resource, BaseAnswerResource):
default=True,
description="Whether to save the conversation",
),
"passthrough": fields.Raw(
required=False,
description="Dynamic parameters to inject into prompt template",
),
},
)
@@ -69,13 +73,24 @@ class AnswerResource(Resource, BaseAnswerResource):
processor.initialize()
if not processor.decoded_token:
return make_response({"error": "Unauthorized"}, 401)
agent = processor.create_agent()
retriever = processor.create_retriever()
docs_together, docs_list = processor.pre_fetch_docs(
data.get("question", "")
)
tools_data = processor.pre_fetch_tools()
agent = processor.create_agent(
docs_together=docs_together,
docs=docs_list,
tools_data=tools_data,
)
if error := self.check_usage(processor.agent_config):
return error
stream = self.complete_stream(
question=data["question"],
agent=agent,
retriever=retriever,
conversation_id=processor.conversation_id,
user_api_key=processor.agent_config.get("user_api_key"),
decoded_token=processor.decoded_token,

View File

@@ -3,7 +3,7 @@ import json
import logging
from typing import Any, Dict, Generator, List, Optional
from flask import Response
from flask import jsonify, make_response, Response
from flask_restx import Namespace
from application.api.answer.services.conversation_service import ConversationService
@@ -25,6 +25,7 @@ class BaseAnswerResource:
def __init__(self):
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
self.db = db
self.user_logs_collection = db["user_logs"]
self.gpt_model = get_gpt_model()
self.conversation_service = ConversationService()
@@ -40,11 +41,110 @@ class BaseAnswerResource:
return missing_fields
return None
def check_usage(self, agent_config: Dict) -> Optional[Response]:
"""Check if there is a usage limit and if it is exceeded
Args:
agent_config: The config dict of agent instance
Returns:
None or Response if either of limits exceeded.
"""
api_key = agent_config.get("user_api_key")
if not api_key:
return None
agents_collection = self.db["agents"]
agent = agents_collection.find_one({"key": api_key})
if not agent:
return make_response(
jsonify({"success": False, "message": "Invalid API key."}), 401
)
limited_token_mode_raw = agent.get("limited_token_mode", False)
limited_request_mode_raw = agent.get("limited_request_mode", False)
limited_token_mode = (
limited_token_mode_raw
if isinstance(limited_token_mode_raw, bool)
else limited_token_mode_raw == "True"
)
limited_request_mode = (
limited_request_mode_raw
if isinstance(limited_request_mode_raw, bool)
else limited_request_mode_raw == "True"
)
token_limit = int(
agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"])
)
request_limit = int(
agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"])
)
token_usage_collection = self.db["token_usage"]
end_date = datetime.datetime.now()
start_date = end_date - datetime.timedelta(hours=24)
match_query = {
"timestamp": {"$gte": start_date, "$lte": end_date},
"api_key": api_key,
}
if limited_token_mode:
token_pipeline = [
{"$match": match_query},
{
"$group": {
"_id": None,
"total_tokens": {
"$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}
},
}
},
]
token_result = list(token_usage_collection.aggregate(token_pipeline))
daily_token_usage = token_result[0]["total_tokens"] if token_result else 0
else:
daily_token_usage = 0
if limited_request_mode:
daily_request_usage = token_usage_collection.count_documents(match_query)
else:
daily_request_usage = 0
if not limited_token_mode and not limited_request_mode:
return None
token_exceeded = (
limited_token_mode and token_limit > 0 and daily_token_usage >= token_limit
)
request_exceeded = (
limited_request_mode
and request_limit > 0
and daily_request_usage >= request_limit
)
if token_exceeded or request_exceeded:
return make_response(
jsonify(
{
"success": False,
"message": "Exceeding usage limit, please try again later.",
}
),
429,
)
return None
def complete_stream(
self,
question: str,
agent: Any,
retriever: Any,
conversation_id: Optional[str],
user_api_key: Optional[str],
decoded_token: Dict[str, Any],
@@ -73,6 +173,7 @@ class BaseAnswerResource:
agent_id: ID of agent used
is_shared_usage: Flag for shared agent usage
shared_token: Token for shared agent
retrieved_docs: Pre-fetched documents for sources (optional)
Yields:
Server-sent event strings
@@ -83,7 +184,7 @@ class BaseAnswerResource:
schema_info = None
structured_chunks = []
for line in agent.gen(query=question, retriever=retriever):
for line in agent.gen(query=question):
if "answer" in line:
response_full += str(line["answer"])
if line.get("structured"):
@@ -110,6 +211,8 @@ class BaseAnswerResource:
yield f"data: {data}\n\n"
elif "tool_calls" in line:
tool_calls = line["tool_calls"]
data = json.dumps({"type": "tool_calls", "tool_calls": tool_calls})
yield f"data: {data}\n\n"
elif "thought" in line:
thought += line["thought"]
data = json.dumps({"type": "thought", "thought": line["thought"]})
@@ -162,7 +265,6 @@ class BaseAnswerResource:
data = json.dumps(id_data)
yield f"data: {data}\n\n"
retriever_params = retriever.get_params()
log_data = {
"action": "stream_answer",
"level": "info",
@@ -171,7 +273,6 @@ class BaseAnswerResource:
"question": question,
"response": response_full,
"sources": source_log_docs,
"retriever_params": retriever_params,
"attachments": attachment_ids,
"timestamp": datetime.datetime.now(datetime.timezone.utc),
}
@@ -179,18 +280,52 @@ class BaseAnswerResource:
log_data["structured_output"] = True
if schema_info:
log_data["schema"] = schema_info
# clean up text fields to be no longer than 10000 characters
# Clean up text fields to be no longer than 10000 characters
for key, value in log_data.items():
if isinstance(value, str) and len(value) > 10000:
log_data[key] = value[:10000]
self.user_logs_collection.insert_one(log_data)
# End of stream
self.user_logs_collection.insert_one(log_data)
data = json.dumps({"type": "end"})
yield f"data: {data}\n\n"
except GeneratorExit:
logger.info(f"Stream aborted by client for question: {question[:50]}... ")
# Save partial response
if should_save_conversation and response_full:
try:
if isNoneDoc:
for doc in source_log_docs:
doc["source"] = "None"
llm = LLMCreator.create_llm(
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
user_api_key=user_api_key,
decoded_token=decoded_token,
)
self.conversation_service.save_conversation(
conversation_id,
question,
response_full,
thought,
source_log_docs,
tool_calls,
llm,
self.gpt_model,
decoded_token,
index=index,
api_key=user_api_key,
agent_id=agent_id,
is_shared_usage=is_shared_usage,
shared_token=shared_token,
attachment_ids=attachment_ids,
)
except Exception as e:
logger.error(
f"Error saving partial response: {str(e)}", exc_info=True
)
raise
except Exception as e:
logger.error(f"Error in stream: {str(e)}", exc_info=True)
data = json.dumps(

View File

@@ -60,6 +60,10 @@ class StreamResource(Resource, BaseAnswerResource):
"attachments": fields.List(
fields.String, required=False, description="List of attachment IDs"
),
"passthrough": fields.Raw(
required=False,
description="Dynamic parameters to inject into prompt template",
),
},
)
@@ -73,14 +77,20 @@ class StreamResource(Resource, BaseAnswerResource):
processor = StreamProcessor(data, decoded_token)
try:
processor.initialize()
agent = processor.create_agent()
retriever = processor.create_retriever()
docs_together, docs_list = processor.pre_fetch_docs(data["question"])
tools_data = processor.pre_fetch_tools()
agent = processor.create_agent(
docs_together=docs_together, docs=docs_list, tools_data=tools_data
)
if error := self.check_usage(processor.agent_config):
return error
return Response(
self.complete_stream(
question=data["question"],
agent=agent,
retriever=retriever,
conversation_id=processor.conversation_id,
user_api_key=processor.agent_config.get("user_api_key"),
decoded_token=processor.decoded_token,

View File

@@ -133,10 +133,9 @@ class ConversationService:
messages_summary = [
{
"role": "assistant",
"content": "Summarise following conversation in no more than 3 "
"words, respond ONLY with the summary, use the same "
"language as the user query",
"role": "system",
"content": "You are a helpful assistant that creates concise conversation titles. "
"Summarize conversations in 3 words or less using the same language as the user.",
},
{
"role": "user",

View File

@@ -0,0 +1,97 @@
import logging
from typing import Any, Dict, Optional
from application.templates.namespaces import NamespaceManager
from application.templates.template_engine import TemplateEngine, TemplateRenderError
logger = logging.getLogger(__name__)
class PromptRenderer:
"""Service for rendering prompts with dynamic context using namespaces"""
def __init__(self):
self.template_engine = TemplateEngine()
self.namespace_manager = NamespaceManager()
def render_prompt(
self,
prompt_content: str,
user_id: Optional[str] = None,
request_id: Optional[str] = None,
passthrough_data: Optional[Dict[str, Any]] = None,
docs: Optional[list] = None,
docs_together: Optional[str] = None,
tools_data: Optional[Dict[str, Any]] = None,
**kwargs,
) -> str:
"""
Render prompt with full context from all namespaces.
Args:
prompt_content: Raw prompt template string
user_id: Current user identifier
request_id: Unique request identifier
passthrough_data: Parameters from web request
docs: RAG retrieved documents
docs_together: Concatenated document content
tools_data: Pre-fetched tool results organized by tool name
**kwargs: Additional parameters for namespace builders
Returns:
Rendered prompt string with all variables substituted
Raises:
TemplateRenderError: If template rendering fails
"""
if not prompt_content:
return ""
uses_template = self._uses_template_syntax(prompt_content)
if not uses_template:
return self._apply_legacy_substitutions(prompt_content, docs_together)
try:
context = self.namespace_manager.build_context(
user_id=user_id,
request_id=request_id,
passthrough_data=passthrough_data,
docs=docs,
docs_together=docs_together,
tools_data=tools_data,
**kwargs,
)
return self.template_engine.render(prompt_content, context)
except TemplateRenderError:
raise
except Exception as e:
error_msg = f"Prompt rendering failed: {str(e)}"
logger.error(error_msg)
raise TemplateRenderError(error_msg) from e
def _uses_template_syntax(self, prompt_content: str) -> bool:
"""Check if prompt uses Jinja2 template syntax"""
return "{{" in prompt_content and "}}" in prompt_content
def _apply_legacy_substitutions(
self, prompt_content: str, docs_together: Optional[str] = None
) -> str:
"""
Apply backward-compatible substitutions for old prompt format.
Handles legacy {summaries} and {query} placeholders during transition period.
"""
if docs_together:
prompt_content = prompt_content.replace("{summaries}", docs_together)
return prompt_content
def validate_template(self, prompt_content: str) -> bool:
"""Validate prompt template syntax"""
return self.template_engine.validate_template(prompt_content)
def extract_variables(self, prompt_content: str) -> set[str]:
"""Extract all variable names from prompt template"""
return self.template_engine.extract_variables(prompt_content)

View File

@@ -3,7 +3,7 @@ import json
import logging
import os
from pathlib import Path
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Set
from bson.dbref import DBRef
@@ -11,10 +11,15 @@ from bson.objectid import ObjectId
from application.agents.agent_creator import AgentCreator
from application.api.answer.services.conversation_service import ConversationService
from application.api.answer.services.prompt_renderer import PromptRenderer
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.retriever.retriever_creator import RetrieverCreator
from application.utils import get_gpt_model, limit_chat_history
from application.utils import (
calculate_doc_token_budget,
get_gpt_model,
limit_chat_history,
)
logger = logging.getLogger(__name__)
@@ -69,22 +74,25 @@ class StreamProcessor:
self.decoded_token.get("sub") if self.decoded_token is not None else None
)
self.conversation_id = self.data.get("conversation_id")
self.source = (
{"active_docs": self.data["active_docs"]}
if "active_docs" in self.data
else {}
)
self.source = {}
self.all_sources = []
self.attachments = []
self.history = []
self.retrieved_docs = []
self.agent_config = {}
self.retriever_config = {}
self.is_shared_usage = False
self.shared_token = None
self.gpt_model = get_gpt_model()
self.conversation_service = ConversationService()
self.prompt_renderer = PromptRenderer()
self._prompt_content: Optional[str] = None
self._required_tool_actions: Optional[Dict[str, Set[Optional[str]]]] = None
def initialize(self):
"""Initialize all required components for processing"""
self._configure_agent()
self._configure_source()
self._configure_retriever()
self._configure_agent()
self._load_conversation_history()
@@ -171,13 +179,77 @@ class StreamProcessor:
source = data.get("source")
if isinstance(source, DBRef):
source_doc = self.db.dereference(source)
data["source"] = str(source_doc["_id"])
data["retriever"] = source_doc.get("retriever", data.get("retriever"))
data["chunks"] = source_doc.get("chunks", data.get("chunks"))
if source_doc:
data["source"] = str(source_doc["_id"])
data["retriever"] = source_doc.get("retriever", data.get("retriever"))
data["chunks"] = source_doc.get("chunks", data.get("chunks"))
else:
data["source"] = None
elif source == "default":
data["source"] = "default"
else:
data["source"] = None
# Handle multiple sources
sources = data.get("sources", [])
if sources and isinstance(sources, list):
sources_list = []
for i, source_ref in enumerate(sources):
if source_ref == "default":
processed_source = {
"id": "default",
"retriever": "classic",
"chunks": data.get("chunks", "2"),
}
sources_list.append(processed_source)
elif isinstance(source_ref, DBRef):
source_doc = self.db.dereference(source_ref)
if source_doc:
processed_source = {
"id": str(source_doc["_id"]),
"retriever": source_doc.get("retriever", "classic"),
"chunks": source_doc.get("chunks", data.get("chunks", "2")),
}
sources_list.append(processed_source)
data["sources"] = sources_list
else:
data["sources"] = []
return data
def _configure_source(self):
"""Configure the source based on agent data"""
api_key = self.data.get("api_key") or self.agent_key
if api_key:
agent_data = self._get_data_from_api_key(api_key)
if agent_data.get("sources") and len(agent_data["sources"]) > 0:
source_ids = [
source["id"] for source in agent_data["sources"] if source.get("id")
]
if source_ids:
self.source = {"active_docs": source_ids}
else:
self.source = {}
self.all_sources = agent_data["sources"]
elif agent_data.get("source"):
self.source = {"active_docs": agent_data["source"]}
self.all_sources = [
{
"id": agent_data["source"],
"retriever": agent_data.get("retriever", "classic"),
}
]
else:
self.source = {}
self.all_sources = []
return
if "active_docs" in self.data:
self.source = {"active_docs": self.data["active_docs"]}
return
self.source = {}
self.all_sources = []
def _configure_agent(self):
"""Configure the agent based on request data"""
agent_id = self.data.get("agent_id")
@@ -203,7 +275,13 @@ class StreamProcessor:
if data_key.get("retriever"):
self.retriever_config["retriever_name"] = data_key["retriever"]
if data_key.get("chunks") is not None:
self.retriever_config["chunks"] = data_key["chunks"]
try:
self.retriever_config["chunks"] = int(data_key["chunks"])
except (ValueError, TypeError):
logger.warning(
f"Invalid chunks value: {data_key['chunks']}, using default value 2"
)
self.retriever_config["chunks"] = 2
elif self.agent_key:
data_key = self._get_data_from_api_key(self.agent_key)
self.agent_config.update(
@@ -224,7 +302,13 @@ class StreamProcessor:
if data_key.get("retriever"):
self.retriever_config["retriever_name"] = data_key["retriever"]
if data_key.get("chunks") is not None:
self.retriever_config["chunks"] = data_key["chunks"]
try:
self.retriever_config["chunks"] = int(data_key["chunks"])
except (ValueError, TypeError):
logger.warning(
f"Invalid chunks value: {data_key['chunks']}, using default value 2"
)
self.retriever_config["chunks"] = 2
else:
self.agent_config.update(
{
@@ -236,18 +320,312 @@ class StreamProcessor:
)
def _configure_retriever(self):
"""Configure the retriever based on request data"""
history_token_limit = int(self.data.get("token_limit", 2000))
doc_token_limit = calculate_doc_token_budget(
gpt_model=self.gpt_model, history_token_limit=history_token_limit
)
self.retriever_config = {
"retriever_name": self.data.get("retriever", "classic"),
"chunks": int(self.data.get("chunks", 2)),
"token_limit": self.data.get("token_limit", settings.DEFAULT_MAX_HISTORY),
"doc_token_limit": doc_token_limit,
"history_token_limit": history_token_limit,
}
if "isNoneDoc" in self.data and self.data["isNoneDoc"]:
api_key = self.data.get("api_key") or self.agent_key
if not api_key and "isNoneDoc" in self.data and self.data["isNoneDoc"]:
self.retriever_config["chunks"] = 0
def create_agent(self):
"""Create and return the configured agent"""
def create_retriever(self):
return RetrieverCreator.create_retriever(
self.retriever_config["retriever_name"],
source=self.source,
chat_history=self.history,
prompt=get_prompt(self.agent_config["prompt_id"], self.prompts_collection),
chunks=self.retriever_config["chunks"],
doc_token_limit=self.retriever_config.get("doc_token_limit", 50000),
gpt_model=self.gpt_model,
user_api_key=self.agent_config["user_api_key"],
decoded_token=self.decoded_token,
)
def pre_fetch_docs(self, question: str) -> tuple[Optional[str], Optional[list]]:
"""Pre-fetch documents for template rendering before agent creation"""
if self.data.get("isNoneDoc", False):
logger.info("Pre-fetch skipped: isNoneDoc=True")
return None, None
try:
retriever = self.create_retriever()
logger.info(
f"Pre-fetching docs with chunks={retriever.chunks}, doc_token_limit={retriever.doc_token_limit}"
)
docs = retriever.search(question)
logger.info(f"Pre-fetch retrieved {len(docs) if docs else 0} documents")
if not docs:
logger.info("Pre-fetch: No documents returned from search")
return None, None
self.retrieved_docs = docs
docs_with_filenames = []
for doc in docs:
filename = doc.get("filename") or doc.get("title") or doc.get("source")
if filename:
chunk_header = str(filename)
docs_with_filenames.append(f"{chunk_header}\n{doc['text']}")
else:
docs_with_filenames.append(doc["text"])
docs_together = "\n\n".join(docs_with_filenames)
logger.info(f"Pre-fetch docs_together size: {len(docs_together)} chars")
return docs_together, docs
except Exception as e:
logger.error(f"Failed to pre-fetch docs: {str(e)}", exc_info=True)
return None, None
def pre_fetch_tools(self) -> Optional[Dict[str, Any]]:
"""Pre-fetch tool data for template rendering before agent creation
Can be controlled via:
1. Global setting: ENABLE_TOOL_PREFETCH in .env
2. Per-request: disable_tool_prefetch in request data
"""
if not settings.ENABLE_TOOL_PREFETCH:
logger.info(
"Tool pre-fetching disabled globally via ENABLE_TOOL_PREFETCH setting"
)
return None
if self.data.get("disable_tool_prefetch", False):
logger.info("Tool pre-fetching disabled for this request")
return None
required_tool_actions = self._get_required_tool_actions()
filtering_enabled = required_tool_actions is not None
try:
user_tools_collection = self.db["user_tools"]
user_id = self.initial_user_id or "local"
user_tools = list(
user_tools_collection.find({"user": user_id, "status": True})
)
if not user_tools:
return None
tools_data = {}
for tool_doc in user_tools:
tool_name = tool_doc.get("name")
tool_id = str(tool_doc.get("_id"))
if filtering_enabled:
required_actions_by_name = required_tool_actions.get(
tool_name, set()
)
required_actions_by_id = required_tool_actions.get(tool_id, set())
required_actions = required_actions_by_name | required_actions_by_id
if not required_actions:
continue
else:
required_actions = None
tool_data = self._fetch_tool_data(tool_doc, required_actions)
if tool_data:
tools_data[tool_name] = tool_data
tools_data[tool_id] = tool_data
return tools_data if tools_data else None
except Exception as e:
logger.warning(f"Failed to pre-fetch tools: {type(e).__name__}")
return None
def _fetch_tool_data(
self,
tool_doc: Dict[str, Any],
required_actions: Optional[Set[Optional[str]]],
) -> Optional[Dict[str, Any]]:
"""Fetch and execute tool actions with saved parameters"""
try:
from application.agents.tools.tool_manager import ToolManager
tool_name = tool_doc.get("name")
tool_config = tool_doc.get("config", {}).copy()
tool_config["tool_id"] = str(tool_doc["_id"])
tool_manager = ToolManager(config={tool_name: tool_config})
user_id = self.initial_user_id or "local"
tool = tool_manager.load_tool(tool_name, tool_config, user_id=user_id)
if not tool:
logger.debug(f"Tool '{tool_name}' failed to load")
return None
tool_actions = tool.get_actions_metadata()
if not tool_actions:
logger.debug(f"Tool '{tool_name}' has no actions")
return None
saved_actions = tool_doc.get("actions", [])
include_all_actions = required_actions is None or (
required_actions and None in required_actions
)
allowed_actions: Set[str] = (
{action for action in required_actions if isinstance(action, str)}
if required_actions
else set()
)
action_results = {}
for action_meta in tool_actions:
action_name = action_meta.get("name")
if action_name is None:
continue
if (
not include_all_actions
and allowed_actions
and action_name not in allowed_actions
):
continue
try:
saved_action = None
for sa in saved_actions:
if sa.get("name") == action_name:
saved_action = sa
break
action_params = action_meta.get("parameters", {})
properties = action_params.get("properties", {})
kwargs = {}
for param_name, param_spec in properties.items():
if saved_action:
saved_props = saved_action.get("parameters", {}).get(
"properties", {}
)
if param_name in saved_props:
param_value = saved_props[param_name].get("value")
if param_value is not None:
kwargs[param_name] = param_value
continue
if param_name in tool_config:
kwargs[param_name] = tool_config[param_name]
elif "default" in param_spec:
kwargs[param_name] = param_spec["default"]
result = tool.execute_action(action_name, **kwargs)
action_results[action_name] = result
except Exception as e:
logger.debug(
f"Action '{action_name}' execution failed: {type(e).__name__}"
)
continue
return action_results if action_results else None
except Exception as e:
logger.debug(f"Tool pre-fetch failed for '{tool_name}': {type(e).__name__}")
return None
def _get_prompt_content(self) -> Optional[str]:
"""Retrieve and cache the raw prompt content for the current agent configuration."""
if self._prompt_content is not None:
return self._prompt_content
prompt_id = (
self.agent_config.get("prompt_id")
if isinstance(self.agent_config, dict)
else None
)
if not prompt_id:
return None
try:
self._prompt_content = get_prompt(prompt_id, self.prompts_collection)
except ValueError as e:
logger.debug(f"Invalid prompt ID '{prompt_id}': {str(e)}")
self._prompt_content = None
except Exception as e:
logger.debug(f"Failed to fetch prompt '{prompt_id}': {type(e).__name__}")
self._prompt_content = None
return self._prompt_content
def _get_required_tool_actions(self) -> Optional[Dict[str, Set[Optional[str]]]]:
"""Determine which tool actions are referenced in the prompt template"""
if self._required_tool_actions is not None:
return self._required_tool_actions
prompt_content = self._get_prompt_content()
if prompt_content is None:
return None
if "{{" not in prompt_content or "}}" not in prompt_content:
self._required_tool_actions = {}
return self._required_tool_actions
try:
from application.templates.template_engine import TemplateEngine
template_engine = TemplateEngine()
usages = template_engine.extract_tool_usages(prompt_content)
self._required_tool_actions = usages
return self._required_tool_actions
except Exception as e:
logger.debug(f"Failed to extract tool usages: {type(e).__name__}")
self._required_tool_actions = {}
return self._required_tool_actions
def _fetch_memory_tool_data(
self, tool_doc: Dict[str, Any]
) -> Optional[Dict[str, Any]]:
"""Fetch memory tool data for pre-injection into prompt"""
try:
tool_config = tool_doc.get("config", {}).copy()
tool_config["tool_id"] = str(tool_doc["_id"])
from application.agents.tools.memory import MemoryTool
memory_tool = MemoryTool(tool_config, self.initial_user_id)
root_view = memory_tool.execute_action("view", path="/")
if "Error:" in root_view or not root_view.strip():
return None
return {"root": root_view, "available": True}
except Exception as e:
logger.warning(f"Failed to fetch memory tool data: {str(e)}")
return None
def create_agent(
self,
docs_together: Optional[str] = None,
docs: Optional[list] = None,
tools_data: Optional[Dict[str, Any]] = None,
):
"""Create and return the configured agent with rendered prompt"""
raw_prompt = self._get_prompt_content()
if raw_prompt is None:
raw_prompt = get_prompt(
self.agent_config["prompt_id"], self.prompts_collection
)
self._prompt_content = raw_prompt
rendered_prompt = self.prompt_renderer.render_prompt(
prompt_content=raw_prompt,
user_id=self.initial_user_id,
request_id=self.data.get("request_id"),
passthrough_data=self.data.get("passthrough"),
docs=docs,
docs_together=docs_together,
tools_data=tools_data,
)
return AgentCreator.create_agent(
self.agent_config["agent_type"],
endpoint="stream",
@@ -255,23 +633,10 @@ class StreamProcessor:
gpt_model=self.gpt_model,
api_key=settings.API_KEY,
user_api_key=self.agent_config["user_api_key"],
prompt=get_prompt(self.agent_config["prompt_id"], self.prompts_collection),
prompt=rendered_prompt,
chat_history=self.history,
retrieved_docs=self.retrieved_docs,
decoded_token=self.decoded_token,
attachments=self.attachments,
json_schema=self.agent_config.get("json_schema"),
)
def create_retriever(self):
"""Create and return the configured retriever"""
return RetrieverCreator.create_retriever(
self.retriever_config["retriever_name"],
source=self.source,
chat_history=self.history,
prompt=get_prompt(self.agent_config["prompt_id"], self.prompts_collection),
chunks=self.retriever_config["chunks"],
token_limit=self.retriever_config["token_limit"],
gpt_model=self.gpt_model,
user_api_key=self.agent_config["user_api_key"],
decoded_token=self.decoded_token,
)

View File

@@ -1,6 +1,7 @@
import base64
import datetime
import json
import logging
import uuid
from bson.objectid import ObjectId
@@ -14,8 +15,6 @@ from flask import (
from flask_restx import fields, Namespace, Resource
from application.api.user.tasks import (
ingest_connector_task,
)
@@ -24,15 +23,9 @@ from application.core.settings import settings
from application.api import api
from application.utils import (
check_required_fields
)
from application.parser.connectors.connector_creator import ConnectorCreator
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
sources_collection = db["sources"]
@@ -44,185 +37,6 @@ api.add_namespace(connectors_ns)
@connectors_ns.route("/api/connectors/upload")
class UploadConnector(Resource):
@api.expect(
api.model(
"ConnectorUploadModel",
{
"user": fields.String(required=True, description="User ID"),
"source": fields.String(
required=True, description="Source type (google_drive, github, etc.)"
),
"name": fields.String(required=True, description="Job name"),
"data": fields.String(required=True, description="Configuration data"),
"repo_url": fields.String(description="GitHub repository URL"),
},
)
)
@api.doc(
description="Uploads connector source for vectorization",
)
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
data = request.form
required_fields = ["user", "source", "name", "data"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
config = json.loads(data["data"])
source_data = None
sync_frequency = config.get("sync_frequency", "never")
if data["source"] == "github":
source_data = config.get("repo_url")
elif data["source"] in ["crawler", "url"]:
source_data = config.get("url")
elif data["source"] == "reddit":
source_data = config
elif data["source"] in ConnectorCreator.get_supported_connectors():
session_token = config.get("session_token")
if not session_token:
return make_response(jsonify({
"success": False,
"error": f"Missing session_token in {data['source']} configuration"
}), 400)
file_ids = config.get("file_ids", [])
if isinstance(file_ids, str):
file_ids = [id.strip() for id in file_ids.split(',') if id.strip()]
elif not isinstance(file_ids, list):
file_ids = []
folder_ids = config.get("folder_ids", [])
if isinstance(folder_ids, str):
folder_ids = [id.strip() for id in folder_ids.split(',') if id.strip()]
elif not isinstance(folder_ids, list):
folder_ids = []
config["file_ids"] = file_ids
config["folder_ids"] = folder_ids
task = ingest_connector_task.delay(
job_name=data["name"],
user=decoded_token.get("sub"),
source_type=data["source"],
session_token=session_token,
file_ids=file_ids,
folder_ids=folder_ids,
recursive=config.get("recursive", False),
retriever=config.get("retriever", "classic"),
sync_frequency=sync_frequency
)
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
task = ingest_connector_task.delay(
source_data=source_data,
job_name=data["name"],
user=decoded_token.get("sub"),
loader=data["source"],
sync_frequency=sync_frequency
)
except Exception as err:
current_app.logger.error(
f"Error uploading connector source: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
@connectors_ns.route("/api/connectors/task_status")
class ConnectorTaskStatus(Resource):
task_status_model = api.model(
"ConnectorTaskStatusModel",
{"task_id": fields.String(required=True, description="Task ID")},
)
@api.expect(task_status_model)
@api.doc(description="Get connector task status")
def get(self):
task_id = request.args.get("task_id")
if not task_id:
return make_response(
jsonify({"success": False, "message": "Task ID is required"}), 400
)
try:
from application.celery_init import celery
task = celery.AsyncResult(task_id)
task_meta = task.info
print(f"Task status: {task.status}")
if not isinstance(
task_meta, (dict, list, str, int, float, bool, type(None))
):
task_meta = str(task_meta)
except Exception as err:
current_app.logger.error(f"Error getting task status: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"status": task.status, "result": task_meta}), 200)
@connectors_ns.route("/api/connectors/sources")
class ConnectorSources(Resource):
@api.doc(description="Get connector sources")
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
try:
sources = sources_collection.find({"user": user, "type": "connector"}).sort("date", -1)
connector_sources = []
for source in sources:
connector_sources.append({
"id": str(source["_id"]),
"name": source.get("name"),
"date": source.get("date"),
"type": source.get("type"),
"source": source.get("source"),
"tokens": source.get("tokens", ""),
"retriever": source.get("retriever", "classic"),
"syncFrequency": source.get("sync_frequency", ""),
})
except Exception as err:
current_app.logger.error(f"Error retrieving connector sources: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify(connector_sources), 200)
@connectors_ns.route("/api/connectors/delete")
class DeleteConnectorSource(Resource):
@api.doc(
description="Delete a connector source",
params={"source_id": "The source ID to delete"},
)
def delete(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
source_id = request.args.get("source_id")
if not source_id:
return make_response(
jsonify({"success": False, "message": "source_id is required"}), 400
)
try:
result = sources_collection.delete_one(
{"_id": ObjectId(source_id), "user": decoded_token.get("sub")}
)
if result.deleted_count == 0:
return make_response(
jsonify({"success": False, "message": "Source not found"}), 404
)
except Exception as err:
current_app.logger.error(
f"Error deleting connector source: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
@connectors_ns.route("/api/connectors/auth")
class ConnectorAuth(Resource):
@api.doc(description="Get connector OAuth authorization URL", params={"provider": "Connector provider (e.g., google_drive)"})
@@ -235,8 +49,24 @@ class ConnectorAuth(Resource):
if not ConnectorCreator.is_supported(provider):
return make_response(jsonify({"success": False, "error": f"Unsupported provider: {provider}"}), 400)
import uuid
state = str(uuid.uuid4())
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
user_id = decoded_token.get('sub')
now = datetime.datetime.now(datetime.timezone.utc)
result = sessions_collection.insert_one({
"provider": provider,
"user": user_id,
"status": "pending",
"created_at": now
})
state_dict = {
"provider": provider,
"object_id": str(result.inserted_id)
}
state = base64.urlsafe_b64encode(json.dumps(state_dict).encode()).decode()
auth = ConnectorCreator.create_auth(provider)
authorization_url = auth.get_authorization_url(state=state)
return make_response(jsonify({
@@ -257,25 +87,30 @@ class ConnectorsCallback(Resource):
try:
from application.parser.connectors.connector_creator import ConnectorCreator
from flask import request, redirect
import uuid
provider = request.args.get('provider', 'google_drive')
authorization_code = request.args.get('code')
_ = request.args.get('state')
state = request.args.get('state')
error = request.args.get('error')
state_dict = json.loads(base64.urlsafe_b64decode(state.encode()).decode())
provider = state_dict["provider"]
state_object_id = state_dict["object_id"]
if error:
return redirect(f"/api/connectors/callback-status?status=error&message=OAuth+error:+{error}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.&provider={provider}")
if error == "access_denied":
return redirect(f"/api/connectors/callback-status?status=cancelled&message=Authentication+was+cancelled.+You+can+try+again+if+you'd+like+to+connect+your+account.&provider={provider}")
else:
current_app.logger.warning(f"OAuth error in callback: {error}")
return redirect(f"/api/connectors/callback-status?status=error&message=Authentication+failed.+Please+try+again+and+make+sure+to+grant+all+requested+permissions.&provider={provider}")
if not authorization_code:
return redirect(f"/api/connectors/callback-status?status=error&message=Authorization+code+not+provided.+Please+complete+the+authorization+process+and+make+sure+to+grant+offline+access.&provider={provider}")
return redirect(f"/api/connectors/callback-status?status=error&message=Authentication+failed.+Please+try+again+and+make+sure+to+grant+all+requested+permissions.&provider={provider}")
try:
auth = ConnectorCreator.create_auth(provider)
token_info = auth.exchange_code_for_tokens(authorization_code)
session_token = str(uuid.uuid4())
try:
credentials = auth.create_credentials_from_token_info(token_info)
@@ -290,57 +125,44 @@ class ConnectorsCallback(Resource):
"access_token": token_info.get("access_token"),
"refresh_token": token_info.get("refresh_token"),
"token_uri": token_info.get("token_uri"),
"expiry": token_info.get("expiry"),
"scopes": token_info.get("scopes")
"expiry": token_info.get("expiry")
}
user_id = request.decoded_token.get("sub") if getattr(request, "decoded_token", None) else None
sessions_collection.insert_one({
"session_token": session_token,
"user": user_id,
"token_info": sanitized_token_info,
"created_at": datetime.datetime.now(datetime.timezone.utc),
"user_email": user_email,
"provider": provider
})
sessions_collection.find_one_and_update(
{"_id": ObjectId(state_object_id), "provider": provider},
{
"$set": {
"session_token": session_token,
"token_info": sanitized_token_info,
"user_email": user_email,
"status": "authorized"
}
}
)
# Redirect to success page with session token and user email
return redirect(f"/api/connectors/callback-status?status=success&message=Authentication+successful&provider={provider}&session_token={session_token}&user_email={user_email}")
except Exception as e:
current_app.logger.error(f"Error exchanging code for tokens: {str(e)}", exc_info=True)
return redirect(f"/api/connectors/callback-status?status=error&message=Failed+to+exchange+authorization+code+for+tokens:+{str(e)}&provider={provider}")
return redirect(f"/api/connectors/callback-status?status=error&message=Authentication+failed.+Please+try+again+and+make+sure+to+grant+all+requested+permissions.&provider={provider}")
except Exception as e:
current_app.logger.error(f"Error handling connector callback: {e}")
return redirect(f"/api/connectors/callback-status?status=error&message=Failed+to+complete+connector+authentication:+{str(e)}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.")
@connectors_ns.route("/api/connectors/refresh")
class ConnectorRefresh(Resource):
@api.expect(api.model("ConnectorRefreshModel", {"provider": fields.String(required=True), "refresh_token": fields.String(required=True)}))
@api.doc(description="Refresh connector access token")
def post(self):
try:
data = request.get_json()
provider = data.get('provider')
refresh_token = data.get('refresh_token')
if not provider or not refresh_token:
return make_response(jsonify({"success": False, "error": "provider and refresh_token are required"}), 400)
auth = ConnectorCreator.create_auth(provider)
token_info = auth.refresh_access_token(refresh_token)
return make_response(jsonify({"success": True, "token_info": token_info}), 200)
except Exception as e:
current_app.logger.error(f"Error refreshing token for connector: {e}")
return make_response(jsonify({"success": False, "error": str(e)}), 500)
return redirect("/api/connectors/callback-status?status=error&message=Authentication+failed.+Please+try+again+and+make+sure+to+grant+all+requested+permissions.")
@connectors_ns.route("/api/connectors/files")
class ConnectorFiles(Resource):
@api.expect(api.model("ConnectorFilesModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True), "folder_id": fields.String(required=False), "limit": fields.Integer(required=False), "page_token": fields.String(required=False)}))
@api.doc(description="List files from a connector provider (supports pagination)")
@api.expect(api.model("ConnectorFilesModel", {
"provider": fields.String(required=True),
"session_token": fields.String(required=True),
"folder_id": fields.String(required=False),
"limit": fields.Integer(required=False),
"page_token": fields.String(required=False),
"search_query": fields.String(required=False)
}))
@api.doc(description="List files from a connector provider (supports pagination and search)")
def post(self):
try:
data = request.get_json()
@@ -349,10 +171,11 @@ class ConnectorFiles(Resource):
folder_id = data.get('folder_id')
limit = data.get('limit', 10)
page_token = data.get('page_token')
search_query = data.get('search_query')
if not provider or not session_token:
return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
@@ -362,13 +185,17 @@ class ConnectorFiles(Resource):
return make_response(jsonify({"success": False, "error": "Invalid or unauthorized session"}), 401)
loader = ConnectorCreator.create_connector(provider, session_token)
documents = loader.load_data({
input_config = {
'limit': limit,
'list_only': True,
'session_token': session_token,
'folder_id': folder_id,
'page_token': page_token
})
}
if search_query:
input_config['search_query'] = search_query
documents = loader.load_data(input_config)
files = []
for doc in documents[:limit]:
@@ -386,13 +213,20 @@ class ConnectorFiles(Resource):
'name': metadata.get('file_name', 'Unknown File'),
'type': metadata.get('mime_type', 'unknown'),
'size': metadata.get('size', None),
'modifiedTime': formatted_time
'modifiedTime': formatted_time,
'isFolder': metadata.get('is_folder', False)
})
next_token = getattr(loader, 'next_page_token', None)
has_more = bool(next_token)
return make_response(jsonify({"success": True, "files": files, "total": len(files), "next_page_token": next_token, "has_more": has_more}), 200)
return make_response(jsonify({
"success": True,
"files": files,
"total": len(files),
"next_page_token": next_token,
"has_more": has_more
}), 200)
except Exception as e:
current_app.logger.error(f"Error loading connector files: {e}")
return make_response(jsonify({"success": False, "error": f"Failed to load files: {str(e)}"}), 500)
@@ -401,7 +235,7 @@ class ConnectorFiles(Resource):
@connectors_ns.route("/api/connectors/validate-session")
class ConnectorValidateSession(Resource):
@api.expect(api.model("ConnectorValidateSessionModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True)}))
@api.doc(description="Validate connector session token and return user info")
@api.doc(description="Validate connector session token and return user info and access token")
def post(self):
try:
data = request.get_json()
@@ -410,7 +244,6 @@ class ConnectorValidateSession(Resource):
if not provider or not session_token:
return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
@@ -424,10 +257,36 @@ class ConnectorValidateSession(Resource):
auth = ConnectorCreator.create_auth(provider)
is_expired = auth.is_token_expired(token_info)
if is_expired and token_info.get('refresh_token'):
try:
refreshed_token_info = auth.refresh_access_token(token_info.get('refresh_token'))
sanitized_token_info = {
"access_token": refreshed_token_info.get("access_token"),
"refresh_token": refreshed_token_info.get("refresh_token"),
"token_uri": refreshed_token_info.get("token_uri"),
"expiry": refreshed_token_info.get("expiry")
}
sessions_collection.update_one(
{"session_token": session_token},
{"$set": {"token_info": sanitized_token_info}}
)
token_info = sanitized_token_info
is_expired = False
except Exception as refresh_error:
current_app.logger.error(f"Failed to refresh token: {refresh_error}")
if is_expired:
return make_response(jsonify({
"success": False,
"expired": True,
"error": "Session token has expired. Please reconnect."
}), 401)
return make_response(jsonify({
"success": True,
"expired": is_expired,
"user_email": session.get('user_email', 'Connected User')
"expired": False,
"user_email": session.get('user_email', 'Connected User'),
"access_token": token_info.get('access_token')
}), 200)
except Exception as e:
current_app.logger.error(f"Error validating connector session: {e}")
@@ -587,20 +446,23 @@ class ConnectorCallbackStatus(Resource):
.container {{ max-width: 600px; margin: 0 auto; }}
.success {{ color: #4CAF50; }}
.error {{ color: #F44336; }}
.cancelled {{ color: #FF9800; }}
</style>
<script>
window.onload = function() {{
const status = "{status}";
const sessionToken = "{session_token}";
const userEmail = "{user_email}";
if (status === "success" && window.opener) {{
window.opener.postMessage({{
type: '{provider}_auth_success',
session_token: sessionToken,
user_email: userEmail
}}, '*');
setTimeout(() => window.close(), 3000);
}} else if (status === "cancelled" || status === "error") {{
setTimeout(() => window.close(), 3000);
}}
}};
@@ -613,7 +475,7 @@ class ConnectorCallbackStatus(Resource):
<p>{message}</p>
{f'<p>Connected as: {user_email}</p>' if status == 'success' else ''}
</div>
<p><small>You can close this window. {f"Your {provider.replace('_', ' ').title()} is now connected and ready to use." if status == 'success' else ''}</small></p>
<p><small>You can close this window. {f"Your {provider.replace('_', ' ').title()} is now connected and ready to use." if status == 'success' else "Feel free to close this window."}</small></p>
</div>
</body>
</html>

View File

@@ -0,0 +1,5 @@
"""User API module - provides all user-related API endpoints"""
from .routes import user
__all__ = ["user"]

View File

@@ -0,0 +1,7 @@
"""Agents module."""
from .routes import agents_ns
from .sharing import agents_sharing_ns
from .webhooks import agents_webhooks_ns
__all__ = ["agents_ns", "agents_sharing_ns", "agents_webhooks_ns"]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,263 @@
"""Agent management sharing functionality."""
import datetime
import secrets
from bson import DBRef
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource
from application.api import api
from application.core.settings import settings
from application.api.user.base import (
agents_collection,
db,
ensure_user_doc,
resolve_tool_details,
user_tools_collection,
users_collection,
)
from application.utils import generate_image_url
agents_sharing_ns = Namespace(
"agents", description="Agent management operations", path="/api"
)
@agents_sharing_ns.route("/shared_agent")
class SharedAgent(Resource):
@api.doc(
params={
"token": "Shared token of the agent",
},
description="Get a shared agent by token or ID",
)
def get(self):
shared_token = request.args.get("token")
if not shared_token:
return make_response(
jsonify({"success": False, "message": "Token or ID is required"}), 400
)
try:
query = {
"shared_publicly": True,
"shared_token": shared_token,
}
shared_agent = agents_collection.find_one(query)
if not shared_agent:
return make_response(
jsonify({"success": False, "message": "Shared agent not found"}),
404,
)
agent_id = str(shared_agent["_id"])
data = {
"id": agent_id,
"user": shared_agent.get("user", ""),
"name": shared_agent.get("name", ""),
"image": (
generate_image_url(shared_agent["image"])
if shared_agent.get("image")
else ""
),
"description": shared_agent.get("description", ""),
"source": (
str(source_doc["_id"])
if isinstance(shared_agent.get("source"), DBRef)
and (source_doc := db.dereference(shared_agent.get("source")))
else ""
),
"chunks": shared_agent.get("chunks", "0"),
"retriever": shared_agent.get("retriever", "classic"),
"prompt_id": shared_agent.get("prompt_id", "default"),
"tools": shared_agent.get("tools", []),
"tool_details": resolve_tool_details(shared_agent.get("tools", [])),
"agent_type": shared_agent.get("agent_type", ""),
"status": shared_agent.get("status", ""),
"json_schema": shared_agent.get("json_schema"),
"limited_token_mode": shared_agent.get("limited_token_mode", False),
"token_limit": shared_agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
"limited_request_mode": shared_agent.get("limited_request_mode", False),
"request_limit": shared_agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
"created_at": shared_agent.get("createdAt", ""),
"updated_at": shared_agent.get("updatedAt", ""),
"shared": shared_agent.get("shared_publicly", False),
"shared_token": shared_agent.get("shared_token", ""),
"shared_metadata": shared_agent.get("shared_metadata", {}),
}
if data["tools"]:
enriched_tools = []
for tool in data["tools"]:
tool_data = user_tools_collection.find_one({"_id": ObjectId(tool)})
if tool_data:
enriched_tools.append(tool_data.get("name", ""))
data["tools"] = enriched_tools
decoded_token = getattr(request, "decoded_token", None)
if decoded_token:
user_id = decoded_token.get("sub")
owner_id = shared_agent.get("user")
if user_id != owner_id:
ensure_user_doc(user_id)
users_collection.update_one(
{"user_id": user_id},
{"$addToSet": {"agent_preferences.shared_with_me": agent_id}},
)
return make_response(jsonify(data), 200)
except Exception as err:
current_app.logger.error(f"Error retrieving shared agent: {err}")
return make_response(jsonify({"success": False}), 400)
@agents_sharing_ns.route("/shared_agents")
class SharedAgents(Resource):
@api.doc(description="Get shared agents explicitly shared with the user")
def get(self):
try:
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user_id = decoded_token.get("sub")
user_doc = ensure_user_doc(user_id)
shared_with_ids = user_doc.get("agent_preferences", {}).get(
"shared_with_me", []
)
shared_object_ids = [ObjectId(id) for id in shared_with_ids]
shared_agents_cursor = agents_collection.find(
{"_id": {"$in": shared_object_ids}, "shared_publicly": True}
)
shared_agents = list(shared_agents_cursor)
found_ids_set = {str(agent["_id"]) for agent in shared_agents}
stale_ids = [id for id in shared_with_ids if id not in found_ids_set]
if stale_ids:
users_collection.update_one(
{"user_id": user_id},
{"$pullAll": {"agent_preferences.shared_with_me": stale_ids}},
)
pinned_ids = set(user_doc.get("agent_preferences", {}).get("pinned", []))
list_shared_agents = [
{
"id": str(agent["_id"]),
"name": agent.get("name", ""),
"description": agent.get("description", ""),
"image": (
generate_image_url(agent["image"]) if agent.get("image") else ""
),
"tools": agent.get("tools", []),
"tool_details": resolve_tool_details(agent.get("tools", [])),
"agent_type": agent.get("agent_type", ""),
"status": agent.get("status", ""),
"json_schema": agent.get("json_schema"),
"limited_token_mode": agent.get("limited_token_mode", False),
"token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
"limited_request_mode": agent.get("limited_request_mode", False),
"request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
"created_at": agent.get("createdAt", ""),
"updated_at": agent.get("updatedAt", ""),
"pinned": str(agent["_id"]) in pinned_ids,
"shared": agent.get("shared_publicly", False),
"shared_token": agent.get("shared_token", ""),
"shared_metadata": agent.get("shared_metadata", {}),
}
for agent in shared_agents
]
return make_response(jsonify(list_shared_agents), 200)
except Exception as err:
current_app.logger.error(f"Error retrieving shared agents: {err}")
return make_response(jsonify({"success": False}), 400)
@agents_sharing_ns.route("/share_agent")
class ShareAgent(Resource):
@api.expect(
api.model(
"ShareAgentModel",
{
"id": fields.String(required=True, description="ID of the agent"),
"shared": fields.Boolean(
required=True, description="Share or unshare the agent"
),
"username": fields.String(
required=False, description="Name of the user"
),
},
)
)
@api.doc(description="Share or unshare an agent")
def put(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
if not data:
return make_response(
jsonify({"success": False, "message": "Missing JSON body"}), 400
)
agent_id = data.get("id")
shared = data.get("shared")
username = data.get("username", "")
if not agent_id:
return make_response(
jsonify({"success": False, "message": "ID is required"}), 400
)
if shared is None:
return make_response(
jsonify(
{
"success": False,
"message": "Shared parameter is required and must be true or false",
}
),
400,
)
try:
try:
agent_oid = ObjectId(agent_id)
except Exception:
return make_response(
jsonify({"success": False, "message": "Invalid agent ID"}), 400
)
agent = agents_collection.find_one({"_id": agent_oid, "user": user})
if not agent:
return make_response(
jsonify({"success": False, "message": "Agent not found"}), 404
)
if shared:
shared_metadata = {
"shared_by": username,
"shared_at": datetime.datetime.now(datetime.timezone.utc),
}
shared_token = secrets.token_urlsafe(32)
agents_collection.update_one(
{"_id": agent_oid, "user": user},
{
"$set": {
"shared_publicly": shared,
"shared_metadata": shared_metadata,
"shared_token": shared_token,
}
},
)
else:
agents_collection.update_one(
{"_id": agent_oid, "user": user},
{"$set": {"shared_publicly": shared, "shared_token": None}},
{"$unset": {"shared_metadata": ""}},
)
except Exception as err:
current_app.logger.error(f"Error sharing/unsharing agent: {err}")
return make_response(jsonify({"success": False, "error": str(err)}), 400)
shared_token = shared_token if shared else None
return make_response(
jsonify({"success": True, "shared_token": shared_token}), 200
)

View File

@@ -0,0 +1,119 @@
"""Agent management webhook handlers."""
import secrets
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, request
from flask_restx import Namespace, Resource
from application.api import api
from application.api.user.base import agents_collection, require_agent
from application.api.user.tasks import process_agent_webhook
from application.core.settings import settings
agents_webhooks_ns = Namespace(
"agents", description="Agent management operations", path="/api"
)
@agents_webhooks_ns.route("/agent_webhook")
class AgentWebhook(Resource):
@api.doc(
params={"id": "ID of the agent"},
description="Generate webhook URL for the agent",
)
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
agent_id = request.args.get("id")
if not agent_id:
return make_response(
jsonify({"success": False, "message": "ID is required"}), 400
)
try:
agent = agents_collection.find_one(
{"_id": ObjectId(agent_id), "user": user}
)
if not agent:
return make_response(
jsonify({"success": False, "message": "Agent not found"}), 404
)
webhook_token = agent.get("incoming_webhook_token")
if not webhook_token:
webhook_token = secrets.token_urlsafe(32)
agents_collection.update_one(
{"_id": ObjectId(agent_id), "user": user},
{"$set": {"incoming_webhook_token": webhook_token}},
)
base_url = settings.API_URL.rstrip("/")
full_webhook_url = f"{base_url}/api/webhooks/agents/{webhook_token}"
except Exception as err:
current_app.logger.error(
f"Error generating webhook URL: {err}", exc_info=True
)
return make_response(
jsonify({"success": False, "message": "Error generating webhook URL"}),
400,
)
return make_response(
jsonify({"success": True, "webhook_url": full_webhook_url}), 200
)
@agents_webhooks_ns.route("/webhooks/agents/<string:webhook_token>")
class AgentWebhookListener(Resource):
method_decorators = [require_agent]
def _enqueue_webhook_task(self, agent_id_str, payload, source_method):
if not payload:
current_app.logger.warning(
f"Webhook ({source_method}) received for agent {agent_id_str} with empty payload."
)
current_app.logger.info(
f"Incoming {source_method} webhook for agent {agent_id_str}. Enqueuing task with payload: {payload}"
)
try:
task = process_agent_webhook.delay(
agent_id=agent_id_str,
payload=payload,
)
current_app.logger.info(
f"Task {task.id} enqueued for agent {agent_id_str} ({source_method})."
)
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
except Exception as err:
current_app.logger.error(
f"Error enqueuing webhook task ({source_method}) for agent {agent_id_str}: {err}",
exc_info=True,
)
return make_response(
jsonify({"success": False, "message": "Error processing webhook"}), 500
)
@api.doc(
description="Webhook listener for agent events (POST). Expects JSON payload, which is used to trigger processing.",
)
def post(self, webhook_token, agent, agent_id_str):
payload = request.get_json()
if payload is None:
return make_response(
jsonify(
{
"success": False,
"message": "Invalid or missing JSON data in request body",
}
),
400,
)
return self._enqueue_webhook_task(agent_id_str, payload, source_method="POST")
@api.doc(
description="Webhook listener for agent events (GET). Uses URL query parameters as payload to trigger processing.",
)
def get(self, webhook_token, agent, agent_id_str):
payload = request.args.to_dict(flat=True)
return self._enqueue_webhook_task(agent_id_str, payload, source_method="GET")

View File

@@ -0,0 +1,5 @@
"""Analytics module."""
from .routes import analytics_ns
__all__ = ["analytics_ns"]

View File

@@ -0,0 +1,540 @@
"""Analytics and reporting routes."""
import datetime
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource
from application.api import api
from application.api.user.base import (
agents_collection,
conversations_collection,
generate_date_range,
generate_hourly_range,
generate_minute_range,
token_usage_collection,
user_logs_collection,
)
analytics_ns = Namespace(
"analytics", description="Analytics and reporting operations", path="/api"
)
@analytics_ns.route("/get_message_analytics")
class GetMessageAnalytics(Resource):
get_message_analytics_model = api.model(
"GetMessageAnalyticsModel",
{
"api_key_id": fields.String(required=False, description="API Key ID"),
"filter_option": fields.String(
required=False,
description="Filter option for analytics",
default="last_30_days",
enum=[
"last_hour",
"last_24_hour",
"last_7_days",
"last_15_days",
"last_30_days",
],
),
},
)
@api.expect(get_message_analytics_model)
@api.doc(description="Get message analytics based on filter option")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
api_key_id = data.get("api_key_id")
filter_option = data.get("filter_option", "last_30_days")
try:
api_key = (
agents_collection.find_one({"_id": ObjectId(api_key_id), "user": user})[
"key"
]
if api_key_id
else None
)
except Exception as err:
current_app.logger.error(f"Error getting API key: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
end_date = datetime.datetime.now(datetime.timezone.utc)
if filter_option == "last_hour":
start_date = end_date - datetime.timedelta(hours=1)
group_format = "%Y-%m-%d %H:%M:00"
elif filter_option == "last_24_hour":
start_date = end_date - datetime.timedelta(hours=24)
group_format = "%Y-%m-%d %H:00"
else:
if filter_option in ["last_7_days", "last_15_days", "last_30_days"]:
filter_days = (
6
if filter_option == "last_7_days"
else 14 if filter_option == "last_15_days" else 29
)
else:
return make_response(
jsonify({"success": False, "message": "Invalid option"}), 400
)
start_date = end_date - datetime.timedelta(days=filter_days)
start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
end_date = end_date.replace(
hour=23, minute=59, second=59, microsecond=999999
)
group_format = "%Y-%m-%d"
try:
match_stage = {
"$match": {
"user": user,
}
}
if api_key:
match_stage["$match"]["api_key"] = api_key
pipeline = [
match_stage,
{"$unwind": "$queries"},
{
"$match": {
"queries.timestamp": {"$gte": start_date, "$lte": end_date}
}
},
{
"$group": {
"_id": {
"$dateToString": {
"format": group_format,
"date": "$queries.timestamp",
}
},
"count": {"$sum": 1},
}
},
{"$sort": {"_id": 1}},
]
message_data = conversations_collection.aggregate(pipeline)
if filter_option == "last_hour":
intervals = generate_minute_range(start_date, end_date)
elif filter_option == "last_24_hour":
intervals = generate_hourly_range(start_date, end_date)
else:
intervals = generate_date_range(start_date, end_date)
daily_messages = {interval: 0 for interval in intervals}
for entry in message_data:
daily_messages[entry["_id"]] = entry["count"]
except Exception as err:
current_app.logger.error(
f"Error getting message analytics: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(
jsonify({"success": True, "messages": daily_messages}), 200
)
@analytics_ns.route("/get_token_analytics")
class GetTokenAnalytics(Resource):
get_token_analytics_model = api.model(
"GetTokenAnalyticsModel",
{
"api_key_id": fields.String(required=False, description="API Key ID"),
"filter_option": fields.String(
required=False,
description="Filter option for analytics",
default="last_30_days",
enum=[
"last_hour",
"last_24_hour",
"last_7_days",
"last_15_days",
"last_30_days",
],
),
},
)
@api.expect(get_token_analytics_model)
@api.doc(description="Get token analytics data")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
api_key_id = data.get("api_key_id")
filter_option = data.get("filter_option", "last_30_days")
try:
api_key = (
agents_collection.find_one({"_id": ObjectId(api_key_id), "user": user})[
"key"
]
if api_key_id
else None
)
except Exception as err:
current_app.logger.error(f"Error getting API key: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
end_date = datetime.datetime.now(datetime.timezone.utc)
if filter_option == "last_hour":
start_date = end_date - datetime.timedelta(hours=1)
group_format = "%Y-%m-%d %H:%M:00"
group_stage = {
"$group": {
"_id": {
"minute": {
"$dateToString": {
"format": group_format,
"date": "$timestamp",
}
}
},
"total_tokens": {
"$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}
},
}
}
elif filter_option == "last_24_hour":
start_date = end_date - datetime.timedelta(hours=24)
group_format = "%Y-%m-%d %H:00"
group_stage = {
"$group": {
"_id": {
"hour": {
"$dateToString": {
"format": group_format,
"date": "$timestamp",
}
}
},
"total_tokens": {
"$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}
},
}
}
else:
if filter_option in ["last_7_days", "last_15_days", "last_30_days"]:
filter_days = (
6
if filter_option == "last_7_days"
else (14 if filter_option == "last_15_days" else 29)
)
else:
return make_response(
jsonify({"success": False, "message": "Invalid option"}), 400
)
start_date = end_date - datetime.timedelta(days=filter_days)
start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
end_date = end_date.replace(
hour=23, minute=59, second=59, microsecond=999999
)
group_format = "%Y-%m-%d"
group_stage = {
"$group": {
"_id": {
"day": {
"$dateToString": {
"format": group_format,
"date": "$timestamp",
}
}
},
"total_tokens": {
"$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}
},
}
}
try:
match_stage = {
"$match": {
"user_id": user,
"timestamp": {"$gte": start_date, "$lte": end_date},
}
}
if api_key:
match_stage["$match"]["api_key"] = api_key
token_usage_data = token_usage_collection.aggregate(
[
match_stage,
group_stage,
{"$sort": {"_id": 1}},
]
)
if filter_option == "last_hour":
intervals = generate_minute_range(start_date, end_date)
elif filter_option == "last_24_hour":
intervals = generate_hourly_range(start_date, end_date)
else:
intervals = generate_date_range(start_date, end_date)
daily_token_usage = {interval: 0 for interval in intervals}
for entry in token_usage_data:
if filter_option == "last_hour":
daily_token_usage[entry["_id"]["minute"]] = entry["total_tokens"]
elif filter_option == "last_24_hour":
daily_token_usage[entry["_id"]["hour"]] = entry["total_tokens"]
else:
daily_token_usage[entry["_id"]["day"]] = entry["total_tokens"]
except Exception as err:
current_app.logger.error(
f"Error getting token analytics: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(
jsonify({"success": True, "token_usage": daily_token_usage}), 200
)
@analytics_ns.route("/get_feedback_analytics")
class GetFeedbackAnalytics(Resource):
get_feedback_analytics_model = api.model(
"GetFeedbackAnalyticsModel",
{
"api_key_id": fields.String(required=False, description="API Key ID"),
"filter_option": fields.String(
required=False,
description="Filter option for analytics",
default="last_30_days",
enum=[
"last_hour",
"last_24_hour",
"last_7_days",
"last_15_days",
"last_30_days",
],
),
},
)
@api.expect(get_feedback_analytics_model)
@api.doc(description="Get feedback analytics data")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
api_key_id = data.get("api_key_id")
filter_option = data.get("filter_option", "last_30_days")
try:
api_key = (
agents_collection.find_one({"_id": ObjectId(api_key_id), "user": user})[
"key"
]
if api_key_id
else None
)
except Exception as err:
current_app.logger.error(f"Error getting API key: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
end_date = datetime.datetime.now(datetime.timezone.utc)
if filter_option == "last_hour":
start_date = end_date - datetime.timedelta(hours=1)
group_format = "%Y-%m-%d %H:%M:00"
date_field = {
"$dateToString": {
"format": group_format,
"date": "$queries.feedback_timestamp",
}
}
elif filter_option == "last_24_hour":
start_date = end_date - datetime.timedelta(hours=24)
group_format = "%Y-%m-%d %H:00"
date_field = {
"$dateToString": {
"format": group_format,
"date": "$queries.feedback_timestamp",
}
}
else:
if filter_option in ["last_7_days", "last_15_days", "last_30_days"]:
filter_days = (
6
if filter_option == "last_7_days"
else (14 if filter_option == "last_15_days" else 29)
)
else:
return make_response(
jsonify({"success": False, "message": "Invalid option"}), 400
)
start_date = end_date - datetime.timedelta(days=filter_days)
start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
end_date = end_date.replace(
hour=23, minute=59, second=59, microsecond=999999
)
group_format = "%Y-%m-%d"
date_field = {
"$dateToString": {
"format": group_format,
"date": "$queries.feedback_timestamp",
}
}
try:
match_stage = {
"$match": {
"queries.feedback_timestamp": {
"$gte": start_date,
"$lte": end_date,
},
"queries.feedback": {"$exists": True},
}
}
if api_key:
match_stage["$match"]["api_key"] = api_key
pipeline = [
match_stage,
{"$unwind": "$queries"},
{"$match": {"queries.feedback": {"$exists": True}}},
{
"$group": {
"_id": {"time": date_field, "feedback": "$queries.feedback"},
"count": {"$sum": 1},
}
},
{
"$group": {
"_id": "$_id.time",
"positive": {
"$sum": {
"$cond": [
{"$eq": ["$_id.feedback", "LIKE"]},
"$count",
0,
]
}
},
"negative": {
"$sum": {
"$cond": [
{"$eq": ["$_id.feedback", "DISLIKE"]},
"$count",
0,
]
}
},
}
},
{"$sort": {"_id": 1}},
]
feedback_data = conversations_collection.aggregate(pipeline)
if filter_option == "last_hour":
intervals = generate_minute_range(start_date, end_date)
elif filter_option == "last_24_hour":
intervals = generate_hourly_range(start_date, end_date)
else:
intervals = generate_date_range(start_date, end_date)
daily_feedback = {
interval: {"positive": 0, "negative": 0} for interval in intervals
}
for entry in feedback_data:
daily_feedback[entry["_id"]] = {
"positive": entry["positive"],
"negative": entry["negative"],
}
except Exception as err:
current_app.logger.error(
f"Error getting feedback analytics: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(
jsonify({"success": True, "feedback": daily_feedback}), 200
)
@analytics_ns.route("/get_user_logs")
class GetUserLogs(Resource):
get_user_logs_model = api.model(
"GetUserLogsModel",
{
"page": fields.Integer(
required=False,
description="Page number for pagination",
default=1,
),
"api_key_id": fields.String(required=False, description="API Key ID"),
"page_size": fields.Integer(
required=False,
description="Number of logs per page",
default=10,
),
},
)
@api.expect(get_user_logs_model)
@api.doc(description="Get user logs with pagination")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
page = int(data.get("page", 1))
api_key_id = data.get("api_key_id")
page_size = int(data.get("page_size", 10))
skip = (page - 1) * page_size
try:
api_key = (
agents_collection.find_one({"_id": ObjectId(api_key_id)})["key"]
if api_key_id
else None
)
except Exception as err:
current_app.logger.error(f"Error getting API key: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
query = {"user": user}
if api_key:
query = {"api_key": api_key}
items_cursor = (
user_logs_collection.find(query)
.sort("timestamp", -1)
.skip(skip)
.limit(page_size + 1)
)
items = list(items_cursor)
results = [
{
"id": str(item.get("_id")),
"action": item.get("action"),
"level": item.get("level"),
"user": item.get("user"),
"question": item.get("question"),
"sources": item.get("sources"),
"retriever_params": item.get("retriever_params"),
"timestamp": item.get("timestamp"),
}
for item in items[:page_size]
]
has_more = len(items) > page_size
return make_response(
jsonify(
{
"success": True,
"logs": results,
"page": page,
"page_size": page_size,
"has_more": has_more,
}
),
200,
)

View File

@@ -0,0 +1,5 @@
"""Attachments module."""
from .routes import attachments_ns
__all__ = ["attachments_ns"]

View File

@@ -0,0 +1,154 @@
"""File attachments and media routes."""
import os
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource
from application.api import api
from application.api.user.base import agents_collection, storage
from application.api.user.tasks import store_attachment
from application.core.settings import settings
from application.tts.tts_creator import TTSCreator
from application.utils import safe_filename
attachments_ns = Namespace(
"attachments", description="File attachments and media operations", path="/api"
)
@attachments_ns.route("/store_attachment")
class StoreAttachment(Resource):
@api.expect(
api.model(
"AttachmentModel",
{
"file": fields.Raw(required=True, description="File to upload"),
"api_key": fields.String(
required=False, description="API key (optional)"
),
},
)
)
@api.doc(
description="Stores a single attachment without vectorization or training. Supports user or API key authentication."
)
def post(self):
decoded_token = getattr(request, "decoded_token", None)
api_key = request.form.get("api_key") or request.args.get("api_key")
file = request.files.get("file")
if not file or file.filename == "":
return make_response(
jsonify({"status": "error", "message": "Missing file"}),
400,
)
user = None
if decoded_token:
user = safe_filename(decoded_token.get("sub"))
elif api_key:
agent = agents_collection.find_one({"key": api_key})
if not agent:
return make_response(
jsonify({"success": False, "message": "Invalid API key"}), 401
)
user = safe_filename(agent.get("user"))
else:
return make_response(
jsonify({"success": False, "message": "Authentication required"}), 401
)
try:
attachment_id = ObjectId()
original_filename = safe_filename(os.path.basename(file.filename))
relative_path = f"{settings.UPLOAD_FOLDER}/{user}/attachments/{str(attachment_id)}/{original_filename}"
metadata = storage.save_file(file, relative_path)
file_info = {
"filename": original_filename,
"attachment_id": str(attachment_id),
"path": relative_path,
"metadata": metadata,
}
task = store_attachment.delay(file_info, user)
return make_response(
jsonify(
{
"success": True,
"task_id": task.id,
"message": "File uploaded successfully. Processing started.",
}
),
200,
)
except Exception as err:
current_app.logger.error(f"Error storing attachment: {err}", exc_info=True)
return make_response(jsonify({"success": False, "error": str(err)}), 400)
@attachments_ns.route("/images/<path:image_path>")
class ServeImage(Resource):
@api.doc(description="Serve an image from storage")
def get(self, image_path):
try:
file_obj = storage.get_file(image_path)
extension = image_path.split(".")[-1].lower()
content_type = f"image/{extension}"
if extension == "jpg":
content_type = "image/jpeg"
response = make_response(file_obj.read())
response.headers.set("Content-Type", content_type)
response.headers.set("Cache-Control", "max-age=86400")
return response
except FileNotFoundError:
return make_response(
jsonify({"success": False, "message": "Image not found"}), 404
)
except Exception as e:
current_app.logger.error(f"Error serving image: {e}")
return make_response(
jsonify({"success": False, "message": "Error retrieving image"}), 500
)
@attachments_ns.route("/tts")
class TextToSpeech(Resource):
tts_model = api.model(
"TextToSpeechModel",
{
"text": fields.String(
required=True, description="Text to be synthesized as audio"
),
},
)
@api.expect(tts_model)
@api.doc(description="Synthesize audio speech from text")
def post(self):
from application.utils import clean_text_for_tts
data = request.get_json()
text = data["text"]
cleaned_text = clean_text_for_tts(text)
try:
tts_instance = TTSCreator.create_tts(settings.TTS_PROVIDER)
audio_base64, detected_language = tts_instance.text_to_speech(cleaned_text)
return make_response(
jsonify(
{
"success": True,
"audio_base64": audio_base64,
"lang": detected_language,
}
),
200,
)
except Exception as err:
current_app.logger.error(f"Error synthesizing audio: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)

View File

@@ -0,0 +1,222 @@
"""
Shared utilities, database connections, and helper functions for user API routes.
"""
import datetime
import os
import uuid
from functools import wraps
from typing import Optional, Tuple
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, Response
from pymongo import ReturnDocument
from werkzeug.utils import secure_filename
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.storage.storage_creator import StorageCreator
from application.vectorstore.vector_creator import VectorCreator
storage = StorageCreator.get_storage()
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
conversations_collection = db["conversations"]
sources_collection = db["sources"]
prompts_collection = db["prompts"]
feedback_collection = db["feedback"]
agents_collection = db["agents"]
token_usage_collection = db["token_usage"]
shared_conversations_collections = db["shared_conversations"]
users_collection = db["users"]
user_logs_collection = db["user_logs"]
user_tools_collection = db["user_tools"]
attachments_collection = db["attachments"]
try:
agents_collection.create_index(
[("shared", 1)],
name="shared_index",
background=True,
)
users_collection.create_index("user_id", unique=True)
except Exception as e:
print("Error creating indexes:", e)
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
def generate_minute_range(start_date, end_date):
"""Generate a dictionary with minute-level time ranges."""
return {
(start_date + datetime.timedelta(minutes=i)).strftime("%Y-%m-%d %H:%M:00"): 0
for i in range(int((end_date - start_date).total_seconds() // 60) + 1)
}
def generate_hourly_range(start_date, end_date):
"""Generate a dictionary with hourly time ranges."""
return {
(start_date + datetime.timedelta(hours=i)).strftime("%Y-%m-%d %H:00"): 0
for i in range(int((end_date - start_date).total_seconds() // 3600) + 1)
}
def generate_date_range(start_date, end_date):
"""Generate a dictionary with daily date ranges."""
return {
(start_date + datetime.timedelta(days=i)).strftime("%Y-%m-%d"): 0
for i in range((end_date - start_date).days + 1)
}
def ensure_user_doc(user_id):
"""
Ensure user document exists with proper agent preferences structure.
Args:
user_id: The user ID to ensure
Returns:
The user document
"""
default_prefs = {
"pinned": [],
"shared_with_me": [],
}
user_doc = users_collection.find_one_and_update(
{"user_id": user_id},
{"$setOnInsert": {"agent_preferences": default_prefs}},
upsert=True,
return_document=ReturnDocument.AFTER,
)
prefs = user_doc.get("agent_preferences", {})
updates = {}
if "pinned" not in prefs:
updates["agent_preferences.pinned"] = []
if "shared_with_me" not in prefs:
updates["agent_preferences.shared_with_me"] = []
if updates:
users_collection.update_one({"user_id": user_id}, {"$set": updates})
user_doc = users_collection.find_one({"user_id": user_id})
return user_doc
def resolve_tool_details(tool_ids):
"""
Resolve tool IDs to their details.
Args:
tool_ids: List of tool IDs
Returns:
List of tool details with id, name, and display_name
"""
tools = user_tools_collection.find(
{"_id": {"$in": [ObjectId(tid) for tid in tool_ids]}}
)
return [
{
"id": str(tool["_id"]),
"name": tool.get("name", ""),
"display_name": tool.get("displayName", tool.get("name", "")),
}
for tool in tools
]
def get_vector_store(source_id):
"""
Get the Vector Store for a given source ID.
Args:
source_id (str): source id of the document
Returns:
Vector store instance
"""
store = VectorCreator.create_vectorstore(
settings.VECTOR_STORE,
source_id=source_id,
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
)
return store
def handle_image_upload(
request, existing_url: str, user: str, storage, base_path: str = "attachments/"
) -> Tuple[str, Optional[Response]]:
"""
Handle image file upload from request.
Args:
request: Flask request object
existing_url: Existing image URL (fallback)
user: User ID
storage: Storage instance
base_path: Base path for upload
Returns:
Tuple of (image_url, error_response)
"""
image_url = existing_url
if "image" in request.files:
file = request.files["image"]
if file.filename != "":
filename = secure_filename(file.filename)
upload_path = f"{settings.UPLOAD_FOLDER.rstrip('/')}/{user}/{base_path.rstrip('/')}/{uuid.uuid4()}_{filename}"
try:
storage.save_file(file, upload_path, storage_class="STANDARD")
image_url = upload_path
except Exception as e:
current_app.logger.error(f"Error uploading image: {e}")
return None, make_response(
jsonify({"success": False, "message": "Image upload failed"}),
400,
)
return image_url, None
def require_agent(func):
"""
Decorator to require valid agent webhook token.
Args:
func: Function to decorate
Returns:
Wrapped function
"""
@wraps(func)
def wrapper(*args, **kwargs):
webhook_token = kwargs.get("webhook_token")
if not webhook_token:
return make_response(
jsonify({"success": False, "message": "Webhook token missing"}), 400
)
agent = agents_collection.find_one(
{"incoming_webhook_token": webhook_token}, {"_id": 1}
)
if not agent:
current_app.logger.warning(
f"Webhook attempt with invalid token: {webhook_token}"
)
return make_response(
jsonify({"success": False, "message": "Agent not found"}), 404
)
kwargs["agent"] = agent
kwargs["agent_id_str"] = str(agent["_id"])
return func(*args, **kwargs)
return wrapper

View File

@@ -0,0 +1,5 @@
"""Conversation management module."""
from .routes import conversations_ns
__all__ = ["conversations_ns"]

View File

@@ -0,0 +1,280 @@
"""Conversation management routes."""
import datetime
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource
from application.api import api
from application.api.user.base import attachments_collection, conversations_collection
from application.utils import check_required_fields
conversations_ns = Namespace(
"conversations", description="Conversation management operations", path="/api"
)
@conversations_ns.route("/delete_conversation")
class DeleteConversation(Resource):
@api.doc(
description="Deletes a conversation by ID",
params={"id": "The ID of the conversation to delete"},
)
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
conversation_id = request.args.get("id")
if not conversation_id:
return make_response(
jsonify({"success": False, "message": "ID is required"}), 400
)
try:
conversations_collection.delete_one(
{"_id": ObjectId(conversation_id), "user": decoded_token["sub"]}
)
except Exception as err:
current_app.logger.error(
f"Error deleting conversation: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
@conversations_ns.route("/delete_all_conversations")
class DeleteAllConversations(Resource):
@api.doc(
description="Deletes all conversations for a specific user",
)
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user_id = decoded_token.get("sub")
try:
conversations_collection.delete_many({"user": user_id})
except Exception as err:
current_app.logger.error(
f"Error deleting all conversations: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
@conversations_ns.route("/get_conversations")
class GetConversations(Resource):
@api.doc(
description="Retrieve a list of the latest 30 conversations (excluding API key conversations)",
)
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
try:
conversations = (
conversations_collection.find(
{
"$or": [
{"api_key": {"$exists": False}},
{"agent_id": {"$exists": True}},
],
"user": decoded_token.get("sub"),
}
)
.sort("date", -1)
.limit(30)
)
list_conversations = [
{
"id": str(conversation["_id"]),
"name": conversation["name"],
"agent_id": conversation.get("agent_id", None),
"is_shared_usage": conversation.get("is_shared_usage", False),
"shared_token": conversation.get("shared_token", None),
}
for conversation in conversations
]
except Exception as err:
current_app.logger.error(
f"Error retrieving conversations: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify(list_conversations), 200)
@conversations_ns.route("/get_single_conversation")
class GetSingleConversation(Resource):
@api.doc(
description="Retrieve a single conversation by ID",
params={"id": "The conversation ID"},
)
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
conversation_id = request.args.get("id")
if not conversation_id:
return make_response(
jsonify({"success": False, "message": "ID is required"}), 400
)
try:
conversation = conversations_collection.find_one(
{"_id": ObjectId(conversation_id), "user": decoded_token.get("sub")}
)
if not conversation:
return make_response(jsonify({"status": "not found"}), 404)
# Process queries to include attachment names
queries = conversation["queries"]
for query in queries:
if "attachments" in query and query["attachments"]:
attachment_details = []
for attachment_id in query["attachments"]:
try:
attachment = attachments_collection.find_one(
{"_id": ObjectId(attachment_id)}
)
if attachment:
attachment_details.append(
{
"id": str(attachment["_id"]),
"fileName": attachment.get(
"filename", "Unknown file"
),
}
)
except Exception as e:
current_app.logger.error(
f"Error retrieving attachment {attachment_id}: {e}",
exc_info=True,
)
query["attachments"] = attachment_details
except Exception as err:
current_app.logger.error(
f"Error retrieving conversation: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
data = {
"queries": queries,
"agent_id": conversation.get("agent_id"),
"is_shared_usage": conversation.get("is_shared_usage", False),
"shared_token": conversation.get("shared_token", None),
}
return make_response(jsonify(data), 200)
@conversations_ns.route("/update_conversation_name")
class UpdateConversationName(Resource):
@api.expect(
api.model(
"UpdateConversationModel",
{
"id": fields.String(required=True, description="Conversation ID"),
"name": fields.String(
required=True, description="New name of the conversation"
),
},
)
)
@api.doc(
description="Updates the name of a conversation",
)
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
data = request.get_json()
required_fields = ["id", "name"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
conversations_collection.update_one(
{"_id": ObjectId(data["id"]), "user": decoded_token.get("sub")},
{"$set": {"name": data["name"]}},
)
except Exception as err:
current_app.logger.error(
f"Error updating conversation name: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
@conversations_ns.route("/feedback")
class SubmitFeedback(Resource):
@api.expect(
api.model(
"FeedbackModel",
{
"question": fields.String(
required=False, description="The user question"
),
"answer": fields.String(required=False, description="The AI answer"),
"feedback": fields.String(required=True, description="User feedback"),
"question_index": fields.Integer(
required=True,
description="The question number in that particular conversation",
),
"conversation_id": fields.String(
required=True, description="id of the particular conversation"
),
"api_key": fields.String(description="Optional API key"),
},
)
)
@api.doc(
description="Submit feedback for a conversation",
)
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
data = request.get_json()
required_fields = ["feedback", "conversation_id", "question_index"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
if data["feedback"] is None:
# Remove feedback and feedback_timestamp if feedback is null
conversations_collection.update_one(
{
"_id": ObjectId(data["conversation_id"]),
"user": decoded_token.get("sub"),
f"queries.{data['question_index']}": {"$exists": True},
},
{
"$unset": {
f"queries.{data['question_index']}.feedback": "",
f"queries.{data['question_index']}.feedback_timestamp": "",
}
},
)
else:
# Set feedback and feedback_timestamp if feedback has a value
conversations_collection.update_one(
{
"_id": ObjectId(data["conversation_id"]),
"user": decoded_token.get("sub"),
f"queries.{data['question_index']}": {"$exists": True},
},
{
"$set": {
f"queries.{data['question_index']}.feedback": data[
"feedback"
],
f"queries.{data['question_index']}.feedback_timestamp": datetime.datetime.now(
datetime.timezone.utc
),
}
},
)
except Exception as err:
current_app.logger.error(f"Error submitting feedback: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)

View File

@@ -0,0 +1,5 @@
"""Prompts module."""
from .routes import prompts_ns
__all__ = ["prompts_ns"]

View File

@@ -0,0 +1,191 @@
"""Prompt management routes."""
import os
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource
from application.api import api
from application.api.user.base import current_dir, prompts_collection
from application.utils import check_required_fields
prompts_ns = Namespace(
"prompts", description="Prompt management operations", path="/api"
)
@prompts_ns.route("/create_prompt")
class CreatePrompt(Resource):
create_prompt_model = api.model(
"CreatePromptModel",
{
"content": fields.String(
required=True, description="Content of the prompt"
),
"name": fields.String(required=True, description="Name of the prompt"),
},
)
@api.expect(create_prompt_model)
@api.doc(description="Create a new prompt")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
data = request.get_json()
required_fields = ["content", "name"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
user = decoded_token.get("sub")
try:
resp = prompts_collection.insert_one(
{
"name": data["name"],
"content": data["content"],
"user": user,
}
)
new_id = str(resp.inserted_id)
except Exception as err:
current_app.logger.error(f"Error creating prompt: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"id": new_id}), 200)
@prompts_ns.route("/get_prompts")
class GetPrompts(Resource):
@api.doc(description="Get all prompts for the user")
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
try:
prompts = prompts_collection.find({"user": user})
list_prompts = [
{"id": "default", "name": "default", "type": "public"},
{"id": "creative", "name": "creative", "type": "public"},
{"id": "strict", "name": "strict", "type": "public"},
]
for prompt in prompts:
list_prompts.append(
{
"id": str(prompt["_id"]),
"name": prompt["name"],
"type": "private",
}
)
except Exception as err:
current_app.logger.error(f"Error retrieving prompts: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify(list_prompts), 200)
@prompts_ns.route("/get_single_prompt")
class GetSinglePrompt(Resource):
@api.doc(params={"id": "ID of the prompt"}, description="Get a single prompt by ID")
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
prompt_id = request.args.get("id")
if not prompt_id:
return make_response(
jsonify({"success": False, "message": "ID is required"}), 400
)
try:
if prompt_id == "default":
with open(
os.path.join(current_dir, "prompts", "chat_combine_default.txt"),
"r",
) as f:
chat_combine_template = f.read()
return make_response(jsonify({"content": chat_combine_template}), 200)
elif prompt_id == "creative":
with open(
os.path.join(current_dir, "prompts", "chat_combine_creative.txt"),
"r",
) as f:
chat_reduce_creative = f.read()
return make_response(jsonify({"content": chat_reduce_creative}), 200)
elif prompt_id == "strict":
with open(
os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r"
) as f:
chat_reduce_strict = f.read()
return make_response(jsonify({"content": chat_reduce_strict}), 200)
prompt = prompts_collection.find_one(
{"_id": ObjectId(prompt_id), "user": user}
)
except Exception as err:
current_app.logger.error(f"Error retrieving prompt: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"content": prompt["content"]}), 200)
@prompts_ns.route("/delete_prompt")
class DeletePrompt(Resource):
delete_prompt_model = api.model(
"DeletePromptModel",
{"id": fields.String(required=True, description="Prompt ID to delete")},
)
@api.expect(delete_prompt_model)
@api.doc(description="Delete a prompt by ID")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["id"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
prompts_collection.delete_one({"_id": ObjectId(data["id"]), "user": user})
except Exception as err:
current_app.logger.error(f"Error deleting prompt: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
@prompts_ns.route("/update_prompt")
class UpdatePrompt(Resource):
update_prompt_model = api.model(
"UpdatePromptModel",
{
"id": fields.String(required=True, description="Prompt ID to update"),
"name": fields.String(required=True, description="New name of the prompt"),
"content": fields.String(
required=True, description="New content of the prompt"
),
},
)
@api.expect(update_prompt_model)
@api.doc(description="Update an existing prompt")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["id", "name", "content"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
prompts_collection.update_one(
{"_id": ObjectId(data["id"]), "user": user},
{"$set": {"name": data["name"], "content": data["content"]}},
)
except Exception as err:
current_app.logger.error(f"Error updating prompt: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,5 @@
"""Sharing module."""
from .routes import sharing_ns
__all__ = ["sharing_ns"]

View File

@@ -0,0 +1,301 @@
"""Conversation sharing routes."""
import uuid
from bson.binary import Binary, UuidRepresentation
from bson.dbref import DBRef
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, request
from flask_restx import fields, inputs, Namespace, Resource
from application.api import api
from application.api.user.base import (
agents_collection,
attachments_collection,
conversations_collection,
db,
shared_conversations_collections,
)
from application.utils import check_required_fields
sharing_ns = Namespace(
"sharing", description="Conversation sharing operations", path="/api"
)
@sharing_ns.route("/share")
class ShareConversation(Resource):
share_conversation_model = api.model(
"ShareConversationModel",
{
"conversation_id": fields.String(
required=True, description="Conversation ID"
),
"user": fields.String(description="User ID (optional)"),
"prompt_id": fields.String(description="Prompt ID (optional)"),
"chunks": fields.Integer(description="Chunks count (optional)"),
},
)
@api.expect(share_conversation_model)
@api.doc(description="Share a conversation")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["conversation_id"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
is_promptable = request.args.get("isPromptable", type=inputs.boolean)
if is_promptable is None:
return make_response(
jsonify({"success": False, "message": "isPromptable is required"}), 400
)
conversation_id = data["conversation_id"]
try:
conversation = conversations_collection.find_one(
{"_id": ObjectId(conversation_id)}
)
if conversation is None:
return make_response(
jsonify(
{
"status": "error",
"message": "Conversation does not exist",
}
),
404,
)
current_n_queries = len(conversation["queries"])
explicit_binary = Binary.from_uuid(
uuid.uuid4(), UuidRepresentation.STANDARD
)
if is_promptable:
prompt_id = data.get("prompt_id", "default")
chunks = data.get("chunks", "2")
name = conversation["name"] + "(shared)"
new_api_key_data = {
"prompt_id": prompt_id,
"chunks": chunks,
"user": user,
}
if "source" in data and ObjectId.is_valid(data["source"]):
new_api_key_data["source"] = DBRef(
"sources", ObjectId(data["source"])
)
if "retriever" in data:
new_api_key_data["retriever"] = data["retriever"]
pre_existing_api_document = agents_collection.find_one(new_api_key_data)
if pre_existing_api_document:
api_uuid = pre_existing_api_document["key"]
pre_existing = shared_conversations_collections.find_one(
{
"conversation_id": DBRef(
"conversations", ObjectId(conversation_id)
),
"isPromptable": is_promptable,
"first_n_queries": current_n_queries,
"user": user,
"api_key": api_uuid,
}
)
if pre_existing is not None:
return make_response(
jsonify(
{
"success": True,
"identifier": str(pre_existing["uuid"].as_uuid()),
}
),
200,
)
else:
shared_conversations_collections.insert_one(
{
"uuid": explicit_binary,
"conversation_id": {
"$ref": "conversations",
"$id": ObjectId(conversation_id),
},
"isPromptable": is_promptable,
"first_n_queries": current_n_queries,
"user": user,
"api_key": api_uuid,
}
)
return make_response(
jsonify(
{
"success": True,
"identifier": str(explicit_binary.as_uuid()),
}
),
201,
)
else:
api_uuid = str(uuid.uuid4())
new_api_key_data["key"] = api_uuid
new_api_key_data["name"] = name
if "source" in data and ObjectId.is_valid(data["source"]):
new_api_key_data["source"] = DBRef(
"sources", ObjectId(data["source"])
)
if "retriever" in data:
new_api_key_data["retriever"] = data["retriever"]
agents_collection.insert_one(new_api_key_data)
shared_conversations_collections.insert_one(
{
"uuid": explicit_binary,
"conversation_id": {
"$ref": "conversations",
"$id": ObjectId(conversation_id),
},
"isPromptable": is_promptable,
"first_n_queries": current_n_queries,
"user": user,
"api_key": api_uuid,
}
)
return make_response(
jsonify(
{
"success": True,
"identifier": str(explicit_binary.as_uuid()),
}
),
201,
)
pre_existing = shared_conversations_collections.find_one(
{
"conversation_id": DBRef(
"conversations", ObjectId(conversation_id)
),
"isPromptable": is_promptable,
"first_n_queries": current_n_queries,
"user": user,
}
)
if pre_existing is not None:
return make_response(
jsonify(
{
"success": True,
"identifier": str(pre_existing["uuid"].as_uuid()),
}
),
200,
)
else:
shared_conversations_collections.insert_one(
{
"uuid": explicit_binary,
"conversation_id": {
"$ref": "conversations",
"$id": ObjectId(conversation_id),
},
"isPromptable": is_promptable,
"first_n_queries": current_n_queries,
"user": user,
}
)
return make_response(
jsonify(
{"success": True, "identifier": str(explicit_binary.as_uuid())}
),
201,
)
except Exception as err:
current_app.logger.error(
f"Error sharing conversation: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
@sharing_ns.route("/shared_conversation/<string:identifier>")
class GetPubliclySharedConversations(Resource):
@api.doc(description="Get publicly shared conversations by identifier")
def get(self, identifier: str):
try:
query_uuid = Binary.from_uuid(
uuid.UUID(identifier), UuidRepresentation.STANDARD
)
shared = shared_conversations_collections.find_one({"uuid": query_uuid})
conversation_queries = []
if (
shared
and "conversation_id" in shared
and isinstance(shared["conversation_id"], DBRef)
):
conversation_ref = shared["conversation_id"]
conversation = db.dereference(conversation_ref)
if conversation is None:
return make_response(
jsonify(
{
"success": False,
"error": "might have broken url or the conversation does not exist",
}
),
404,
)
conversation_queries = conversation["queries"][
: (shared["first_n_queries"])
]
for query in conversation_queries:
if "attachments" in query and query["attachments"]:
attachment_details = []
for attachment_id in query["attachments"]:
try:
attachment = attachments_collection.find_one(
{"_id": ObjectId(attachment_id)}
)
if attachment:
attachment_details.append(
{
"id": str(attachment["_id"]),
"fileName": attachment.get(
"filename", "Unknown file"
),
}
)
except Exception as e:
current_app.logger.error(
f"Error retrieving attachment {attachment_id}: {e}",
exc_info=True,
)
query["attachments"] = attachment_details
else:
return make_response(
jsonify(
{
"success": False,
"error": "might have broken url or the conversation does not exist",
}
),
404,
)
date = conversation["_id"].generation_time.isoformat()
res = {
"success": True,
"queries": conversation_queries,
"title": conversation["name"],
"timestamp": date,
}
if shared["isPromptable"] and "api_key" in shared:
res["api_key"] = shared["api_key"]
return make_response(jsonify(res), 200)
except Exception as err:
current_app.logger.error(
f"Error getting shared conversation: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)

View File

@@ -0,0 +1,7 @@
"""Sources module."""
from .chunks import sources_chunks_ns
from .routes import sources_ns
from .upload import sources_upload_ns
__all__ = ["sources_ns", "sources_chunks_ns", "sources_upload_ns"]

View File

@@ -0,0 +1,278 @@
"""Source document management chunk management."""
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource
from application.api import api
from application.api.user.base import get_vector_store, sources_collection
from application.utils import check_required_fields, num_tokens_from_string
sources_chunks_ns = Namespace(
"sources", description="Source document management operations", path="/api"
)
@sources_chunks_ns.route("/get_chunks")
class GetChunks(Resource):
@api.doc(
description="Retrieves chunks from a document, optionally filtered by file path and search term",
params={
"id": "The document ID",
"page": "Page number for pagination",
"per_page": "Number of chunks per page",
"path": "Optional: Filter chunks by relative file path",
"search": "Optional: Search term to filter chunks by title or content",
},
)
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
doc_id = request.args.get("id")
page = int(request.args.get("page", 1))
per_page = int(request.args.get("per_page", 10))
path = request.args.get("path")
search_term = request.args.get("search", "").strip().lower()
if not ObjectId.is_valid(doc_id):
return make_response(jsonify({"error": "Invalid doc_id"}), 400)
doc = sources_collection.find_one({"_id": ObjectId(doc_id), "user": user})
if not doc:
return make_response(
jsonify({"error": "Document not found or access denied"}), 404
)
try:
store = get_vector_store(doc_id)
chunks = store.get_chunks()
filtered_chunks = []
for chunk in chunks:
metadata = chunk.get("metadata", {})
# Filter by path if provided
if path:
chunk_source = metadata.get("source", "")
# Check if the chunk's source matches the requested path
if not chunk_source or not chunk_source.endswith(path):
continue
# Filter by search term if provided
if search_term:
text_match = search_term in chunk.get("text", "").lower()
title_match = search_term in metadata.get("title", "").lower()
if not (text_match or title_match):
continue
filtered_chunks.append(chunk)
chunks = filtered_chunks
total_chunks = len(chunks)
start = (page - 1) * per_page
end = start + per_page
paginated_chunks = chunks[start:end]
return make_response(
jsonify(
{
"page": page,
"per_page": per_page,
"total": total_chunks,
"chunks": paginated_chunks,
"path": path if path else None,
"search": search_term if search_term else None,
}
),
200,
)
except Exception as e:
current_app.logger.error(f"Error getting chunks: {e}", exc_info=True)
return make_response(jsonify({"success": False}), 500)
@sources_chunks_ns.route("/add_chunk")
class AddChunk(Resource):
@api.expect(
api.model(
"AddChunkModel",
{
"id": fields.String(required=True, description="Document ID"),
"text": fields.String(required=True, description="Text of the chunk"),
"metadata": fields.Raw(
required=False,
description="Metadata associated with the chunk",
),
},
)
)
@api.doc(
description="Adds a new chunk to the document",
)
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["id", "text"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
doc_id = data.get("id")
text = data.get("text")
metadata = data.get("metadata", {})
token_count = num_tokens_from_string(text)
metadata["token_count"] = token_count
if not ObjectId.is_valid(doc_id):
return make_response(jsonify({"error": "Invalid doc_id"}), 400)
doc = sources_collection.find_one({"_id": ObjectId(doc_id), "user": user})
if not doc:
return make_response(
jsonify({"error": "Document not found or access denied"}), 404
)
try:
store = get_vector_store(doc_id)
chunk_id = store.add_chunk(text, metadata)
return make_response(
jsonify({"message": "Chunk added successfully", "chunk_id": chunk_id}),
201,
)
except Exception as e:
current_app.logger.error(f"Error adding chunk: {e}", exc_info=True)
return make_response(jsonify({"success": False}), 500)
@sources_chunks_ns.route("/delete_chunk")
class DeleteChunk(Resource):
@api.doc(
description="Deletes a specific chunk from the document.",
params={"id": "The document ID", "chunk_id": "The ID of the chunk to delete"},
)
def delete(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
doc_id = request.args.get("id")
chunk_id = request.args.get("chunk_id")
if not ObjectId.is_valid(doc_id):
return make_response(jsonify({"error": "Invalid doc_id"}), 400)
doc = sources_collection.find_one({"_id": ObjectId(doc_id), "user": user})
if not doc:
return make_response(
jsonify({"error": "Document not found or access denied"}), 404
)
try:
store = get_vector_store(doc_id)
deleted = store.delete_chunk(chunk_id)
if deleted:
return make_response(
jsonify({"message": "Chunk deleted successfully"}), 200
)
else:
return make_response(
jsonify({"message": "Chunk not found or could not be deleted"}),
404,
)
except Exception as e:
current_app.logger.error(f"Error deleting chunk: {e}", exc_info=True)
return make_response(jsonify({"success": False}), 500)
@sources_chunks_ns.route("/update_chunk")
class UpdateChunk(Resource):
@api.expect(
api.model(
"UpdateChunkModel",
{
"id": fields.String(required=True, description="Document ID"),
"chunk_id": fields.String(
required=True, description="Chunk ID to update"
),
"text": fields.String(
required=False, description="New text of the chunk"
),
"metadata": fields.Raw(
required=False,
description="Updated metadata associated with the chunk",
),
},
)
)
@api.doc(
description="Updates an existing chunk in the document.",
)
def put(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["id", "chunk_id"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
doc_id = data.get("id")
chunk_id = data.get("chunk_id")
text = data.get("text")
metadata = data.get("metadata")
if text is not None:
token_count = num_tokens_from_string(text)
if metadata is None:
metadata = {}
metadata["token_count"] = token_count
if not ObjectId.is_valid(doc_id):
return make_response(jsonify({"error": "Invalid doc_id"}), 400)
doc = sources_collection.find_one({"_id": ObjectId(doc_id), "user": user})
if not doc:
return make_response(
jsonify({"error": "Document not found or access denied"}), 404
)
try:
store = get_vector_store(doc_id)
chunks = store.get_chunks()
existing_chunk = next((c for c in chunks if c["doc_id"] == chunk_id), None)
if not existing_chunk:
return make_response(jsonify({"error": "Chunk not found"}), 404)
new_text = text if text is not None else existing_chunk["text"]
if metadata is not None:
new_metadata = existing_chunk["metadata"].copy()
new_metadata.update(metadata)
else:
new_metadata = existing_chunk["metadata"].copy()
if text is not None:
new_metadata["token_count"] = num_tokens_from_string(new_text)
try:
new_chunk_id = store.add_chunk(new_text, new_metadata)
deleted = store.delete_chunk(chunk_id)
if not deleted:
current_app.logger.warning(
f"Failed to delete old chunk {chunk_id}, but new chunk {new_chunk_id} was created"
)
return make_response(
jsonify(
{
"message": "Chunk updated successfully",
"chunk_id": new_chunk_id,
"original_chunk_id": chunk_id,
}
),
200,
)
except Exception as add_error:
current_app.logger.error(f"Failed to add updated chunk: {add_error}")
return make_response(
jsonify({"error": "Failed to update chunk - addition failed"}), 500
)
except Exception as e:
current_app.logger.error(f"Error updating chunk: {e}", exc_info=True)
return make_response(jsonify({"success": False}), 500)

View File

@@ -0,0 +1,323 @@
"""Source document management routes."""
import json
import math
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, redirect, request
from flask_restx import fields, Namespace, Resource
from application.api import api
from application.api.user.base import sources_collection
from application.core.settings import settings
from application.storage.storage_creator import StorageCreator
from application.utils import check_required_fields
from application.vectorstore.vector_creator import VectorCreator
sources_ns = Namespace(
"sources", description="Source document management operations", path="/api"
)
@sources_ns.route("/sources")
class CombinedJson(Resource):
@api.doc(description="Provide JSON file with combined available indexes")
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = [
{
"name": "Default",
"date": "default",
"model": settings.EMBEDDINGS_NAME,
"location": "remote",
"tokens": "",
"retriever": "classic",
}
]
try:
for index in sources_collection.find({"user": user}).sort("date", -1):
data.append(
{
"id": str(index["_id"]),
"name": index.get("name"),
"date": index.get("date"),
"model": settings.EMBEDDINGS_NAME,
"location": "local",
"tokens": index.get("tokens", ""),
"retriever": index.get("retriever", "classic"),
"syncFrequency": index.get("sync_frequency", ""),
"is_nested": bool(index.get("directory_structure")),
"type": index.get(
"type", "file"
), # Add type field with default "file"
}
)
except Exception as err:
current_app.logger.error(f"Error retrieving sources: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify(data), 200)
@sources_ns.route("/sources/paginated")
class PaginatedSources(Resource):
@api.doc(description="Get document with pagination, sorting and filtering")
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
sort_field = request.args.get("sort", "date") # Default to 'date'
sort_order = request.args.get("order", "desc") # Default to 'desc'
page = int(request.args.get("page", 1)) # Default to 1
rows_per_page = int(request.args.get("rows", 10)) # Default to 10
# add .strip() to remove leading and trailing whitespaces
search_term = request.args.get(
"search", ""
).strip() # add search for filter documents
# Prepare query for filtering
query = {"user": user}
if search_term:
query["name"] = {
"$regex": search_term,
"$options": "i", # using case-insensitive search
}
total_documents = sources_collection.count_documents(query)
total_pages = max(1, math.ceil(total_documents / rows_per_page))
page = min(
max(1, page), total_pages
) # add this to make sure page inbound is within the range
sort_order = 1 if sort_order == "asc" else -1
skip = (page - 1) * rows_per_page
try:
documents = (
sources_collection.find(query)
.sort(sort_field, sort_order)
.skip(skip)
.limit(rows_per_page)
)
paginated_docs = []
for doc in documents:
doc_data = {
"id": str(doc["_id"]),
"name": doc.get("name", ""),
"date": doc.get("date", ""),
"model": settings.EMBEDDINGS_NAME,
"location": "local",
"tokens": doc.get("tokens", ""),
"retriever": doc.get("retriever", "classic"),
"syncFrequency": doc.get("sync_frequency", ""),
"isNested": bool(doc.get("directory_structure")),
"type": doc.get("type", "file"),
}
paginated_docs.append(doc_data)
response = {
"total": total_documents,
"totalPages": total_pages,
"currentPage": page,
"paginated": paginated_docs,
}
return make_response(jsonify(response), 200)
except Exception as err:
current_app.logger.error(
f"Error retrieving paginated sources: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
@sources_ns.route("/delete_by_ids")
class DeleteByIds(Resource):
@api.doc(
description="Deletes documents from the vector store by IDs",
params={"path": "Comma-separated list of IDs"},
)
def get(self):
ids = request.args.get("path")
if not ids:
return make_response(
jsonify({"success": False, "message": "Missing required fields"}), 400
)
try:
result = sources_collection.delete_index(ids=ids)
if result:
return make_response(jsonify({"success": True}), 200)
except Exception as err:
current_app.logger.error(f"Error deleting indexes: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": False}), 400)
@sources_ns.route("/delete_old")
class DeleteOldIndexes(Resource):
@api.doc(
description="Deletes old indexes and associated files",
params={"source_id": "The source ID to delete"},
)
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
source_id = request.args.get("source_id")
if not source_id:
return make_response(
jsonify({"success": False, "message": "Missing required fields"}), 400
)
doc = sources_collection.find_one(
{"_id": ObjectId(source_id), "user": decoded_token.get("sub")}
)
if not doc:
return make_response(jsonify({"status": "not found"}), 404)
storage = StorageCreator.get_storage()
try:
# Delete vector index
if settings.VECTOR_STORE == "faiss":
index_path = f"indexes/{str(doc['_id'])}"
if storage.file_exists(f"{index_path}/index.faiss"):
storage.delete_file(f"{index_path}/index.faiss")
if storage.file_exists(f"{index_path}/index.pkl"):
storage.delete_file(f"{index_path}/index.pkl")
else:
vectorstore = VectorCreator.create_vectorstore(
settings.VECTOR_STORE, source_id=str(doc["_id"])
)
vectorstore.delete_index()
if "file_path" in doc and doc["file_path"]:
file_path = doc["file_path"]
if storage.is_directory(file_path):
files = storage.list_files(file_path)
for f in files:
storage.delete_file(f)
else:
storage.delete_file(file_path)
except FileNotFoundError:
pass
except Exception as err:
current_app.logger.error(
f"Error deleting files and indexes: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
sources_collection.delete_one({"_id": ObjectId(source_id)})
return make_response(jsonify({"success": True}), 200)
@sources_ns.route("/combine")
class RedirectToSources(Resource):
@api.doc(
description="Redirects /api/combine to /api/sources for backward compatibility"
)
def get(self):
return redirect("/api/sources", code=301)
@sources_ns.route("/manage_sync")
class ManageSync(Resource):
manage_sync_model = api.model(
"ManageSyncModel",
{
"source_id": fields.String(required=True, description="Source ID"),
"sync_frequency": fields.String(
required=True,
description="Sync frequency (never, daily, weekly, monthly)",
),
},
)
@api.expect(manage_sync_model)
@api.doc(description="Manage sync frequency for sources")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["source_id", "sync_frequency"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
source_id = data["source_id"]
sync_frequency = data["sync_frequency"]
if sync_frequency not in ["never", "daily", "weekly", "monthly"]:
return make_response(
jsonify({"success": False, "message": "Invalid frequency"}), 400
)
update_data = {"$set": {"sync_frequency": sync_frequency}}
try:
sources_collection.update_one(
{
"_id": ObjectId(source_id),
"user": user,
},
update_data,
)
except Exception as err:
current_app.logger.error(
f"Error updating sync frequency: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
@sources_ns.route("/directory_structure")
class DirectoryStructure(Resource):
@api.doc(
description="Get the directory structure for a document",
params={"id": "The document ID"},
)
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
doc_id = request.args.get("id")
if not doc_id:
return make_response(jsonify({"error": "Document ID is required"}), 400)
if not ObjectId.is_valid(doc_id):
return make_response(jsonify({"error": "Invalid document ID"}), 400)
try:
doc = sources_collection.find_one({"_id": ObjectId(doc_id), "user": user})
if not doc:
return make_response(
jsonify({"error": "Document not found or access denied"}), 404
)
directory_structure = doc.get("directory_structure", {})
base_path = doc.get("file_path", "")
provider = None
remote_data = doc.get("remote_data")
try:
if isinstance(remote_data, str) and remote_data:
remote_data_obj = json.loads(remote_data)
provider = remote_data_obj.get("provider")
except Exception as e:
current_app.logger.warning(
f"Failed to parse remote_data for doc {doc_id}: {e}"
)
return make_response(
jsonify(
{
"success": True,
"directory_structure": directory_structure,
"base_path": base_path,
"provider": provider,
}
),
200,
)
except Exception as e:
current_app.logger.error(
f"Error retrieving directory structure: {e}", exc_info=True
)
return make_response(jsonify({"success": False, "error": str(e)}), 500)

View File

@@ -0,0 +1,583 @@
"""Source document management upload functionality."""
import json
import os
import tempfile
import zipfile
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource
from application.api import api
from application.api.user.base import sources_collection
from application.api.user.tasks import ingest, ingest_connector_task, ingest_remote
from application.core.settings import settings
from application.parser.connectors.connector_creator import ConnectorCreator
from application.storage.storage_creator import StorageCreator
from application.utils import check_required_fields, safe_filename
sources_upload_ns = Namespace(
"sources", description="Source document management operations", path="/api"
)
@sources_upload_ns.route("/upload")
class UploadFile(Resource):
@api.expect(
api.model(
"UploadModel",
{
"user": fields.String(required=True, description="User ID"),
"name": fields.String(required=True, description="Job name"),
"file": fields.Raw(required=True, description="File(s) to upload"),
},
)
)
@api.doc(
description="Uploads a file to be vectorized and indexed",
)
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
data = request.form
files = request.files.getlist("file")
required_fields = ["user", "name"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields or not files or all(file.filename == "" for file in files):
return make_response(
jsonify(
{
"status": "error",
"message": "Missing required fields or files",
}
),
400,
)
user = decoded_token.get("sub")
job_name = request.form["name"]
# Create safe versions for filesystem operations
safe_user = safe_filename(user)
dir_name = safe_filename(job_name)
base_path = f"{settings.UPLOAD_FOLDER}/{safe_user}/{dir_name}"
try:
storage = StorageCreator.get_storage()
for file in files:
original_filename = file.filename
safe_file = safe_filename(original_filename)
with tempfile.TemporaryDirectory() as temp_dir:
temp_file_path = os.path.join(temp_dir, safe_file)
file.save(temp_file_path)
if zipfile.is_zipfile(temp_file_path):
try:
with zipfile.ZipFile(temp_file_path, "r") as zip_ref:
zip_ref.extractall(path=temp_dir)
# Walk through extracted files and upload them
for root, _, files in os.walk(temp_dir):
for extracted_file in files:
if (
os.path.join(root, extracted_file)
== temp_file_path
):
continue
rel_path = os.path.relpath(
os.path.join(root, extracted_file), temp_dir
)
storage_path = f"{base_path}/{rel_path}"
with open(
os.path.join(root, extracted_file), "rb"
) as f:
storage.save_file(f, storage_path)
except Exception as e:
current_app.logger.error(
f"Error extracting zip: {e}", exc_info=True
)
# If zip extraction fails, save the original zip file
file_path = f"{base_path}/{safe_file}"
with open(temp_file_path, "rb") as f:
storage.save_file(f, file_path)
else:
# For non-zip files, save directly
file_path = f"{base_path}/{safe_file}"
with open(temp_file_path, "rb") as f:
storage.save_file(f, file_path)
task = ingest.delay(
settings.UPLOAD_FOLDER,
[
".rst",
".md",
".pdf",
".txt",
".docx",
".csv",
".epub",
".html",
".mdx",
".json",
".xlsx",
".pptx",
".png",
".jpg",
".jpeg",
],
job_name,
user,
file_path=base_path,
filename=dir_name,
)
except Exception as err:
current_app.logger.error(f"Error uploading file: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
@sources_upload_ns.route("/remote")
class UploadRemote(Resource):
@api.expect(
api.model(
"RemoteUploadModel",
{
"user": fields.String(required=True, description="User ID"),
"source": fields.String(
required=True, description="Source of the data"
),
"name": fields.String(required=True, description="Job name"),
"data": fields.String(required=True, description="Data to process"),
"repo_url": fields.String(description="GitHub repository URL"),
},
)
)
@api.doc(
description="Uploads remote source for vectorization",
)
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
data = request.form
required_fields = ["user", "source", "name", "data"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
config = json.loads(data["data"])
source_data = None
if data["source"] == "github":
source_data = config.get("repo_url")
elif data["source"] in ["crawler", "url"]:
source_data = config.get("url")
elif data["source"] == "reddit":
source_data = config
elif data["source"] in ConnectorCreator.get_supported_connectors():
session_token = config.get("session_token")
if not session_token:
return make_response(
jsonify(
{
"success": False,
"error": f"Missing session_token in {data['source']} configuration",
}
),
400,
)
# Process file_ids
file_ids = config.get("file_ids", [])
if isinstance(file_ids, str):
file_ids = [id.strip() for id in file_ids.split(",") if id.strip()]
elif not isinstance(file_ids, list):
file_ids = []
# Process folder_ids
folder_ids = config.get("folder_ids", [])
if isinstance(folder_ids, str):
folder_ids = [
id.strip() for id in folder_ids.split(",") if id.strip()
]
elif not isinstance(folder_ids, list):
folder_ids = []
config["file_ids"] = file_ids
config["folder_ids"] = folder_ids
task = ingest_connector_task.delay(
job_name=data["name"],
user=decoded_token.get("sub"),
source_type=data["source"],
session_token=session_token,
file_ids=file_ids,
folder_ids=folder_ids,
recursive=config.get("recursive", False),
retriever=config.get("retriever", "classic"),
)
return make_response(
jsonify({"success": True, "task_id": task.id}), 200
)
task = ingest_remote.delay(
source_data=source_data,
job_name=data["name"],
user=decoded_token.get("sub"),
loader=data["source"],
)
except Exception as err:
current_app.logger.error(
f"Error uploading remote source: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
@sources_upload_ns.route("/manage_source_files")
class ManageSourceFiles(Resource):
@api.expect(
api.model(
"ManageSourceFilesModel",
{
"source_id": fields.String(
required=True, description="Source ID to modify"
),
"operation": fields.String(
required=True,
description="Operation: 'add', 'remove', or 'remove_directory'",
),
"file_paths": fields.List(
fields.String,
required=False,
description="File paths to remove (for remove operation)",
),
"directory_path": fields.String(
required=False,
description="Directory path to remove (for remove_directory operation)",
),
"file": fields.Raw(
required=False, description="Files to add (for add operation)"
),
"parent_dir": fields.String(
required=False,
description="Parent directory path relative to source root",
),
},
)
)
@api.doc(
description="Add files, remove files, or remove directories from an existing source",
)
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(
jsonify({"success": False, "message": "Unauthorized"}), 401
)
user = decoded_token.get("sub")
source_id = request.form.get("source_id")
operation = request.form.get("operation")
if not source_id or not operation:
return make_response(
jsonify(
{
"success": False,
"message": "source_id and operation are required",
}
),
400,
)
if operation not in ["add", "remove", "remove_directory"]:
return make_response(
jsonify(
{
"success": False,
"message": "operation must be 'add', 'remove', or 'remove_directory'",
}
),
400,
)
try:
ObjectId(source_id)
except Exception:
return make_response(
jsonify({"success": False, "message": "Invalid source ID format"}), 400
)
try:
source = sources_collection.find_one(
{"_id": ObjectId(source_id), "user": user}
)
if not source:
return make_response(
jsonify(
{
"success": False,
"message": "Source not found or access denied",
}
),
404,
)
except Exception as err:
current_app.logger.error(f"Error finding source: {err}", exc_info=True)
return make_response(
jsonify({"success": False, "message": "Database error"}), 500
)
try:
storage = StorageCreator.get_storage()
source_file_path = source.get("file_path", "")
parent_dir = request.form.get("parent_dir", "")
if parent_dir and (parent_dir.startswith("/") or ".." in parent_dir):
return make_response(
jsonify(
{"success": False, "message": "Invalid parent directory path"}
),
400,
)
if operation == "add":
files = request.files.getlist("file")
if not files or all(file.filename == "" for file in files):
return make_response(
jsonify(
{
"success": False,
"message": "No files provided for add operation",
}
),
400,
)
added_files = []
target_dir = source_file_path
if parent_dir:
target_dir = f"{source_file_path}/{parent_dir}"
for file in files:
if file.filename:
safe_filename_str = safe_filename(file.filename)
file_path = f"{target_dir}/{safe_filename_str}"
# Save file to storage
storage.save_file(file, file_path)
added_files.append(safe_filename_str)
# Trigger re-ingestion pipeline
from application.api.user.tasks import reingest_source_task
task = reingest_source_task.delay(source_id=source_id, user=user)
return make_response(
jsonify(
{
"success": True,
"message": f"Added {len(added_files)} files",
"added_files": added_files,
"parent_dir": parent_dir,
"reingest_task_id": task.id,
}
),
200,
)
elif operation == "remove":
file_paths_str = request.form.get("file_paths")
if not file_paths_str:
return make_response(
jsonify(
{
"success": False,
"message": "file_paths required for remove operation",
}
),
400,
)
try:
file_paths = (
json.loads(file_paths_str)
if isinstance(file_paths_str, str)
else file_paths_str
)
except Exception:
return make_response(
jsonify(
{"success": False, "message": "Invalid file_paths format"}
),
400,
)
# Remove files from storage and directory structure
removed_files = []
for file_path in file_paths:
full_path = f"{source_file_path}/{file_path}"
# Remove from storage
if storage.file_exists(full_path):
storage.delete_file(full_path)
removed_files.append(file_path)
# Trigger re-ingestion pipeline
from application.api.user.tasks import reingest_source_task
task = reingest_source_task.delay(source_id=source_id, user=user)
return make_response(
jsonify(
{
"success": True,
"message": f"Removed {len(removed_files)} files",
"removed_files": removed_files,
"reingest_task_id": task.id,
}
),
200,
)
elif operation == "remove_directory":
directory_path = request.form.get("directory_path")
if not directory_path:
return make_response(
jsonify(
{
"success": False,
"message": "directory_path required for remove_directory operation",
}
),
400,
)
# Validate directory path (prevent path traversal)
if directory_path.startswith("/") or ".." in directory_path:
current_app.logger.warning(
f"Invalid directory path attempted for removal. "
f"User: {user}, Source ID: {source_id}, Directory path: {directory_path}"
)
return make_response(
jsonify(
{"success": False, "message": "Invalid directory path"}
),
400,
)
full_directory_path = (
f"{source_file_path}/{directory_path}"
if directory_path
else source_file_path
)
if not storage.is_directory(full_directory_path):
current_app.logger.warning(
f"Directory not found or is not a directory for removal. "
f"User: {user}, Source ID: {source_id}, Directory path: {directory_path}, "
f"Full path: {full_directory_path}"
)
return make_response(
jsonify(
{
"success": False,
"message": "Directory not found or is not a directory",
}
),
404,
)
success = storage.remove_directory(full_directory_path)
if not success:
current_app.logger.error(
f"Failed to remove directory from storage. "
f"User: {user}, Source ID: {source_id}, Directory path: {directory_path}, "
f"Full path: {full_directory_path}"
)
return make_response(
jsonify(
{"success": False, "message": "Failed to remove directory"}
),
500,
)
current_app.logger.info(
f"Successfully removed directory. "
f"User: {user}, Source ID: {source_id}, Directory path: {directory_path}, "
f"Full path: {full_directory_path}"
)
# Trigger re-ingestion pipeline
from application.api.user.tasks import reingest_source_task
task = reingest_source_task.delay(source_id=source_id, user=user)
return make_response(
jsonify(
{
"success": True,
"message": f"Successfully removed directory: {directory_path}",
"removed_directory": directory_path,
"reingest_task_id": task.id,
}
),
200,
)
except Exception as err:
error_context = f"operation={operation}, user={user}, source_id={source_id}"
if operation == "remove_directory":
directory_path = request.form.get("directory_path", "")
error_context += f", directory_path={directory_path}"
elif operation == "remove":
file_paths_str = request.form.get("file_paths", "")
error_context += f", file_paths={file_paths_str}"
elif operation == "add":
parent_dir = request.form.get("parent_dir", "")
error_context += f", parent_dir={parent_dir}"
current_app.logger.error(
f"Error managing source files: {err} ({error_context})", exc_info=True
)
return make_response(
jsonify({"success": False, "message": "Operation failed"}), 500
)
@sources_upload_ns.route("/task_status")
class TaskStatus(Resource):
task_status_model = api.model(
"TaskStatusModel",
{"task_id": fields.String(required=True, description="Task ID")},
)
@api.expect(task_status_model)
@api.doc(description="Get celery job status")
def get(self):
task_id = request.args.get("task_id")
if not task_id:
return make_response(
jsonify({"success": False, "message": "Task ID is required"}), 400
)
try:
from application.celery_init import celery
task = celery.AsyncResult(task_id)
task_meta = task.info
print(f"Task status: {task.status}")
if task.status == "PENDING":
inspect = celery.control.inspect()
active_workers = inspect.ping()
if not active_workers:
raise ConnectionError("Service unavailable")
if not isinstance(
task_meta, (dict, list, str, int, float, bool, type(None))
):
task_meta = str(task_meta) # Convert to a string representation
except ConnectionError as err:
return make_response(
jsonify({"success": False, "message": str(err)}), 503
)
except Exception as err:
current_app.logger.error(f"Error getting task status: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"status": task.status, "result": task_meta}), 200)

View File

@@ -5,6 +5,8 @@ from application.worker import (
agent_webhook_worker,
attachment_worker,
ingest_worker,
mcp_oauth,
mcp_oauth_status,
remote_worker,
sync_worker,
)
@@ -25,6 +27,7 @@ def ingest_remote(self, source_data, job_name, user, loader):
@celery.task(bind=True)
def reingest_source_task(self, source_id, user):
from application.worker import reingest_source_worker
resp = reingest_source_worker(self, source_id, user)
return resp
@@ -60,9 +63,10 @@ def ingest_connector_task(
retriever="classic",
operation_mode="upload",
doc_id=None,
sync_frequency="never"
sync_frequency="never",
):
from application.worker import ingest_connector
resp = ingest_connector(
self,
job_name,
@@ -75,7 +79,7 @@ def ingest_connector_task(
retriever=retriever,
operation_mode=operation_mode,
doc_id=doc_id,
sync_frequency=sync_frequency
sync_frequency=sync_frequency,
)
return resp
@@ -94,3 +98,15 @@ def setup_periodic_tasks(sender, **kwargs):
timedelta(days=30),
schedule_syncs.s("monthly"),
)
@celery.task(bind=True)
def mcp_oauth_task(self, config, user):
resp = mcp_oauth(self, config, user)
return resp
@celery.task(bind=True)
def mcp_oauth_status_task(self, task_id):
resp = mcp_oauth_status(self, task_id)
return resp

View File

@@ -0,0 +1,6 @@
"""Tools module."""
from .mcp import tools_mcp_ns
from .routes import tools_ns
__all__ = ["tools_ns", "tools_mcp_ns"]

View File

@@ -0,0 +1,333 @@
"""Tool management MCP server integration."""
import json
from email.quoprimime import unquote
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, redirect, request
from flask_restx import fields, Namespace, Resource
from application.agents.tools.mcp_tool import MCPOAuthManager, MCPTool
from application.api import api
from application.api.user.base import user_tools_collection
from application.cache import get_redis_instance
from application.security.encryption import encrypt_credentials
from application.utils import check_required_fields
tools_mcp_ns = Namespace("tools", description="Tool management operations", path="/api")
@tools_mcp_ns.route("/mcp_server/test")
class TestMCPServerConfig(Resource):
@api.expect(
api.model(
"MCPServerTestModel",
{
"config": fields.Raw(
required=True, description="MCP server configuration to test"
),
},
)
)
@api.doc(description="Test MCP server connection with provided configuration")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["config"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
config = data["config"]
auth_credentials = {}
auth_type = config.get("auth_type", "none")
if auth_type == "api_key" and "api_key" in config:
auth_credentials["api_key"] = config["api_key"]
if "api_key_header" in config:
auth_credentials["api_key_header"] = config["api_key_header"]
elif auth_type == "bearer" and "bearer_token" in config:
auth_credentials["bearer_token"] = config["bearer_token"]
elif auth_type == "basic":
if "username" in config:
auth_credentials["username"] = config["username"]
if "password" in config:
auth_credentials["password"] = config["password"]
test_config = config.copy()
test_config["auth_credentials"] = auth_credentials
mcp_tool = MCPTool(config=test_config, user_id=user)
result = mcp_tool.test_connection()
return make_response(jsonify(result), 200)
except Exception as e:
current_app.logger.error(f"Error testing MCP server: {e}", exc_info=True)
return make_response(
jsonify(
{"success": False, "error": f"Connection test failed: {str(e)}"}
),
500,
)
@tools_mcp_ns.route("/mcp_server/save")
class MCPServerSave(Resource):
@api.expect(
api.model(
"MCPServerSaveModel",
{
"id": fields.String(
required=False, description="Tool ID for updates (optional)"
),
"displayName": fields.String(
required=True, description="Display name for the MCP server"
),
"config": fields.Raw(
required=True, description="MCP server configuration"
),
"status": fields.Boolean(
required=False, default=True, description="Tool status"
),
},
)
)
@api.doc(description="Create or update MCP server with automatic tool discovery")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["displayName", "config"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
config = data["config"]
auth_credentials = {}
auth_type = config.get("auth_type", "none")
if auth_type == "api_key":
if "api_key" in config and config["api_key"]:
auth_credentials["api_key"] = config["api_key"]
if "api_key_header" in config:
auth_credentials["api_key_header"] = config["api_key_header"]
elif auth_type == "bearer":
if "bearer_token" in config and config["bearer_token"]:
auth_credentials["bearer_token"] = config["bearer_token"]
elif auth_type == "basic":
if "username" in config and config["username"]:
auth_credentials["username"] = config["username"]
if "password" in config and config["password"]:
auth_credentials["password"] = config["password"]
mcp_config = config.copy()
mcp_config["auth_credentials"] = auth_credentials
if auth_type == "oauth":
if not config.get("oauth_task_id"):
return make_response(
jsonify(
{
"success": False,
"error": "Connection not authorized. Please complete the OAuth authorization first.",
}
),
400,
)
redis_client = get_redis_instance()
manager = MCPOAuthManager(redis_client)
result = manager.get_oauth_status(config["oauth_task_id"])
if not result.get("status") == "completed":
return make_response(
jsonify(
{
"success": False,
"error": "OAuth failed or not completed. Please try authorizing again.",
}
),
400,
)
actions_metadata = result.get("tools", [])
elif auth_type == "none" or auth_credentials:
mcp_tool = MCPTool(config=mcp_config, user_id=user)
mcp_tool.discover_tools()
actions_metadata = mcp_tool.get_actions_metadata()
else:
raise Exception(
"No valid credentials provided for the selected authentication type"
)
storage_config = config.copy()
if auth_credentials:
encrypted_credentials_string = encrypt_credentials(
auth_credentials, user
)
storage_config["encrypted_credentials"] = encrypted_credentials_string
for field in [
"api_key",
"bearer_token",
"username",
"password",
"api_key_header",
]:
storage_config.pop(field, None)
transformed_actions = []
for action in actions_metadata:
action["active"] = True
if "parameters" in action:
if "properties" in action["parameters"]:
for param_name, param_details in action["parameters"][
"properties"
].items():
param_details["filled_by_llm"] = True
param_details["value"] = ""
transformed_actions.append(action)
tool_data = {
"name": "mcp_tool",
"displayName": data["displayName"],
"customName": data["displayName"],
"description": f"MCP Server: {storage_config.get('server_url', 'Unknown')}",
"config": storage_config,
"actions": transformed_actions,
"status": data.get("status", True),
"user": user,
}
tool_id = data.get("id")
if tool_id:
result = user_tools_collection.update_one(
{"_id": ObjectId(tool_id), "user": user, "name": "mcp_tool"},
{"$set": {k: v for k, v in tool_data.items() if k != "user"}},
)
if result.matched_count == 0:
return make_response(
jsonify(
{
"success": False,
"error": "Tool not found or access denied",
}
),
404,
)
response_data = {
"success": True,
"id": tool_id,
"message": f"MCP server updated successfully! Discovered {len(transformed_actions)} tools.",
"tools_count": len(transformed_actions),
}
else:
result = user_tools_collection.insert_one(tool_data)
tool_id = str(result.inserted_id)
response_data = {
"success": True,
"id": tool_id,
"message": f"MCP server created successfully! Discovered {len(transformed_actions)} tools.",
"tools_count": len(transformed_actions),
}
return make_response(jsonify(response_data), 200)
except Exception as e:
current_app.logger.error(f"Error saving MCP server: {e}", exc_info=True)
return make_response(
jsonify(
{"success": False, "error": f"Failed to save MCP server: {str(e)}"}
),
500,
)
@tools_mcp_ns.route("/mcp_server/callback")
class MCPOAuthCallback(Resource):
@api.expect(
api.model(
"MCPServerCallbackModel",
{
"code": fields.String(required=True, description="Authorization code"),
"state": fields.String(required=True, description="State parameter"),
"error": fields.String(
required=False, description="Error message (if any)"
),
},
)
)
@api.doc(
description="Handle OAuth callback by providing the authorization code and state"
)
def get(self):
code = request.args.get("code")
state = request.args.get("state")
error = request.args.get("error")
if error:
return redirect(
f"/api/connectors/callback-status?status=error&message=OAuth+error:+{error}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.&provider=mcp_tool"
)
if not code or not state:
return redirect(
"/api/connectors/callback-status?status=error&message=Authorization+code+or+state+not+provided.+Please+complete+the+authorization+process+and+make+sure+to+grant+offline+access.&provider=mcp_tool"
)
try:
redis_client = get_redis_instance()
if not redis_client:
return redirect(
"/api/connectors/callback-status?status=error&message=Internal+server+error:+Redis+not+available.&provider=mcp_tool"
)
code = unquote(code)
manager = MCPOAuthManager(redis_client)
success = manager.handle_oauth_callback(state, code, error)
if success:
return redirect(
"/api/connectors/callback-status?status=success&message=Authorization+code+received+successfully.+You+can+close+this+window.&provider=mcp_tool"
)
else:
return redirect(
"/api/connectors/callback-status?status=error&message=OAuth+callback+failed.&provider=mcp_tool"
)
except Exception as e:
current_app.logger.error(
f"Error handling MCP OAuth callback: {str(e)}", exc_info=True
)
return redirect(
f"/api/connectors/callback-status?status=error&message=Internal+server+error:+{str(e)}.&provider=mcp_tool"
)
@tools_mcp_ns.route("/mcp_server/oauth_status/<string:task_id>")
class MCPOAuthStatus(Resource):
def get(self, task_id):
"""
Get current status of OAuth flow.
Frontend should poll this endpoint periodically.
"""
try:
redis_client = get_redis_instance()
status_key = f"mcp_oauth_status:{task_id}"
status_data = redis_client.get(status_key)
if status_data:
status = json.loads(status_data)
return make_response(
jsonify({"success": True, "task_id": task_id, **status})
)
else:
return make_response(
jsonify(
{
"success": False,
"error": "Task not found or expired",
"task_id": task_id,
}
),
404,
)
except Exception as e:
current_app.logger.error(
f"Error getting OAuth status for task {task_id}: {str(e)}"
)
return make_response(
jsonify({"success": False, "error": str(e), "task_id": task_id}), 500
)

View File

@@ -0,0 +1,416 @@
"""Tool management routes."""
from bson.objectid import ObjectId
from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource
from application.agents.tools.tool_manager import ToolManager
from application.api import api
from application.api.user.base import user_tools_collection
from application.security.encryption import decrypt_credentials, encrypt_credentials
from application.utils import check_required_fields, validate_function_name
tool_config = {}
tool_manager = ToolManager(config=tool_config)
tools_ns = Namespace("tools", description="Tool management operations", path="/api")
@tools_ns.route("/available_tools")
class AvailableTools(Resource):
@api.doc(description="Get available tools for a user")
def get(self):
try:
tools_metadata = []
for tool_name, tool_instance in tool_manager.tools.items():
doc = tool_instance.__doc__.strip()
lines = doc.split("\n", 1)
name = lines[0].strip()
description = lines[1].strip() if len(lines) > 1 else ""
tools_metadata.append(
{
"name": tool_name,
"displayName": name,
"description": description,
"configRequirements": tool_instance.get_config_requirements(),
}
)
except Exception as err:
current_app.logger.error(
f"Error getting available tools: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True, "data": tools_metadata}), 200)
@tools_ns.route("/get_tools")
class GetTools(Resource):
@api.doc(description="Get tools created by a user")
def get(self):
try:
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
tools = user_tools_collection.find({"user": user})
user_tools = []
for tool in tools:
tool_copy = {**tool}
tool_copy["id"] = str(tool["_id"])
tool_copy.pop("_id", None)
user_tools.append(tool_copy)
except Exception as err:
current_app.logger.error(f"Error getting user tools: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True, "tools": user_tools}), 200)
@tools_ns.route("/create_tool")
class CreateTool(Resource):
@api.expect(
api.model(
"CreateToolModel",
{
"name": fields.String(required=True, description="Name of the tool"),
"displayName": fields.String(
required=True, description="Display name for the tool"
),
"description": fields.String(
required=True, description="Tool description"
),
"config": fields.Raw(
required=True, description="Configuration of the tool"
),
"customName": fields.String(
required=False, description="Custom name for the tool"
),
"status": fields.Boolean(
required=True, description="Status of the tool"
),
},
)
)
@api.doc(description="Create a new tool")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = [
"name",
"displayName",
"description",
"config",
"status",
]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
tool_instance = tool_manager.tools.get(data["name"])
if not tool_instance:
return make_response(
jsonify({"success": False, "message": "Tool not found"}), 404
)
actions_metadata = tool_instance.get_actions_metadata()
transformed_actions = []
for action in actions_metadata:
action["active"] = True
if "parameters" in action:
if "properties" in action["parameters"]:
for param_name, param_details in action["parameters"][
"properties"
].items():
param_details["filled_by_llm"] = True
param_details["value"] = ""
transformed_actions.append(action)
except Exception as err:
current_app.logger.error(
f"Error getting tool actions: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
try:
new_tool = {
"user": user,
"name": data["name"],
"displayName": data["displayName"],
"description": data["description"],
"customName": data.get("customName", ""),
"actions": transformed_actions,
"config": data["config"],
"status": data["status"],
}
resp = user_tools_collection.insert_one(new_tool)
new_id = str(resp.inserted_id)
except Exception as err:
current_app.logger.error(f"Error creating tool: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"id": new_id}), 200)
@tools_ns.route("/update_tool")
class UpdateTool(Resource):
@api.expect(
api.model(
"UpdateToolModel",
{
"id": fields.String(required=True, description="Tool ID"),
"name": fields.String(description="Name of the tool"),
"displayName": fields.String(description="Display name for the tool"),
"customName": fields.String(description="Custom name for the tool"),
"description": fields.String(description="Tool description"),
"config": fields.Raw(description="Configuration of the tool"),
"actions": fields.List(
fields.Raw, description="Actions the tool can perform"
),
"status": fields.Boolean(description="Status of the tool"),
},
)
)
@api.doc(description="Update a tool by ID")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["id"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
update_data = {}
if "name" in data:
update_data["name"] = data["name"]
if "displayName" in data:
update_data["displayName"] = data["displayName"]
if "customName" in data:
update_data["customName"] = data["customName"]
if "description" in data:
update_data["description"] = data["description"]
if "actions" in data:
update_data["actions"] = data["actions"]
if "config" in data:
if "actions" in data["config"]:
for action_name in list(data["config"]["actions"].keys()):
if not validate_function_name(action_name):
return make_response(
jsonify(
{
"success": False,
"message": f"Invalid function name '{action_name}'. Function names must match pattern '^[a-zA-Z0-9_-]+$'.",
"param": "tools[].function.name",
}
),
400,
)
tool_doc = user_tools_collection.find_one(
{"_id": ObjectId(data["id"]), "user": user}
)
if tool_doc and tool_doc.get("name") == "mcp_tool":
config = data["config"]
existing_config = tool_doc.get("config", {})
storage_config = existing_config.copy()
storage_config.update(config)
existing_credentials = {}
if "encrypted_credentials" in existing_config:
existing_credentials = decrypt_credentials(
existing_config["encrypted_credentials"], user
)
auth_credentials = existing_credentials.copy()
auth_type = storage_config.get("auth_type", "none")
if auth_type == "api_key":
if "api_key" in config and config["api_key"]:
auth_credentials["api_key"] = config["api_key"]
if "api_key_header" in config:
auth_credentials["api_key_header"] = config[
"api_key_header"
]
elif auth_type == "bearer":
if "bearer_token" in config and config["bearer_token"]:
auth_credentials["bearer_token"] = config["bearer_token"]
elif "encrypted_token" in config and config["encrypted_token"]:
auth_credentials["bearer_token"] = config["encrypted_token"]
elif auth_type == "basic":
if "username" in config and config["username"]:
auth_credentials["username"] = config["username"]
if "password" in config and config["password"]:
auth_credentials["password"] = config["password"]
if auth_type != "none" and auth_credentials:
encrypted_credentials_string = encrypt_credentials(
auth_credentials, user
)
storage_config["encrypted_credentials"] = (
encrypted_credentials_string
)
elif auth_type == "none":
storage_config.pop("encrypted_credentials", None)
for field in [
"api_key",
"bearer_token",
"encrypted_token",
"username",
"password",
"api_key_header",
]:
storage_config.pop(field, None)
update_data["config"] = storage_config
else:
update_data["config"] = data["config"]
if "status" in data:
update_data["status"] = data["status"]
user_tools_collection.update_one(
{"_id": ObjectId(data["id"]), "user": user},
{"$set": update_data},
)
except Exception as err:
current_app.logger.error(f"Error updating tool: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
@tools_ns.route("/update_tool_config")
class UpdateToolConfig(Resource):
@api.expect(
api.model(
"UpdateToolConfigModel",
{
"id": fields.String(required=True, description="Tool ID"),
"config": fields.Raw(
required=True, description="Configuration of the tool"
),
},
)
)
@api.doc(description="Update the configuration of a tool")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["id", "config"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
user_tools_collection.update_one(
{"_id": ObjectId(data["id"]), "user": user},
{"$set": {"config": data["config"]}},
)
except Exception as err:
current_app.logger.error(
f"Error updating tool config: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
@tools_ns.route("/update_tool_actions")
class UpdateToolActions(Resource):
@api.expect(
api.model(
"UpdateToolActionsModel",
{
"id": fields.String(required=True, description="Tool ID"),
"actions": fields.List(
fields.Raw,
required=True,
description="Actions the tool can perform",
),
},
)
)
@api.doc(description="Update the actions of a tool")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["id", "actions"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
user_tools_collection.update_one(
{"_id": ObjectId(data["id"]), "user": user},
{"$set": {"actions": data["actions"]}},
)
except Exception as err:
current_app.logger.error(
f"Error updating tool actions: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
@tools_ns.route("/update_tool_status")
class UpdateToolStatus(Resource):
@api.expect(
api.model(
"UpdateToolStatusModel",
{
"id": fields.String(required=True, description="Tool ID"),
"status": fields.Boolean(
required=True, description="Status of the tool"
),
},
)
)
@api.doc(description="Update the status of a tool")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["id", "status"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
user_tools_collection.update_one(
{"_id": ObjectId(data["id"]), "user": user},
{"$set": {"status": data["status"]}},
)
except Exception as err:
current_app.logger.error(
f"Error updating tool status: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
@tools_ns.route("/delete_tool")
class DeleteTool(Resource):
@api.expect(
api.model(
"DeleteToolModel",
{"id": fields.String(required=True, description="Tool ID")},
)
)
@api.doc(description="Delete a tool by ID")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
data = request.get_json()
required_fields = ["id"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
result = user_tools_collection.delete_one(
{"_id": ObjectId(data["id"]), "user": user}
)
if result.deleted_count == 0:
return {"success": False, "message": "Tool not found"}, 404
except Exception as err:
current_app.logger.error(f"Error deleting tool: {err}", exc_info=True)
return {"success": False}, 400
return {"success": True}, 200

View File

@@ -23,10 +23,22 @@ class Settings(BaseSettings):
LLM_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
DEFAULT_MAX_HISTORY: int = 150
LLM_TOKEN_LIMITS: dict = {
"gpt-4o": 128000,
"gpt-4o-mini": 128000,
"gpt-4": 8192,
"gpt-3.5-turbo": 4096,
"claude-2": 1e5,
"gemini-2.0-flash-exp": 1e6,
"claude-2": int(1e5),
"gemini-2.5-flash": int(1e6),
}
DEFAULT_LLM_TOKEN_LIMIT: int = 128000
RESERVED_TOKENS: dict = {
"system_prompt": 500,
"current_query": 500,
"safety_buffer": 1000,
}
DEFAULT_AGENT_LIMITS: dict = {
"token_limit": 50000,
"request_limit": 500,
}
UPLOAD_FOLDER: str = "inputs"
PARSE_PDF_AS_IMAGE: bool = False
@@ -41,11 +53,18 @@ class Settings(BaseSettings):
FALLBACK_LLM_API_KEY: Optional[str] = None # api key for fallback llm
# Google Drive integration
GOOGLE_CLIENT_ID: Optional[str] = None # Replace with your actual Google OAuth client ID
GOOGLE_CLIENT_SECRET: Optional[str] = None# Replace with your actual Google OAuth client secret
CONNECTOR_REDIRECT_BASE_URI: Optional[str] = "http://127.0.0.1:7091/api/connectors/callback"
##append ?provider={provider_name} in your Provider console like http://127.0.0.1:7091/api/connectors/callback?provider=google_drive
GOOGLE_CLIENT_ID: Optional[str] = (
None # Replace with your actual Google OAuth client ID
)
GOOGLE_CLIENT_SECRET: Optional[str] = (
None # Replace with your actual Google OAuth client secret
)
CONNECTOR_REDIRECT_BASE_URI: Optional[str] = (
"http://127.0.0.1:7091/api/connectors/callback" ##add redirect url as it is to your provider's console(gcp)
)
# GitHub source
GITHUB_ACCESS_TOKEN: Optional[str] = None # PAT token with read repo access
# LLM Cache
CACHE_REDIS_URL: str = "redis://localhost:6379/2"
@@ -96,7 +115,7 @@ class Settings(BaseSettings):
QDRANT_HOST: Optional[str] = None
QDRANT_PATH: Optional[str] = None
QDRANT_DISTANCE_FUNC: str = "Cosine"
# PGVector vectorstore config
PGVECTOR_CONNECTION_STRING: Optional[str] = None
# Milvus vectorstore config
@@ -116,6 +135,14 @@ class Settings(BaseSettings):
JWT_SECRET_KEY: str = ""
# Encryption settings
ENCRYPTION_SECRET_KEY: str = "default-docsgpt-encryption-key"
TTS_PROVIDER: str = "google_tts" # google_tts or elevenlabs
ELEVENLABS_API_KEY: Optional[str] = None
# Tool pre-fetch settings
ENABLE_TOOL_PREFETCH: bool = True
path = Path(__file__).parent.parent.absolute()
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")

View File

@@ -46,5 +46,9 @@ class AnthropicLLM(BaseLLM):
stream=True,
)
for completion in stream_response:
yield completion.completion
try:
for completion in stream_response:
yield completion.completion
finally:
if hasattr(stream_response, 'close'):
stream_response.close()

View File

@@ -44,6 +44,12 @@ class BaseLLM(ABC):
)
return self._fallback_llm
@staticmethod
def _remove_null_values(args_dict):
if not isinstance(args_dict, dict):
return args_dict
return {k: v for k, v in args_dict.items() if v is not None}
def _execute_with_fallback(
self, method_name: str, decorators: list, *args, **kwargs
):

View File

@@ -33,14 +33,15 @@ class DocsGPTAPILLM(BaseLLM):
{"role": role, "content": item["text"]}
)
elif "function_call" in item:
cleaned_args = self._remove_null_values(
item["function_call"]["args"]
)
tool_call = {
"id": item["function_call"]["call_id"],
"type": "function",
"function": {
"name": item["function_call"]["name"],
"arguments": json.dumps(
item["function_call"]["args"]
),
"arguments": json.dumps(cleaned_args),
},
}
cleaned_messages.append(
@@ -121,11 +122,19 @@ class DocsGPTAPILLM(BaseLLM):
model="docsgpt", messages=messages, stream=stream, **kwargs
)
for line in response:
if len(line.choices) > 0 and line.choices[0].delta.content is not None and len(line.choices[0].delta.content) > 0:
yield line.choices[0].delta.content
elif len(line.choices) > 0:
yield line.choices[0]
try:
for line in response:
if (
len(line.choices) > 0
and line.choices[0].delta.content is not None
and len(line.choices[0].delta.content) > 0
):
yield line.choices[0].delta.content
elif len(line.choices) > 0:
yield line.choices[0]
finally:
if hasattr(response, 'close'):
response.close()
def _supports_tools(self):
return True

View File

@@ -143,6 +143,7 @@ class GoogleLLM(BaseLLM):
raise
def _clean_messages_google(self, messages):
"""Convert OpenAI format messages to Google AI format."""
cleaned_messages = []
for message in messages:
role = message.get("role")
@@ -150,6 +151,8 @@ class GoogleLLM(BaseLLM):
if role == "assistant":
role = "model"
elif role == "tool":
role = "model"
parts = []
if role and content is not None:
@@ -160,10 +163,14 @@ class GoogleLLM(BaseLLM):
if "text" in item:
parts.append(types.Part.from_text(text=item["text"]))
elif "function_call" in item:
# Remove null values from args to avoid API errors
cleaned_args = self._remove_null_values(
item["function_call"]["args"]
)
parts.append(
types.Part.from_function_call(
name=item["function_call"]["name"],
args=item["function_call"]["args"],
args=cleaned_args,
)
)
elif "function_response" in item:
@@ -188,11 +195,63 @@ class GoogleLLM(BaseLLM):
else:
raise ValueError(f"Unexpected content type: {type(content)}")
cleaned_messages.append(types.Content(role=role, parts=parts))
if parts:
cleaned_messages.append(types.Content(role=role, parts=parts))
return cleaned_messages
def _clean_schema(self, schema_obj):
"""
Recursively remove unsupported fields from schema objects
and validate required properties.
"""
if not isinstance(schema_obj, dict):
return schema_obj
allowed_fields = {
"type",
"description",
"items",
"properties",
"required",
"enum",
"pattern",
"minimum",
"maximum",
"nullable",
"default",
}
cleaned = {}
for key, value in schema_obj.items():
if key not in allowed_fields:
continue
elif key == "type" and isinstance(value, str):
cleaned[key] = value.upper()
elif isinstance(value, dict):
cleaned[key] = self._clean_schema(value)
elif isinstance(value, list):
cleaned[key] = [self._clean_schema(item) for item in value]
else:
cleaned[key] = value
# Validate that required properties actually exist in properties
if "required" in cleaned and "properties" in cleaned:
valid_required = []
properties_keys = set(cleaned["properties"].keys())
for required_prop in cleaned["required"]:
if required_prop in properties_keys:
valid_required.append(required_prop)
if valid_required:
cleaned["required"] = valid_required
else:
cleaned.pop("required", None)
elif "required" in cleaned and "properties" not in cleaned:
cleaned.pop("required", None)
return cleaned
def _clean_tools_format(self, tools_list):
"""Convert OpenAI format tools to Google AI format."""
genai_tools = []
for tool_data in tools_list:
if tool_data["type"] == "function":
@@ -201,18 +260,16 @@ class GoogleLLM(BaseLLM):
properties = parameters.get("properties", {})
if properties:
cleaned_properties = {}
for k, v in properties.items():
cleaned_properties[k] = self._clean_schema(v)
genai_function = dict(
name=function["name"],
description=function["description"],
parameters={
"type": "OBJECT",
"properties": {
k: {
**v,
"type": v["type"].upper() if v["type"] else None,
}
for k, v in properties.items()
},
"properties": cleaned_properties,
"required": (
parameters["required"]
if "required" in parameters
@@ -242,6 +299,7 @@ class GoogleLLM(BaseLLM):
response_schema=None,
**kwargs,
):
"""Generate content using Google AI API without streaming."""
client = genai.Client(api_key=self.api_key)
if formatting == "openai":
messages = self._clean_messages_google(messages)
@@ -281,6 +339,7 @@ class GoogleLLM(BaseLLM):
response_schema=None,
**kwargs,
):
"""Generate content using Google AI API with streaming."""
client = genai.Client(api_key=self.api_key)
if formatting == "openai":
messages = self._clean_messages_google(messages)
@@ -318,25 +377,32 @@ class GoogleLLM(BaseLLM):
config=config,
)
for chunk in response:
if hasattr(chunk, "candidates") and chunk.candidates:
for candidate in chunk.candidates:
if candidate.content and candidate.content.parts:
for part in candidate.content.parts:
if part.function_call:
yield part
elif part.text:
yield part.text
elif hasattr(chunk, "text"):
yield chunk.text
try:
for chunk in response:
if hasattr(chunk, "candidates") and chunk.candidates:
for candidate in chunk.candidates:
if candidate.content and candidate.content.parts:
for part in candidate.content.parts:
if part.function_call:
yield part
elif part.text:
yield part.text
elif hasattr(chunk, "text"):
yield chunk.text
finally:
if hasattr(response, "close"):
response.close()
def _supports_tools(self):
"""Return whether this LLM supports function calling."""
return True
def _supports_structured_output(self):
"""Return whether this LLM supports structured JSON output."""
return True
def prepare_structured_output_format(self, json_schema):
"""Convert JSON schema to Google AI structured output format."""
if not json_schema:
return None

View File

@@ -205,7 +205,6 @@ class LLMHandler(ABC):
except StopIteration as e:
tool_response, call_id = e.value
break
updated_messages.append(
{
"role": "assistant",
@@ -222,17 +221,36 @@ class LLMHandler(ABC):
)
updated_messages.append(self.create_tool_message(call, tool_response))
except Exception as e:
logger.error(f"Error executing tool: {str(e)}", exc_info=True)
updated_messages.append(
{
"role": "tool",
"content": f"Error executing tool: {str(e)}",
"tool_call_id": call.id,
}
error_call = ToolCall(
id=call.id, name=call.name, arguments=call.arguments
)
error_response = f"Error executing tool: {str(e)}"
error_message = self.create_tool_message(error_call, error_response)
updated_messages.append(error_message)
call_parts = call.name.split("_")
if len(call_parts) >= 2:
tool_id = call_parts[-1] # Last part is tool ID (e.g., "1")
action_name = "_".join(call_parts[:-1])
tool_name = tools_dict.get(tool_id, {}).get("name", "unknown_tool")
full_action_name = f"{action_name}_{tool_id}"
else:
tool_name = "unknown_tool"
action_name = call.name
full_action_name = call.name
yield {
"type": "tool_call",
"data": {
"tool_name": tool_name,
"call_id": call.id,
"action_name": full_action_name,
"arguments": call.arguments,
"error": error_response,
"status": "error",
},
}
return updated_messages
def handle_non_streaming(
@@ -263,13 +281,11 @@ class LLMHandler(ABC):
except StopIteration as e:
messages = e.value
break
response = agent.llm.gen(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
parsed = self.parse_response(response)
self.llm_calls.append(build_stack_data(agent.llm))
return parsed.content
def handle_streaming(

View File

@@ -17,7 +17,6 @@ class GoogleLLMHandler(LLMHandler):
finish_reason="stop",
raw_response=response,
)
if hasattr(response, "candidates"):
parts = response.candidates[0].content.parts if response.candidates else []
tool_calls = [
@@ -41,7 +40,6 @@ class GoogleLLMHandler(LLMHandler):
finish_reason="tool_calls" if tool_calls else "stop",
raw_response=response,
)
else:
tool_calls = []
if hasattr(response, "function_call"):
@@ -61,14 +59,16 @@ class GoogleLLMHandler(LLMHandler):
def create_tool_message(self, tool_call: ToolCall, result: Any) -> Dict:
"""Create Google-style tool message."""
from google.genai import types
return {
"role": "tool",
"role": "model",
"content": [
types.Part.from_function_response(
name=tool_call.name, response={"result": result}
).to_json_dict()
{
"function_response": {
"name": tool_call.name,
"response": {"result": result},
}
}
],
}

View File

@@ -44,14 +44,15 @@ class OpenAILLM(BaseLLM):
{"role": role, "content": item["text"]}
)
elif "function_call" in item:
cleaned_args = self._remove_null_values(
item["function_call"]["args"]
)
tool_call = {
"id": item["function_call"]["call_id"],
"type": "function",
"function": {
"name": item["function_call"]["name"],
"arguments": json.dumps(
item["function_call"]["args"]
),
"arguments": json.dumps(cleaned_args),
},
}
cleaned_messages.append(
@@ -170,15 +171,19 @@ class OpenAILLM(BaseLLM):
response = self.client.chat.completions.create(**request_params)
for line in response:
if (
len(line.choices) > 0
and line.choices[0].delta.content is not None
and len(line.choices[0].delta.content) > 0
):
yield line.choices[0].delta.content
elif len(line.choices) > 0:
yield line.choices[0]
try:
for line in response:
if (
len(line.choices) > 0
and line.choices[0].delta.content is not None
and len(line.choices[0].delta.content) > 0
):
yield line.choices[0].delta.content
elif len(line.choices) > 0:
yield line.choices[0]
finally:
if hasattr(response, "close"):
response.close()
def _supports_tools(self):
return True

View File

@@ -17,14 +17,13 @@ class GoogleDriveAuth(BaseConnectorAuth):
"""
SCOPES = [
'https://www.googleapis.com/auth/drive.readonly',
'https://www.googleapis.com/auth/drive.metadata.readonly'
'https://www.googleapis.com/auth/drive.file'
]
def __init__(self):
self.client_id = settings.GOOGLE_CLIENT_ID
self.client_secret = settings.GOOGLE_CLIENT_SECRET
self.redirect_uri = f"{settings.CONNECTOR_REDIRECT_BASE_URI}?provider=google_drive"
self.redirect_uri = f"{settings.CONNECTOR_REDIRECT_BASE_URI}"
if not self.client_id or not self.client_secret:
raise ValueError("Google OAuth credentials not configured. Please set GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET in settings.")
@@ -50,7 +49,7 @@ class GoogleDriveAuth(BaseConnectorAuth):
authorization_url, _ = flow.authorization_url(
access_type='offline',
prompt='consent',
include_granted_scopes='true',
include_granted_scopes='false',
state=state
)

View File

@@ -32,6 +32,10 @@ class GoogleDriveLoader(BaseConnectorLoader):
'text/plain': '.txt',
'text/csv': '.csv',
'text/html': '.html',
'text/markdown': '.md',
'text/x-rst': '.rst',
'application/json': '.json',
'application/epub+zip': '.epub',
'application/rtf': '.rtf',
'image/jpeg': '.jpg',
'image/jpg': '.jpg',
@@ -120,6 +124,7 @@ class GoogleDriveLoader(BaseConnectorLoader):
list_only = inputs.get('list_only', False)
load_content = not list_only
page_token = inputs.get('page_token')
search_query = inputs.get('search_query')
self.next_page_token = None
if file_ids:
@@ -128,12 +133,18 @@ class GoogleDriveLoader(BaseConnectorLoader):
try:
doc = self._load_file_by_id(file_id, load_content=load_content)
if doc:
documents.append(doc)
if not search_query or (
search_query.lower() in doc.extra_info.get('file_name', '').lower()
):
documents.append(doc)
elif hasattr(self, '_credential_refreshed') and self._credential_refreshed:
self._credential_refreshed = False
logging.info(f"Retrying load of file {file_id} after credential refresh")
doc = self._load_file_by_id(file_id, load_content=load_content)
if doc:
if doc and (
not search_query or
search_query.lower() in doc.extra_info.get('file_name', '').lower()
):
documents.append(doc)
except Exception as e:
logging.error(f"Error loading file {file_id}: {e}")
@@ -141,7 +152,13 @@ class GoogleDriveLoader(BaseConnectorLoader):
else:
# Browsing mode: list immediate children of provided folder or root
parent_id = folder_id if folder_id else 'root'
documents = self._list_items_in_parent(parent_id, limit=limit, load_content=load_content, page_token=page_token)
documents = self._list_items_in_parent(
parent_id,
limit=limit,
load_content=load_content,
page_token=page_token,
search_query=search_query
)
logging.info(f"Loaded {len(documents)} documents from Google Drive")
return documents
@@ -184,13 +201,18 @@ class GoogleDriveLoader(BaseConnectorLoader):
return None
def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None) -> List[Document]:
def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None, search_query: Optional[str] = None) -> List[Document]:
self._ensure_service()
documents: List[Document] = []
try:
query = f"'{parent_id}' in parents and trashed=false"
if search_query:
safe_search = search_query.replace("'", "\\'")
query += f" and name contains '{safe_search}'"
next_token_out: Optional[str] = None
while True:
@@ -205,7 +227,8 @@ class GoogleDriveLoader(BaseConnectorLoader):
q=query,
fields='nextPageToken,files(id,name,mimeType,size,createdTime,modifiedTime,parents)',
pageToken=page_token,
pageSize=page_size
pageSize=page_size,
orderBy='name'
).execute()
items = results.get('files', [])

View File

@@ -1,5 +1,6 @@
import os
import logging
from typing import List, Any
from retry import retry
from tqdm import tqdm
from application.core.settings import settings
@@ -22,13 +23,16 @@ def sanitize_content(content: str) -> str:
@retry(tries=10, delay=60)
def add_text_to_store_with_retry(store, doc, source_id):
"""
Add a document's text and metadata to the vector store with retry logic.
def add_text_to_store_with_retry(store: Any, doc: Any, source_id: str) -> None:
"""Add a document's text and metadata to the vector store with retry logic.
Args:
store: The vector store object.
doc: The document to be added.
source_id: Unique identifier for the source.
Raises:
Exception: If document addition fails after all retry attempts.
"""
try:
# Sanitize content to remove NUL characters that cause ingestion failures
@@ -41,18 +45,21 @@ def add_text_to_store_with_retry(store, doc, source_id):
raise
def embed_and_store_documents(docs, folder_name, source_id, task_status):
"""
Embeds documents and stores them in a vector store.
def embed_and_store_documents(docs: List[Any], folder_name: str, source_id: str, task_status: Any) -> None:
"""Embeds documents and stores them in a vector store.
Args:
docs (list): List of documents to be embedded and stored.
folder_name (str): Directory to save the vector store.
source_id (str): Unique identifier for the source.
docs: List of documents to be embedded and stored.
folder_name: Directory to save the vector store.
source_id: Unique identifier for the source.
task_status: Task state manager for progress updates.
Returns:
None
Raises:
OSError: If unable to create folder or save vector store.
Exception: If vector store creation or document embedding fails.
"""
# Ensure the folder exists
if not os.path.exists(folder_name):
@@ -95,10 +102,21 @@ def embed_and_store_documents(docs, folder_name, source_id, task_status):
except Exception as e:
logging.error(f"Error embedding document {idx}: {e}", exc_info=True)
logging.info(f"Saving progress at document {idx} out of {total_docs}")
store.save_local(folder_name)
try:
store.save_local(folder_name)
logging.info("Progress saved successfully")
except Exception as save_error:
logging.error(f"CRITICAL: Failed to save progress: {save_error}", exc_info=True)
# Continue without breaking to attempt final save
break
# Save the vector store
if settings.VECTOR_STORE == "faiss":
store.save_local(folder_name)
logging.info("Vector store saved successfully.")
try:
store.save_local(folder_name)
logging.info("Vector store saved successfully.")
except Exception as e:
logging.error(f"CRITICAL: Failed to save final vector store: {e}", exc_info=True)
raise OSError(f"Unable to save vector store to {folder_name}: {e}") from e
else:
logging.info("Vector store saved successfully.")

View File

@@ -1,44 +1,135 @@
import base64
import requests
from typing import List
import time
from typing import List, Optional
from application.parser.remote.base import BaseRemote
from langchain_core.documents import Document
from application.parser.schema.base import Document
import mimetypes
from application.core.settings import settings
class GitHubLoader(BaseRemote):
def __init__(self):
self.access_token = None
self.access_token = settings.GITHUB_ACCESS_TOKEN
self.headers = {
"Authorization": f"token {self.access_token}"
} if self.access_token else {}
"Authorization": f"token {self.access_token}",
"Accept": "application/vnd.github.v3+json"
} if self.access_token else {
"Accept": "application/vnd.github.v3+json"
}
return
def fetch_file_content(self, repo_url: str, file_path: str) -> str:
def is_text_file(self, file_path: str) -> bool:
"""Determine if a file is a text file based on extension."""
# Common text file extensions
text_extensions = {
'.txt', '.md', '.markdown', '.rst', '.json', '.xml', '.yaml', '.yml',
'.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.c', '.cpp', '.h', '.hpp',
'.cs', '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala',
'.html', '.css', '.scss', '.sass', '.less',
'.sh', '.bash', '.zsh', '.fish',
'.sql', '.r', '.m', '.mat',
'.ini', '.cfg', '.conf', '.config', '.env',
'.gitignore', '.dockerignore', '.editorconfig',
'.log', '.csv', '.tsv'
}
# Get file extension
file_lower = file_path.lower()
for ext in text_extensions:
if file_lower.endswith(ext):
return True
# Also check MIME type
mime_type, _ = mimetypes.guess_type(file_path)
if mime_type and (mime_type.startswith("text") or mime_type in ["application/json", "application/xml"]):
return True
return False
def fetch_file_content(self, repo_url: str, file_path: str) -> Optional[str]:
"""Fetch file content. Returns None if file should be skipped (binary files or empty files)."""
url = f"https://api.github.com/repos/{repo_url}/contents/{file_path}"
response = requests.get(url, headers=self.headers)
response = self._make_request(url)
if response.status_code == 200:
content = response.json()
mime_type, _ = mimetypes.guess_type(file_path) # Guess the MIME type based on the file extension
content = response.json()
if content.get("encoding") == "base64":
if mime_type and mime_type.startswith("text"): # Handle only text files
try:
decoded_content = base64.b64decode(content["content"]).decode("utf-8")
return f"Filename: {file_path}\n\n{decoded_content}"
except Exception as e:
raise e
else:
return f"Filename: {file_path} is a binary file and was skipped."
if content.get("encoding") == "base64":
if self.is_text_file(file_path): # Handle only text files
try:
decoded_content = base64.b64decode(content["content"]).decode("utf-8").strip()
# Skip empty files
if not decoded_content:
return None
return decoded_content
except Exception:
# If decoding fails, it's probably a binary file
return None
else:
return f"Filename: {file_path}\n\n{content['content']}"
# Skip binary files by returning None
return None
else:
response.raise_for_status()
file_content = content['content'].strip()
# Skip empty files
if not file_content:
return None
return file_content
def _make_request(self, url: str, max_retries: int = 3) -> requests.Response:
"""Make a request with retry logic for rate limiting"""
for attempt in range(max_retries):
response = requests.get(url, headers=self.headers)
if response.status_code == 200:
return response
elif response.status_code == 403:
# Check if it's a rate limit issue
try:
error_data = response.json()
error_msg = error_data.get("message", "")
# Check rate limit headers
remaining = response.headers.get("X-RateLimit-Remaining", "unknown")
reset_time = response.headers.get("X-RateLimit-Reset", "unknown")
print(f"GitHub API 403 Error: {error_msg}")
print(f"Rate limit remaining: {remaining}, Reset time: {reset_time}")
if "rate limit" in error_msg.lower():
if attempt < max_retries - 1:
wait_time = 2 ** attempt # Exponential backoff
print(f"Rate limit hit, waiting {wait_time} seconds before retry...")
time.sleep(wait_time)
continue
# Provide helpful error message
if remaining == "0":
raise Exception(f"GitHub API rate limit exceeded. Please set GITHUB_ACCESS_TOKEN environment variable. Reset time: {reset_time}")
else:
raise Exception(f"GitHub API error: {error_msg}. This may require authentication - set GITHUB_ACCESS_TOKEN environment variable.")
except Exception as e:
if isinstance(e, Exception) and "GitHub API" in str(e):
raise
# If we can't parse the response, raise the original error
response.raise_for_status()
else:
response.raise_for_status()
return response
def fetch_repo_files(self, repo_url: str, path: str = "") -> List[str]:
url = f"https://api.github.com/repos/{repo_url}/contents/{path}"
response = requests.get(url, headers={**self.headers, "Accept": "application/vnd.github.v3.raw"})
response = self._make_request(url)
contents = response.json()
# Handle error responses from GitHub API
if isinstance(contents, dict) and "message" in contents:
raise Exception(f"GitHub API error: {contents.get('message')}")
# Ensure contents is a list
if not isinstance(contents, list):
raise TypeError(f"Expected list from GitHub API, got {type(contents).__name__}: {contents}")
files = []
for item in contents:
if item["type"] == "file":
@@ -53,6 +144,15 @@ class GitHubLoader(BaseRemote):
documents = []
for file_path in files:
content = self.fetch_file_content(repo_name, file_path)
documents.append(Document(page_content=content, metadata={"title": file_path,
"source": f"https://github.com/{repo_name}/blob/main/{file_path}"}))
# Skip binary files (content is None)
if content is None:
continue
documents.append(Document(
text=content,
doc_id=file_path,
extra_info={
"title": file_path,
"source": f"https://github.com/{repo_name}/blob/main/{file_path}"
}
))
return documents

View File

@@ -2,6 +2,7 @@ anthropic==0.49.0
boto3==1.38.18
beautifulsoup4==4.13.4
celery==5.4.0
cryptography==42.0.8
dataclasses-json==0.6.7
docx2txt==0.8
duckduckgo-search==7.5.2
@@ -9,8 +10,10 @@ ebooklib==0.18
escodegen==1.0.11
esprima==4.0.1
esutils==1.0.1
elevenlabs==2.17.0
Flask==3.1.1
faiss-cpu==1.9.0.post1
fastmcp==2.11.0
flask-restx==1.3.0
google-genai==1.3.0
google-api-python-client==2.179.0
@@ -55,13 +58,13 @@ prompt-toolkit==3.0.51
protobuf==5.29.3
psycopg2-binary==2.9.10
py==1.11.0
pydantic==2.10.6
pydantic-core==2.27.2
pydantic-settings==2.7.1
pydantic
pydantic-core
pydantic-settings
pymongo==4.11.3
pypdf==5.5.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-dotenv
python-jose==3.4.0
python-pptx==1.0.2
redis==5.2.1
@@ -81,7 +84,7 @@ tzdata==2024.2
urllib3==2.3.0
vine==5.1.0
wcwidth==0.2.13
werkzeug==3.1.3
werkzeug>=3.1.0,<3.1.2
yarl==1.20.0
markdownify==1.1.0
tldextract==5.1.3

View File

@@ -5,14 +5,6 @@ class BaseRetriever(ABC):
def __init__(self):
pass
@abstractmethod
def gen(self, *args, **kwargs):
pass
@abstractmethod
def search(self, *args, **kwargs):
pass
@abstractmethod
def get_params(self):
pass

View File

@@ -1,8 +1,10 @@
import logging
import os
from application.core.settings import settings
from application.llm.llm_creator import LLMCreator
from application.retriever.base import BaseRetriever
from application.utils import num_tokens_from_string
from application.vectorstore.vector_creator import VectorCreator
@@ -13,28 +15,33 @@ class ClassicRAG(BaseRetriever):
chat_history=None,
prompt="",
chunks=2,
token_limit=150,
doc_token_limit=50000,
gpt_model="docsgpt",
user_api_key=None,
llm_name=settings.LLM_PROVIDER,
api_key=settings.API_KEY,
decoded_token=None,
):
self.original_question = ""
self.original_question = source.get("question", "")
self.chat_history = chat_history if chat_history is not None else []
self.prompt = prompt
self.chunks = chunks
self.gpt_model = gpt_model
self.token_limit = (
token_limit
if token_limit
< settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
else settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
if isinstance(chunks, str):
try:
self.chunks = int(chunks)
except ValueError:
logging.warning(
f"Invalid chunks value '{chunks}', using default value 2"
)
self.chunks = 2
else:
self.chunks = chunks
user_identifier = user_api_key if user_api_key else "default"
logging.info(
f"ClassicRAG initialized with chunks={self.chunks}, user_api_key={user_identifier}, "
f"sources={'active_docs' in source and source['active_docs'] is not None}"
)
self.gpt_model = gpt_model
self.doc_token_limit = doc_token_limit
self.user_api_key = user_api_key
self.llm_name = llm_name
self.api_key = api_key
@@ -44,26 +51,48 @@ class ClassicRAG(BaseRetriever):
user_api_key=self.user_api_key,
decoded_token=decoded_token,
)
self.vectorstore = source["active_docs"] if "active_docs" in source else None
if "active_docs" in source and source["active_docs"] is not None:
if isinstance(source["active_docs"], list):
self.vectorstores = source["active_docs"]
else:
self.vectorstores = [source["active_docs"]]
else:
self.vectorstores = []
self.question = self._rephrase_query()
self.decoded_token = decoded_token
self._validate_vectorstore_config()
def _validate_vectorstore_config(self):
"""Validate vectorstore IDs and remove any empty/invalid entries"""
if not self.vectorstores:
logging.warning("No vectorstores configured for retrieval")
return
invalid_ids = [
vs_id for vs_id in self.vectorstores if not vs_id or not vs_id.strip()
]
if invalid_ids:
logging.warning(f"Found invalid vectorstore IDs: {invalid_ids}")
self.vectorstores = [
vs_id for vs_id in self.vectorstores if vs_id and vs_id.strip()
]
def _rephrase_query(self):
"""Rephrase user query with chat history context for better retrieval"""
if (
not self.original_question
or not self.chat_history
or self.chat_history == []
or self.chunks == 0
or self.vectorstore is None
or not self.vectorstores
):
return self.original_question
prompt = f"""Given the following conversation history:
{self.chat_history}
Rephrase the following user question to be a standalone search query
that captures all relevant context from the conversation:
"""
prompt = (
"Given the following conversation history:\n"
f"{self.chat_history}\n\n"
"Rephrase the following user question to be a standalone search query "
"that captures all relevant context from the conversation:\n"
)
messages = [
{"role": "system", "content": prompt},
@@ -79,46 +108,93 @@ class ClassicRAG(BaseRetriever):
return self.original_question
def _get_data(self):
if self.chunks == 0 or self.vectorstore is None:
docs = []
else:
docsearch = VectorCreator.create_vectorstore(
settings.VECTOR_STORE, self.vectorstore, settings.EMBEDDINGS_KEY
if self.chunks == 0 or not self.vectorstores:
logging.info(
f"ClassicRAG._get_data: Skipping retrieval - chunks={self.chunks}, "
f"vectorstores_count={len(self.vectorstores) if self.vectorstores else 0}"
)
docs_temp = docsearch.search(self.question, k=self.chunks)
docs = [
{
"title": i.metadata.get(
"title", i.metadata.get("post_title", i.page_content)
).split("/")[-1],
"text": i.page_content,
"source": (
i.metadata.get("source")
if i.metadata.get("source")
else "local"
),
}
for i in docs_temp
]
return []
return docs
all_docs = []
chunks_per_source = max(1, self.chunks // len(self.vectorstores))
token_budget = max(int(self.doc_token_limit * 0.9), 100)
cumulative_tokens = 0
def gen():
pass
for vectorstore_id in self.vectorstores:
if vectorstore_id:
try:
docsearch = VectorCreator.create_vectorstore(
settings.VECTOR_STORE, vectorstore_id, settings.EMBEDDINGS_KEY
)
docs_temp = docsearch.search(
self.question, k=max(chunks_per_source * 2, 20)
)
for doc in docs_temp:
if cumulative_tokens >= token_budget:
break
if hasattr(doc, "page_content") and hasattr(doc, "metadata"):
page_content = doc.page_content
metadata = doc.metadata
else:
page_content = doc.get("text", doc.get("page_content", ""))
metadata = doc.get("metadata", {})
title = metadata.get(
"title", metadata.get("post_title", page_content)
)
if not isinstance(title, str):
title = str(title)
title = title.split("/")[-1]
filename = (
metadata.get("filename")
or metadata.get("file_name")
or metadata.get("source")
)
if isinstance(filename, str):
filename = os.path.basename(filename) or filename
else:
filename = title
if not filename:
filename = title
source_path = metadata.get("source") or vectorstore_id
doc_text_with_header = f"{filename}\n{page_content}"
doc_tokens = num_tokens_from_string(doc_text_with_header)
if cumulative_tokens + doc_tokens < token_budget:
all_docs.append(
{
"title": title,
"text": page_content,
"source": source_path,
"filename": filename,
}
)
cumulative_tokens += doc_tokens
if cumulative_tokens >= token_budget:
break
except Exception as e:
logging.error(
f"Error searching vectorstore {vectorstore_id}: {e}",
exc_info=True,
)
continue
logging.info(
f"ClassicRAG._get_data: Retrieval complete - retrieved {len(all_docs)} documents "
f"(requested chunks={self.chunks}, chunks_per_source={chunks_per_source}, "
f"cumulative_tokens={cumulative_tokens}/{token_budget})"
)
return all_docs
def search(self, query: str = ""):
"""Search for documents using optional query override"""
if query:
self.original_question = query
self.question = self._rephrase_query()
return self._get_data()
def get_params(self):
return {
"question": self.original_question,
"rephrased_question": self.question,
"source": self.vectorstore,
"chunks": self.chunks,
"token_limit": self.token_limit,
"gpt_model": self.gpt_model,
"user_api_key": self.user_api_key,
}

View File

View File

@@ -0,0 +1,85 @@
import base64
import json
import os
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.ciphers import algorithms, Cipher, modes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from application.core.settings import settings
def _derive_key(user_id: str, salt: bytes) -> bytes:
app_secret = settings.ENCRYPTION_SECRET_KEY
password = f"{app_secret}#{user_id}".encode()
kdf = PBKDF2HMAC(
algorithm=hashes.SHA256(),
length=32,
salt=salt,
iterations=100000,
backend=default_backend(),
)
return kdf.derive(password)
def encrypt_credentials(credentials: dict, user_id: str) -> str:
if not credentials:
return ""
try:
salt = os.urandom(16)
iv = os.urandom(16)
key = _derive_key(user_id, salt)
json_str = json.dumps(credentials)
cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
encryptor = cipher.encryptor()
padded_data = _pad_data(json_str.encode())
encrypted_data = encryptor.update(padded_data) + encryptor.finalize()
result = salt + iv + encrypted_data
return base64.b64encode(result).decode()
except Exception as e:
print(f"Warning: Failed to encrypt credentials: {e}")
return ""
def decrypt_credentials(encrypted_data: str, user_id: str) -> dict:
if not encrypted_data:
return {}
try:
data = base64.b64decode(encrypted_data.encode())
salt = data[:16]
iv = data[16:32]
encrypted_content = data[32:]
key = _derive_key(user_id, salt)
cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
decryptor = cipher.decryptor()
decrypted_padded = decryptor.update(encrypted_content) + decryptor.finalize()
decrypted_data = _unpad_data(decrypted_padded)
return json.loads(decrypted_data.decode())
except Exception as e:
print(f"Warning: Failed to decrypt credentials: {e}")
return {}
def _pad_data(data: bytes) -> bytes:
block_size = 16
padding_len = block_size - (len(data) % block_size)
padding = bytes([padding_len]) * padding_len
return data + padding
def _unpad_data(data: bytes) -> bytes:
padding_len = data[-1]
return data[:-padding_len]

View File

View File

@@ -0,0 +1,26 @@
import click
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.seed.seeder import DatabaseSeeder
@click.group()
def seed():
"""Database seeding commands"""
pass
@seed.command()
@click.option("--force", is_flag=True, help="Force reseeding even if data exists")
def init(force):
"""Initialize database with seed data"""
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
seeder = DatabaseSeeder(db)
seeder.seed_initial_data(force=force)
if __name__ == "__main__":
seed()

View File

@@ -0,0 +1,36 @@
# Configuration for Premade Agents
# This file contains template agents that will be seeded into the database
agents:
# Basic Agent Template
- name: "Agent Name" # Required: Unique name for the agent
description: "What this agent does" # Required: Brief description of the agent's purpose
image: "URL_TO_IMAGE" # Optional: URL to agent's avatar/image
agent_type: "classic" # Required: Type of agent (e.g., classic, react, etc.)
prompt_id: "default" # Optional: Reference to prompt template
prompt: # Optional: Define new prompt
name: "New Prompt"
content: "You are new agent with cool new prompt."
chunks: "0" # Optional: Chunking strategy for documents
retriever: "" # Optional: Retriever type for document search
# Source Configuration (where the agent gets its knowledge)
source: # Optional: Select a source to link with agent
name: "Source Display Name" # Human-readable name for the source
url: "https://example.com/data-source" # URL or path to knowledge source
loader: "url" # Type of loader (url, pdf, txt, etc.)
# Tools Configuration (what capabilities the agent has)
tools: # Optional: Remove if agent doesn't need tools
- name: "tool_name" # Must match a supported tool name
display_name: "Tool Display Name" # Optional: Human-readable name for the tool
config:
# Tool-specific configuration
# Example for DuckDuckGo:
# token: "${DDG_API_KEY}" # ${} denotes environment variable
# Add more tools as needed
# - name: "another_tool"
# config:
# param1: "value1"
# param2: "${ENV_VAR}"

View File

@@ -0,0 +1,94 @@
# Configuration for Premade Agents
agents:
- name: "Assistant"
description: "Your general-purpose AI assistant. Ready to help with a wide range of tasks."
image: "https://d3dg1063dc54p9.cloudfront.net/imgs/agents/agent-logo.svg"
agent_type: "classic"
prompt_id: "default"
chunks: "0"
retriever: ""
# Tools Configuration
tools:
- name: "tool_name"
display_name: "read_webpage"
config:
- name: "Researcher"
description: "A specialized research agent that performs deep dives into subjects."
image: "https://d3dg1063dc54p9.cloudfront.net/imgs/agents/agent-researcher.svg"
agent_type: "react"
prompt:
name: "Researcher-Agent"
content: |
You are a specialized AI research assistant, DocsGPT. Your primary function is to conduct in-depth research on a given subject or question. You are methodical, thorough, and analytical. You should perform multiple iterations of thinking to gather and synthesize information before providing a final, comprehensive answer.
You have access to the 'Read Webpage' tool. Use this tool to explore sources, gather data, and deepen your understanding. Be proactive in using the tool to fill in knowledge gaps and validate information.
Users can Upload documents for your context as attachments or sources via UI using the Conversation input box.
If appropriate, your answers can include code examples, formatted as follows:
```(language)
(code)
```
Users are also able to see charts and diagrams if you use them with valid mermaid syntax in your responses. Try to respond with mermaid charts if visualization helps with users queries. You effectively utilize chat history, ensuring relevant and tailored responses. Try to use additional provided context if it's available, otherwise use your knowledge and tool capabilities.
----------------
Possible additional context from uploaded sources:
{summaries}
chunks: "0"
retriever: ""
# Tools Configuration
tools:
- name: "tool_name"
display_name: "read_webpage"
config:
- name: "Search Widget"
description: "A powerful search widget agent. Ask it anything about DocsGPT"
image: "https://d3dg1063dc54p9.cloudfront.net/imgs/agents/agent-search.svg"
agent_type: "classic"
prompt:
name: "Search-Agent"
content: |
You are a website search assistant, DocsGPT. Your sole purpose is to help users find information within the provided context of the DocsGPT documentation. Act as a specialized search engine.
Your answers must be based *only* on the provided context. Do not use any external knowledge. If the answer is not in the context, inform the user that you could not find the information within the documentation.
Keep your responses concise and directly related to the user's query, pointing them to the most relevant information.
----------------
Possible additional context from uploaded sources:
{summaries}
chunks: "8"
retriever: ""
source:
name: "DocsGPT-Docs"
url: "https://d3dg1063dc54p9.cloudfront.net/agent-source/docsgpt-documentation.md" # URL to DocsGPT documentation
loader: "url"
- name: "Support Widget"
description: "A friendly support widget agent to help you with any questions."
image: "https://d3dg1063dc54p9.cloudfront.net/imgs/agents/agent-support.svg"
agent_type: "classic"
prompt:
name: "Support-Agent"
content: |
You are a helpful AI support widget agent, DocsGPT. Your goal is to assist users by answering their questions about our website, product and its features. Provide friendly, clear, and direct support.
Your knowledge is strictly limited to the provided context from the DocsGPT documentation. You must not answer questions outside of this scope. If a user asks something you cannot answer from the context, politely state that you can only help with questions about this website.
Effectively utilize chat history to understand the user's issue fully. Guide users to the information they need in a helpful and conversational manner.
----------------
Possible additional context from uploaded sources:
{summaries}
chunks: "8"
retriever: ""
source:
name: "DocsGPT-Docs"
url: "https://d3dg1063dc54p9.cloudfront.net/agent-source/docsgpt-documentation.md" # URL to DocsGPT documentation
loader: "url"

277
application/seed/seeder.py Normal file
View File

@@ -0,0 +1,277 @@
import logging
import os
from datetime import datetime, timezone
from typing import Dict, List, Optional, Union
import yaml
from bson import ObjectId
from bson.dbref import DBRef
from dotenv import load_dotenv
from pymongo import MongoClient
from application.agents.tools.tool_manager import ToolManager
from application.api.user.tasks import ingest_remote
load_dotenv()
tool_config = {}
tool_manager = ToolManager(config=tool_config)
class DatabaseSeeder:
def __init__(self, db):
self.db = db
self.tools_collection = self.db["user_tools"]
self.sources_collection = self.db["sources"]
self.agents_collection = self.db["agents"]
self.prompts_collection = self.db["prompts"]
self.system_user_id = "system"
self.logger = logging.getLogger(__name__)
def seed_initial_data(self, config_path: str = None, force=False):
"""Main entry point for seeding all initial data"""
if not force and self._is_already_seeded():
self.logger.info("Database already seeded. Use force=True to reseed.")
return
config_path = config_path or os.path.join(
os.path.dirname(__file__), "config", "premade_agents.yaml"
)
try:
with open(config_path, "r") as f:
config = yaml.safe_load(f)
self._seed_from_config(config)
except Exception as e:
self.logger.error(f"Failed to load seeding config: {str(e)}")
raise
def _seed_from_config(self, config: Dict):
"""Seed all data from configuration"""
self.logger.info("🌱 Starting seeding...")
if not config.get("agents"):
self.logger.warning("No agents found in config")
return
used_tool_ids = set()
for agent_config in config["agents"]:
try:
self.logger.info(f"Processing agent: {agent_config['name']}")
# 1. Handle Source
source_result = self._handle_source(agent_config)
if source_result is False:
self.logger.error(
f"Skipping agent {agent_config['name']} due to source ingestion failure"
)
continue
source_id = source_result
# 2. Handle Tools
tool_ids = self._handle_tools(agent_config)
if len(tool_ids) == 0:
self.logger.warning(
f"No valid tools for agent {agent_config['name']}"
)
used_tool_ids.update(tool_ids)
# 3. Handle Prompt
prompt_id = self._handle_prompt(agent_config)
# 4. Create Agent
agent_data = {
"user": self.system_user_id,
"name": agent_config["name"],
"description": agent_config["description"],
"image": agent_config.get("image", ""),
"source": (
DBRef("sources", ObjectId(source_id)) if source_id else ""
),
"tools": [str(tid) for tid in tool_ids],
"agent_type": agent_config["agent_type"],
"prompt_id": prompt_id or agent_config.get("prompt_id", "default"),
"chunks": agent_config.get("chunks", "0"),
"retriever": agent_config.get("retriever", ""),
"status": "template",
"createdAt": datetime.now(timezone.utc),
"updatedAt": datetime.now(timezone.utc),
}
existing = self.agents_collection.find_one(
{"user": self.system_user_id, "name": agent_config["name"]}
)
if existing:
self.logger.info(f"Updating existing agent: {agent_config['name']}")
self.agents_collection.update_one(
{"_id": existing["_id"]}, {"$set": agent_data}
)
agent_id = existing["_id"]
else:
self.logger.info(f"Creating new agent: {agent_config['name']}")
result = self.agents_collection.insert_one(agent_data)
agent_id = result.inserted_id
self.logger.info(
f"Successfully processed agent: {agent_config['name']} (ID: {agent_id})"
)
except Exception as e:
self.logger.error(
f"Error processing agent {agent_config['name']}: {str(e)}"
)
continue
self.logger.info("✅ Database seeding completed")
def _handle_source(self, agent_config: Dict) -> Union[ObjectId, None, bool]:
"""Handle source ingestion and return source ID"""
if not agent_config.get("source"):
self.logger.info(
"No source provided for agent - will create agent without source"
)
return None
source_config = agent_config["source"]
self.logger.info(f"Ingesting source: {source_config['url']}")
try:
existing = self.sources_collection.find_one(
{"user": self.system_user_id, "remote_data": source_config["url"]}
)
if existing:
self.logger.info(f"Source already exists: {existing['_id']}")
return existing["_id"]
# Ingest new source using worker
task = ingest_remote.delay(
source_data=source_config["url"],
job_name=source_config["name"],
user=self.system_user_id,
loader=source_config.get("loader", "url"),
)
result = task.get(timeout=300)
if not task.successful():
raise Exception(f"Source ingestion failed: {result}")
source_id = None
if isinstance(result, dict) and "id" in result:
source_id = result["id"]
else:
raise Exception(f"Source ingestion result missing 'id': {result}")
self.logger.info(f"Source ingested successfully: {source_id}")
return source_id
except Exception as e:
self.logger.error(f"Failed to ingest source: {str(e)}")
return False
def _handle_tools(self, agent_config: Dict) -> List[ObjectId]:
"""Handle tool creation and return list of tool IDs"""
tool_ids = []
if not agent_config.get("tools"):
return tool_ids
for tool_config in agent_config["tools"]:
try:
tool_name = tool_config["name"]
processed_config = self._process_config(tool_config.get("config", {}))
self.logger.info(f"Processing tool: {tool_name}")
existing = self.tools_collection.find_one(
{
"user": self.system_user_id,
"name": tool_name,
"config": processed_config,
}
)
if existing:
self.logger.info(f"Tool already exists: {existing['_id']}")
tool_ids.append(existing["_id"])
continue
tool_data = {
"user": self.system_user_id,
"name": tool_name,
"displayName": tool_config.get("display_name", tool_name),
"description": tool_config.get("description", ""),
"actions": tool_manager.tools[tool_name].get_actions_metadata(),
"config": processed_config,
"status": True,
}
result = self.tools_collection.insert_one(tool_data)
tool_ids.append(result.inserted_id)
self.logger.info(f"Created new tool: {result.inserted_id}")
except Exception as e:
self.logger.error(f"Failed to process tool {tool_name}: {str(e)}")
continue
return tool_ids
def _handle_prompt(self, agent_config: Dict) -> Optional[str]:
"""Handle prompt creation and return prompt ID"""
if not agent_config.get("prompt"):
return None
prompt_config = agent_config["prompt"]
prompt_name = prompt_config.get("name", f"{agent_config['name']} Prompt")
prompt_content = prompt_config.get("content", "")
if not prompt_content:
self.logger.warning(
f"No prompt content provided for agent {agent_config['name']}"
)
return None
self.logger.info(f"Processing prompt: {prompt_name}")
try:
existing = self.prompts_collection.find_one(
{
"user": self.system_user_id,
"name": prompt_name,
"content": prompt_content,
}
)
if existing:
self.logger.info(f"Prompt already exists: {existing['_id']}")
return str(existing["_id"])
prompt_data = {
"name": prompt_name,
"content": prompt_content,
"user": self.system_user_id,
}
result = self.prompts_collection.insert_one(prompt_data)
prompt_id = str(result.inserted_id)
self.logger.info(f"Created new prompt: {prompt_id}")
return prompt_id
except Exception as e:
self.logger.error(f"Failed to process prompt {prompt_name}: {str(e)}")
return None
def _process_config(self, config: Dict) -> Dict:
"""Process config values to replace environment variables"""
processed = {}
for key, value in config.items():
if (
isinstance(value, str)
and value.startswith("${")
and value.endswith("}")
):
env_var = value[2:-1]
processed[key] = os.getenv(env_var, "")
else:
processed[key] = value
return processed
def _is_already_seeded(self) -> bool:
"""Check if premade agents already exist"""
return self.agents_collection.count_documents({"user": self.system_user_id}) > 0
@classmethod
def initialize_from_env(cls, worker=None):
"""Factory method to create seeder from environment"""
mongo_uri = os.getenv("MONGO_URI", "mongodb://localhost:27017")
db_name = os.getenv("MONGO_DB_NAME", "docsgpt")
client = MongoClient(mongo_uri)
db = client[db_name]
return cls(db)

View File

@@ -26,7 +26,7 @@ class LocalStorage(BaseStorage):
return path
return os.path.join(self.base_dir, path)
def save_file(self, file_data: BinaryIO, path: str) -> dict:
def save_file(self, file_data: BinaryIO, path: str, **kwargs) -> dict:
"""Save a file to local storage."""
full_path = self._get_full_path(path)

View File

View File

@@ -0,0 +1,190 @@
import logging
import uuid
from abc import ABC, abstractmethod
from datetime import datetime, timezone
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
class NamespaceBuilder(ABC):
"""Base class for building template context namespaces"""
@abstractmethod
def build(self, **kwargs) -> Dict[str, Any]:
"""Build namespace context dictionary"""
pass
@property
@abstractmethod
def namespace_name(self) -> str:
"""Name of this namespace for template access"""
pass
class SystemNamespace(NamespaceBuilder):
"""System metadata namespace: {{ system.* }}"""
@property
def namespace_name(self) -> str:
return "system"
def build(
self, request_id: Optional[str] = None, user_id: Optional[str] = None, **kwargs
) -> Dict[str, Any]:
"""
Build system context with metadata.
Args:
request_id: Unique request identifier
user_id: Current user identifier
Returns:
Dictionary with system variables
"""
now = datetime.now(timezone.utc)
return {
"date": now.strftime("%Y-%m-%d"),
"time": now.strftime("%H:%M:%S"),
"timestamp": now.isoformat(),
"request_id": request_id or str(uuid.uuid4()),
"user_id": user_id,
}
class PassthroughNamespace(NamespaceBuilder):
"""Request parameters namespace: {{ passthrough.* }}"""
@property
def namespace_name(self) -> str:
return "passthrough"
def build(
self, passthrough_data: Optional[Dict[str, Any]] = None, **kwargs
) -> Dict[str, Any]:
"""
Build passthrough context from request parameters.
Args:
passthrough_data: Dictionary of parameters from web request
Returns:
Dictionary with passthrough variables
"""
if not passthrough_data:
return {}
safe_data = {}
for key, value in passthrough_data.items():
if isinstance(value, (str, int, float, bool, type(None))):
safe_data[key] = value
else:
logger.warning(
f"Skipping non-serializable passthrough value for key '{key}': {type(value)}"
)
return safe_data
class SourceNamespace(NamespaceBuilder):
"""RAG source documents namespace: {{ source.* }}"""
@property
def namespace_name(self) -> str:
return "source"
def build(
self, docs: Optional[list] = None, docs_together: Optional[str] = None, **kwargs
) -> Dict[str, Any]:
"""
Build source context from RAG retrieval results.
Args:
docs: List of retrieved documents
docs_together: Concatenated document content (for backward compatibility)
Returns:
Dictionary with source variables
"""
context = {}
if docs:
context["documents"] = docs
context["count"] = len(docs)
if docs_together:
context["docs_together"] = docs_together # Add docs_together for custom templates
context["content"] = docs_together
context["summaries"] = docs_together
return context
class ToolsNamespace(NamespaceBuilder):
"""Pre-executed tools namespace: {{ tools.* }}"""
@property
def namespace_name(self) -> str:
return "tools"
def build(
self, tools_data: Optional[Dict[str, Any]] = None, **kwargs
) -> Dict[str, Any]:
"""
Build tools context with pre-executed tool results.
Args:
tools_data: Dictionary of pre-fetched tool results organized by tool name
e.g., {"memory": {"notes": "content", "tasks": "list"}}
Returns:
Dictionary with tool results organized by tool name
"""
if not tools_data:
return {}
safe_data = {}
for tool_name, tool_result in tools_data.items():
if isinstance(tool_result, (str, dict, list, int, float, bool, type(None))):
safe_data[tool_name] = tool_result
else:
logger.warning(
f"Skipping non-serializable tool result for '{tool_name}': {type(tool_result)}"
)
return safe_data
class NamespaceManager:
"""Manages all namespace builders and context assembly"""
def __init__(self):
self._builders = {
"system": SystemNamespace(),
"passthrough": PassthroughNamespace(),
"source": SourceNamespace(),
"tools": ToolsNamespace(),
}
def build_context(self, **kwargs) -> Dict[str, Any]:
"""
Build complete template context from all namespaces.
Args:
**kwargs: Parameters to pass to namespace builders
Returns:
Complete context dictionary for template rendering
"""
context = {}
for namespace_name, builder in self._builders.items():
try:
namespace_context = builder.build(**kwargs)
# Always include namespace, even if empty, to prevent undefined errors
context[namespace_name] = namespace_context if namespace_context else {}
except Exception as e:
logger.error(f"Failed to build {namespace_name} namespace: {str(e)}")
# Include empty namespace on error to prevent template failures
context[namespace_name] = {}
return context
def get_builder(self, namespace_name: str) -> Optional[NamespaceBuilder]:
"""Get specific namespace builder"""
return self._builders.get(namespace_name)

View File

@@ -0,0 +1,161 @@
import logging
from typing import Any, Dict, List, Optional, Set
from jinja2 import (
ChainableUndefined,
Environment,
nodes,
select_autoescape,
TemplateSyntaxError,
)
from jinja2.exceptions import UndefinedError
logger = logging.getLogger(__name__)
class TemplateRenderError(Exception):
"""Raised when template rendering fails"""
pass
class TemplateEngine:
"""Jinja2-based template engine for dynamic prompt rendering"""
def __init__(self):
self._env = Environment(
undefined=ChainableUndefined,
trim_blocks=True,
lstrip_blocks=True,
autoescape=select_autoescape(default_for_string=True, default=True),
)
def render(self, template_content: str, context: Dict[str, Any]) -> str:
"""
Render template with provided context.
Args:
template_content: Raw template string with Jinja2 syntax
context: Dictionary of variables to inject into template
Returns:
Rendered template string
Raises:
TemplateRenderError: If template syntax is invalid or variables undefined
"""
if not template_content:
return ""
try:
template = self._env.from_string(template_content)
return template.render(**context)
except TemplateSyntaxError as e:
error_msg = f"Template syntax error at line {e.lineno}: {e.message}"
logger.error(error_msg)
raise TemplateRenderError(error_msg) from e
except UndefinedError as e:
error_msg = f"Undefined variable in template: {e.message}"
logger.error(error_msg)
raise TemplateRenderError(error_msg) from e
except Exception as e:
error_msg = f"Template rendering failed: {str(e)}"
logger.error(error_msg)
raise TemplateRenderError(error_msg) from e
def validate_template(self, template_content: str) -> bool:
"""
Validate template syntax without rendering.
Args:
template_content: Template string to validate
Returns:
True if template is syntactically valid
"""
if not template_content:
return True
try:
self._env.from_string(template_content)
return True
except TemplateSyntaxError as e:
logger.debug(f"Template syntax invalid at line {e.lineno}: {e.message}")
return False
except Exception as e:
logger.debug(f"Template validation error: {type(e).__name__}: {str(e)}")
return False
def extract_variables(self, template_content: str) -> Set[str]:
"""
Extract all variable names from template.
Args:
template_content: Template string to analyze
Returns:
Set of variable names found in template
"""
if not template_content:
return set()
try:
ast = self._env.parse(template_content)
return set(self._env.get_template_module(ast).make_module().keys())
except TemplateSyntaxError as e:
logger.debug(f"Cannot extract variables - syntax error at line {e.lineno}")
return set()
except Exception as e:
logger.debug(f"Cannot extract variables: {type(e).__name__}")
return set()
def extract_tool_usages(
self, template_content: str
) -> Dict[str, Set[Optional[str]]]:
"""Extract tool and action references from a template"""
if not template_content:
return {}
try:
ast = self._env.parse(template_content)
except TemplateSyntaxError as e:
logger.debug(f"extract_tool_usages - syntax error at line {e.lineno}")
return {}
except Exception as e:
logger.debug(f"extract_tool_usages - parse error: {type(e).__name__}")
return {}
usages: Dict[str, Set[Optional[str]]] = {}
def record(path: List[str]) -> None:
if not path:
return
tool_name = path[0]
action_name = path[1] if len(path) > 1 else None
if not tool_name:
return
tool_entry = usages.setdefault(tool_name, set())
tool_entry.add(action_name)
for node in ast.find_all(nodes.Getattr):
path = []
current = node
while isinstance(current, nodes.Getattr):
path.append(current.attr)
current = current.node
if isinstance(current, nodes.Name) and current.name == "tools":
path.reverse()
record(path)
for node in ast.find_all(nodes.Getitem):
path = []
current = node
while isinstance(current, nodes.Getitem):
key = current.arg
if isinstance(key, nodes.Const) and isinstance(key.value, str):
path.append(key.value)
else:
path = []
break
current = current.node
if path and isinstance(current, nodes.Name) and current.name == "tools":
path.reverse()
record(path)
return usages

View File

@@ -1,84 +1,31 @@
import asyncio
import websockets
import json
import base64
from io import BytesIO
import base64
from application.tts.base import BaseTTS
from application.core.settings import settings
class ElevenlabsTTS(BaseTTS):
def __init__(self):
self.api_key = 'ELEVENLABS_API_KEY'# here you should put your api key
self.model = "eleven_flash_v2_5"
self.voice = "VOICE_ID" # this is the hash code for the voice not the name!
self.write_audio = 1
def __init__(self):
from elevenlabs.client import ElevenLabs
self.client = ElevenLabs(
api_key=settings.ELEVENLABS_API_KEY,
)
def text_to_speech(self, text):
asyncio.run(self._text_to_speech_websocket(text))
lang = "en"
audio = self.client.text_to_speech.convert(
voice_id="nPczCjzI2devNBz1zQrb",
model_id="eleven_multilingual_v2",
text=text,
output_format="mp3_44100_128"
)
audio_data = BytesIO()
for chunk in audio:
audio_data.write(chunk)
audio_bytes = audio_data.getvalue()
async def _text_to_speech_websocket(self, text):
uri = f"wss://api.elevenlabs.io/v1/text-to-speech/{self.voice}/stream-input?model_id={self.model}"
websocket = await websockets.connect(uri)
payload = {
"text": " ",
"voice_settings": {
"stability": 0.5,
"similarity_boost": 0.8,
},
"xi_api_key": self.api_key,
}
await websocket.send(json.dumps(payload))
async def listen():
while 1:
try:
msg = await websocket.recv()
data = json.loads(msg)
if data.get("audio"):
print("audio received")
yield base64.b64decode(data["audio"])
elif data.get("isFinal"):
break
except websockets.exceptions.ConnectionClosed:
print("websocket closed")
break
listen_task = asyncio.create_task(self.stream(listen()))
await websocket.send(json.dumps({"text": text}))
# this is to signal the end of the text, either use this or flush
await websocket.send(json.dumps({"text": ""}))
await listen_task
async def stream(self, audio_stream):
if self.write_audio:
audio_bytes = BytesIO()
async for chunk in audio_stream:
if chunk:
audio_bytes.write(chunk)
with open("output_audio.mp3", "wb") as f:
f.write(audio_bytes.getvalue())
else:
async for chunk in audio_stream:
pass # depends on the streamer!
def test_elevenlabs_websocket():
"""
Tests the ElevenlabsTTS text_to_speech method with a sample prompt.
Prints out the base64-encoded result and writes it to 'output_audio.mp3'.
"""
# Instantiate your TTS class
tts = ElevenlabsTTS()
# Call the method with some sample text
tts.text_to_speech("Hello from ElevenLabs WebSocket!")
print("Saved audio to output_audio.mp3.")
if __name__ == "__main__":
test_elevenlabs_websocket()
# Encode to base64
audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
return audio_base64, lang

View File

@@ -0,0 +1,18 @@
from application.tts.google_tts import GoogleTTS
from application.tts.elevenlabs import ElevenlabsTTS
from application.tts.base import BaseTTS
class TTSCreator:
tts_providers = {
"google_tts": GoogleTTS,
"elevenlabs": ElevenlabsTTS,
}
@classmethod
def create_tts(cls, tts_type, *args, **kwargs)-> BaseTTS:
tts_class = cls.tts_providers.get(tts_type.lower())
if not tts_class:
raise ValueError(f"No tts class found for type {tts_type}")
return tts_class(*args, **kwargs)

View File

@@ -21,7 +21,7 @@ def get_encoding():
def get_gpt_model() -> str:
"""Get the appropriate GPT model based on provider"""
"""Get GPT model based on provider"""
model_map = {
"openai": "gpt-4o-mini",
"anthropic": "claude-2",
@@ -32,16 +32,7 @@ def get_gpt_model() -> str:
def safe_filename(filename):
"""
Creates a safe filename that preserves the original extension.
Uses secure_filename, but ensures a proper filename is returned even with non-Latin characters.
Args:
filename (str): The original filename
Returns:
str: A safe filename that can be used for storage
"""
"""Create safe filename, preserving extension. Handles non-Latin characters."""
if not filename:
return str(uuid.uuid4())
_, extension = os.path.splitext(filename)
@@ -83,8 +74,25 @@ def count_tokens_docs(docs):
return tokens
def calculate_doc_token_budget(
gpt_model: str = "gpt-4o", history_token_limit: int = 2000
) -> int:
total_context = settings.LLM_TOKEN_LIMITS.get(
gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT
)
reserved = sum(settings.RESERVED_TOKENS.values())
doc_budget = total_context - history_token_limit - reserved
return max(doc_budget, 1000)
def get_missing_fields(data, required_fields):
"""Check for missing required fields. Returns list of missing field names."""
return [field for field in required_fields if field not in data]
def check_required_fields(data, required_fields):
missing_fields = [field for field in required_fields if field not in data]
"""Validate required fields. Returns Flask 400 response if validation fails, None otherwise."""
missing_fields = get_missing_fields(data, required_fields)
if missing_fields:
return make_response(
jsonify(
@@ -98,7 +106,8 @@ def check_required_fields(data, required_fields):
return None
def validate_required_fields(data, required_fields):
def get_field_validation_errors(data, required_fields):
"""Check for missing and empty fields. Returns dict with 'missing_fields' and 'empty_fields', or None."""
missing_fields = []
empty_fields = []
@@ -107,12 +116,24 @@ def validate_required_fields(data, required_fields):
missing_fields.append(field)
elif not data[field]:
empty_fields.append(field)
errors = []
if missing_fields:
errors.append(f"Missing required fields: {', '.join(missing_fields)}")
if empty_fields:
errors.append(f"Empty values in required fields: {', '.join(empty_fields)}")
if errors:
if missing_fields or empty_fields:
return {"missing_fields": missing_fields, "empty_fields": empty_fields}
return None
def validate_required_fields(data, required_fields):
"""Validate required fields (must exist and be non-empty). Returns Flask 400 response if validation fails, None otherwise."""
errors_dict = get_field_validation_errors(data, required_fields)
if errors_dict:
errors = []
if errors_dict["missing_fields"]:
errors.append(
f"Missing required fields: {', '.join(errors_dict['missing_fields'])}"
)
if errors_dict["empty_fields"]:
errors.append(
f"Empty values in required fields: {', '.join(errors_dict['empty_fields'])}"
)
return make_response(
jsonify({"success": False, "message": " | ".join(errors)}), 400
)
@@ -124,18 +145,15 @@ def get_hash(data):
def limit_chat_history(history, max_token_limit=None, gpt_model="docsgpt"):
"""
Limits chat history based on token count.
Returns a list of messages that fit within the token limit.
"""
"""Limit chat history to fit within token limit."""
from application.core.settings import settings
max_token_limit = (
max_token_limit
if max_token_limit
and max_token_limit
< settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
else settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
< settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT)
else settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT)
)
if not history:
@@ -161,13 +179,17 @@ def limit_chat_history(history, max_token_limit=None, gpt_model="docsgpt"):
def validate_function_name(function_name):
"""Validates if a function name matches the allowed pattern."""
"""Validate function name matches allowed pattern (alphanumeric, underscore, hyphen)."""
if not re.match(r"^[a-zA-Z0-9_-]+$", function_name):
return False
return True
def generate_image_url(image_path):
if isinstance(image_path, str) and (
image_path.startswith("http://") or image_path.startswith("https://")
):
return image_path
strategy = getattr(settings, "URL_STRATEGY", "backend")
if strategy == "s3":
bucket_name = getattr(settings, "S3_BUCKET_NAME", "docsgpt-test-bucket")
@@ -176,3 +198,44 @@ def generate_image_url(image_path):
else:
base_url = getattr(settings, "API_URL", "http://localhost:7091")
return f"{base_url}/api/images/{image_path}"
def clean_text_for_tts(text: str) -> str:
"""
clean text for Text-to-Speech processing.
"""
# Handle code blocks and links
text = re.sub(r'```mermaid[\s\S]*?```', ' flowchart, ', text) ## ```mermaid...```
text = re.sub(r'```[\s\S]*?```', ' code block, ', text) ## ```code```
text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) ## [text](url)
text = re.sub(r'!\[([^\]]*)\]\([^\)]+\)', '', text) ## ![alt](url)
# Remove markdown formatting
text = re.sub(r'`([^`]+)`', r'\1', text) ## `code`
text = re.sub(r'\{([^}]*)\}', r' \1 ', text) ## {text}
text = re.sub(r'[{}]', ' ', text) ## unmatched {}
text = re.sub(r'\[([^\]]+)\]', r' \1 ', text) ## [text]
text = re.sub(r'[\[\]]', ' ', text) ## unmatched []
text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text) ## **bold** __bold__
text = re.sub(r'(\*|_)(.*?)\1', r'\2', text) ## *italic* _italic_
text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE) ## # headers
text = re.sub(r'^>\s+', '', text, flags=re.MULTILINE) ## > blockquotes
text = re.sub(r'^[\s]*[-\*\+]\s+', '', text, flags=re.MULTILINE) ## - * + lists
text = re.sub(r'^[\s]*\d+\.\s+', '', text, flags=re.MULTILINE) ## 1. numbered lists
text = re.sub(r'^[\*\-_]{3,}\s*$', '', text, flags=re.MULTILINE) ## --- *** ___ rules
text = re.sub(r'<[^>]*>', '', text) ## <html> tags
#Remove non-ASCII (emojis, special Unicode)
text = re.sub(r'[^\x20-\x7E\n\r\t]', '', text)
#Replace special sequences
text = re.sub(r'-->', ', ', text) ## -->
text = re.sub(r'<--', ', ', text) ## <--
text = re.sub(r'=>', ', ', text) ## =>
text = re.sub(r'::', ' ', text) ## ::
#Normalize whitespace
text = re.sub(r'\s+', ' ', text)
text = text.strip()
return text

View File

@@ -1,20 +1,43 @@
from abc import ABC, abstractmethod
import logging
import os
from sentence_transformers import SentenceTransformer
from abc import ABC, abstractmethod
from langchain_openai import OpenAIEmbeddings
from sentence_transformers import SentenceTransformer
from application.core.settings import settings
class EmbeddingsWrapper:
def __init__(self, model_name, *args, **kwargs):
self.model = SentenceTransformer(model_name, config_kwargs={'allow_dangerous_deserialization': True}, *args, **kwargs)
self.dimension = self.model.get_sentence_embedding_dimension()
logging.info(f"Initializing EmbeddingsWrapper with model: {model_name}")
try:
kwargs.setdefault("trust_remote_code", True)
self.model = SentenceTransformer(
model_name,
config_kwargs={"allow_dangerous_deserialization": True},
*args,
**kwargs,
)
if self.model is None or self.model._first_module() is None:
raise ValueError(
f"SentenceTransformer model failed to load properly for: {model_name}"
)
self.dimension = self.model.get_sentence_embedding_dimension()
logging.info(f"Successfully loaded model with dimension: {self.dimension}")
except Exception as e:
logging.error(
f"Failed to initialize SentenceTransformer with model {model_name}: {str(e)}",
exc_info=True,
)
raise
def embed_query(self, query: str):
return self.model.encode(query).tolist()
def embed_documents(self, documents: list):
return self.model.encode(documents).tolist()
def __call__(self, text):
if isinstance(text, str):
return self.embed_query(text)
@@ -24,15 +47,14 @@ class EmbeddingsWrapper:
raise ValueError("Input must be a string or a list of strings")
class EmbeddingsSingleton:
_instances = {}
@staticmethod
def get_instance(embeddings_name, *args, **kwargs):
if embeddings_name not in EmbeddingsSingleton._instances:
EmbeddingsSingleton._instances[embeddings_name] = EmbeddingsSingleton._create_instance(
embeddings_name, *args, **kwargs
EmbeddingsSingleton._instances[embeddings_name] = (
EmbeddingsSingleton._create_instance(embeddings_name, *args, **kwargs)
)
return EmbeddingsSingleton._instances[embeddings_name]
@@ -40,9 +62,15 @@ class EmbeddingsSingleton:
def _create_instance(embeddings_name, *args, **kwargs):
embeddings_factory = {
"openai_text-embedding-ada-002": OpenAIEmbeddings,
"huggingface_sentence-transformers/all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
"huggingface_sentence-transformers-all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
"huggingface_hkunlp/instructor-large": lambda: EmbeddingsWrapper("hkunlp/instructor-large"),
"huggingface_sentence-transformers/all-mpnet-base-v2": lambda: EmbeddingsWrapper(
"sentence-transformers/all-mpnet-base-v2"
),
"huggingface_sentence-transformers-all-mpnet-base-v2": lambda: EmbeddingsWrapper(
"sentence-transformers/all-mpnet-base-v2"
),
"huggingface_hkunlp/instructor-large": lambda: EmbeddingsWrapper(
"hkunlp/instructor-large"
),
}
if embeddings_name in embeddings_factory:
@@ -50,41 +78,83 @@ class EmbeddingsSingleton:
else:
return EmbeddingsWrapper(embeddings_name, *args, **kwargs)
class BaseVectorStore(ABC):
def __init__(self):
pass
@abstractmethod
def search(self, *args, **kwargs):
"""Search for similar documents/chunks in the vectorstore"""
pass
@abstractmethod
def add_texts(self, texts, metadatas=None, *args, **kwargs):
"""Add texts with their embeddings to the vectorstore"""
pass
def delete_index(self, *args, **kwargs):
"""Delete the entire index/collection"""
pass
def save_local(self, *args, **kwargs):
"""Save vectorstore to local storage"""
pass
def get_chunks(self, *args, **kwargs):
"""Get all chunks from the vectorstore"""
pass
def add_chunk(self, text, metadata=None, *args, **kwargs):
"""Add a single chunk to the vectorstore"""
pass
def delete_chunk(self, chunk_id, *args, **kwargs):
"""Delete a specific chunk from the vectorstore"""
pass
def is_azure_configured(self):
return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
return (
settings.OPENAI_API_BASE
and settings.OPENAI_API_VERSION
and settings.AZURE_DEPLOYMENT_NAME
)
def _get_embeddings(self, embeddings_name, embeddings_key=None):
if embeddings_name == "openai_text-embedding-ada-002":
if self.is_azure_configured():
os.environ["OPENAI_API_TYPE"] = "azure"
embedding_instance = EmbeddingsSingleton.get_instance(
embeddings_name,
model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME
embeddings_name, model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME
)
else:
embedding_instance = EmbeddingsSingleton.get_instance(
embeddings_name,
openai_api_key=embeddings_key
embeddings_name, openai_api_key=embeddings_key
)
elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
if os.path.exists("./models/all-mpnet-base-v2"):
possible_paths = [
"/app/models/all-mpnet-base-v2", # Docker absolute path
"./models/all-mpnet-base-v2", # Relative path
]
local_model_path = None
for path in possible_paths:
if os.path.exists(path):
local_model_path = path
logging.info(f"Found local model at path: {path}")
break
else:
logging.info(f"Path does not exist: {path}")
if local_model_path:
embedding_instance = EmbeddingsSingleton.get_instance(
embeddings_name = "./models/all-mpnet-base-v2",
local_model_path,
)
else:
logging.warning(
f"Local model not found in any of the paths: {possible_paths}. Falling back to HuggingFace download."
)
embedding_instance = EmbeddingsSingleton.get_instance(
embeddings_name,
)
else:
embedding_instance = EmbeddingsSingleton.get_instance(embeddings_name)
return embedding_instance

View File

@@ -19,6 +19,7 @@ from bson.objectid import ObjectId
from application.agents.agent_creator import AgentCreator
from application.api.answer.services.stream_processor import get_prompt
from application.cache import get_redis_instance
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.parser.chunking import Chunker
@@ -38,6 +39,7 @@ sources_collection = db["sources"]
# Constants
MIN_TOKENS = 150
MAX_TOKENS = 1250
RECURSION_DEPTH = 2
@@ -214,8 +216,7 @@ def run_agent_logic(agent_config, input_data):
def ingest_worker(
self, directory, formats, job_name, file_path, filename, user,
retriever="classic"
self, directory, formats, job_name, file_path, filename, user, retriever="classic"
):
"""
Ingest and process documents.
@@ -240,7 +241,7 @@ def ingest_worker(
sample = False
storage = StorageCreator.get_storage()
logging.info(f"Ingest path: {file_path}", extra={"user": user, "job": job_name})
# Create temporary working directory
@@ -253,30 +254,32 @@ def ingest_worker(
# Handle directory case
logging.info(f"Processing directory: {file_path}")
files_list = storage.list_files(file_path)
for storage_file_path in files_list:
if storage.is_directory(storage_file_path):
continue
# Create relative path structure in temp directory
rel_path = os.path.relpath(storage_file_path, file_path)
local_file_path = os.path.join(temp_dir, rel_path)
os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
# Download file
try:
file_data = storage.get_file(storage_file_path)
with open(local_file_path, "wb") as f:
f.write(file_data.read())
except Exception as e:
logging.error(f"Error downloading file {storage_file_path}: {e}")
logging.error(
f"Error downloading file {storage_file_path}: {e}"
)
continue
else:
# Handle single file case
temp_filename = os.path.basename(file_path)
temp_file_path = os.path.join(temp_dir, temp_filename)
file_data = storage.get_file(file_path)
with open(temp_file_path, "wb") as f:
f.write(file_data.read())
@@ -285,7 +288,10 @@ def ingest_worker(
if temp_filename.endswith(".zip"):
logging.info(f"Extracting zip file: {temp_filename}")
extract_zip_recursive(
temp_file_path, temp_dir, current_depth=0, max_depth=RECURSION_DEPTH
temp_file_path,
temp_dir,
current_depth=0,
max_depth=RECURSION_DEPTH,
)
self.update_state(state="PROGRESS", meta={"current": 1})
@@ -300,8 +306,8 @@ def ingest_worker(
file_metadata=metadata_from_filename,
)
raw_docs = reader.load_data()
directory_structure = getattr(reader, 'directory_structure', {})
directory_structure = getattr(reader, "directory_structure", {})
logging.info(f"Directory structure from reader: {directory_structure}")
chunker = Chunker(
@@ -371,7 +377,10 @@ def reingest_source_worker(self, source_id, user):
try:
from application.vectorstore.vector_creator import VectorCreator
self.update_state(state="PROGRESS", meta={"current": 10, "status": "Initializing re-ingestion scan"})
self.update_state(
state="PROGRESS",
meta={"current": 10, "status": "Initializing re-ingestion scan"},
)
source = sources_collection.find_one({"_id": ObjectId(source_id), "user": user})
if not source:
@@ -380,7 +389,9 @@ def reingest_source_worker(self, source_id, user):
storage = StorageCreator.get_storage()
source_file_path = source.get("file_path", "")
self.update_state(state="PROGRESS", meta={"current": 20, "status": "Scanning current files"})
self.update_state(
state="PROGRESS", meta={"current": 20, "status": "Scanning current files"}
)
with tempfile.TemporaryDirectory() as temp_dir:
# Download all files from storage to temp directory, preserving directory structure
@@ -391,7 +402,6 @@ def reingest_source_worker(self, source_id, user):
if storage.is_directory(storage_file_path):
continue
rel_path = os.path.relpath(storage_file_path, source_file_path)
local_file_path = os.path.join(temp_dir, rel_path)
@@ -403,23 +413,39 @@ def reingest_source_worker(self, source_id, user):
with open(local_file_path, "wb") as f:
f.write(file_data.read())
except Exception as e:
logging.error(f"Error downloading file {storage_file_path}: {e}")
logging.error(
f"Error downloading file {storage_file_path}: {e}"
)
continue
reader = SimpleDirectoryReader(
input_dir=temp_dir,
recursive=True,
required_exts=[
".rst", ".md", ".pdf", ".txt", ".docx", ".csv", ".epub",
".html", ".mdx", ".json", ".xlsx", ".pptx", ".png",
".jpg", ".jpeg",
".rst",
".md",
".pdf",
".txt",
".docx",
".csv",
".epub",
".html",
".mdx",
".json",
".xlsx",
".pptx",
".png",
".jpg",
".jpeg",
],
exclude_hidden=True,
file_metadata=metadata_from_filename,
)
reader.load_data()
directory_structure = reader.directory_structure
logging.info(f"Directory structure built with token counts: {directory_structure}")
logging.info(
f"Directory structure built with token counts: {directory_structure}"
)
try:
old_directory_structure = source.get("directory_structure") or {}
@@ -433,11 +459,17 @@ def reingest_source_worker(self, source_id, user):
files = set()
if isinstance(struct, dict):
for name, meta in struct.items():
current_path = os.path.join(prefix, name) if prefix else name
if isinstance(meta, dict) and ("type" in meta and "size_bytes" in meta):
current_path = (
os.path.join(prefix, name) if prefix else name
)
if isinstance(meta, dict) and (
"type" in meta and "size_bytes" in meta
):
files.add(current_path)
elif isinstance(meta, dict):
files |= _flatten_directory_structure(meta, current_path)
files |= _flatten_directory_structure(
meta, current_path
)
return files
old_files = _flatten_directory_structure(old_directory_structure)
@@ -457,7 +489,9 @@ def reingest_source_worker(self, source_id, user):
logging.info("No files removed since last ingest.")
except Exception as e:
logging.error(f"Error comparing directory structures: {e}", exc_info=True)
logging.error(
f"Error comparing directory structures: {e}", exc_info=True
)
added_files = []
removed_files = []
try:
@@ -477,14 +511,21 @@ def reingest_source_worker(self, source_id, user):
settings.EMBEDDINGS_KEY,
)
self.update_state(state="PROGRESS", meta={"current": 40, "status": "Processing file changes"})
self.update_state(
state="PROGRESS",
meta={"current": 40, "status": "Processing file changes"},
)
# 1) Delete chunks from removed files
deleted = 0
if removed_files:
try:
for ch in vector_store.get_chunks() or []:
metadata = ch.get("metadata", {}) if isinstance(ch, dict) else getattr(ch, "metadata", {})
metadata = (
ch.get("metadata", {})
if isinstance(ch, dict)
else getattr(ch, "metadata", {})
)
raw_source = metadata.get("source")
source_file = str(raw_source) if raw_source else ""
@@ -496,10 +537,17 @@ def reingest_source_worker(self, source_id, user):
vector_store.delete_chunk(cid)
deleted += 1
except Exception as de:
logging.error(f"Failed deleting chunk {cid}: {de}")
logging.info(f"Deleted {deleted} chunks from {len(removed_files)} removed files")
logging.error(
f"Failed deleting chunk {cid}: {de}"
)
logging.info(
f"Deleted {deleted} chunks from {len(removed_files)} removed files"
)
except Exception as e:
logging.error(f"Error during deletion of removed file chunks: {e}", exc_info=True)
logging.error(
f"Error during deletion of removed file chunks: {e}",
exc_info=True,
)
# 2) Add chunks from new files
added = 0
@@ -528,58 +576,86 @@ def reingest_source_worker(self, source_id, user):
)
chunked_new = chunker_new.chunk(documents=raw_docs_new)
for file_path, token_count in reader_new.file_token_counts.items():
for (
file_path,
token_count,
) in reader_new.file_token_counts.items():
try:
rel_path = os.path.relpath(file_path, start=temp_dir)
rel_path = os.path.relpath(
file_path, start=temp_dir
)
path_parts = rel_path.split(os.sep)
current_dir = directory_structure
for part in path_parts[:-1]:
if part in current_dir and isinstance(current_dir[part], dict):
if part in current_dir and isinstance(
current_dir[part], dict
):
current_dir = current_dir[part]
else:
break
filename = path_parts[-1]
if filename in current_dir and isinstance(current_dir[filename], dict):
current_dir[filename]["token_count"] = token_count
logging.info(f"Updated token count for {rel_path}: {token_count}")
if filename in current_dir and isinstance(
current_dir[filename], dict
):
current_dir[filename][
"token_count"
] = token_count
logging.info(
f"Updated token count for {rel_path}: {token_count}"
)
except Exception as e:
logging.warning(f"Could not update token count for {file_path}: {e}")
logging.warning(
f"Could not update token count for {file_path}: {e}"
)
for d in chunked_new:
meta = dict(d.extra_info or {})
try:
raw_src = meta.get("source")
if isinstance(raw_src, str) and os.path.isabs(raw_src):
meta["source"] = os.path.relpath(raw_src, start=temp_dir)
if isinstance(raw_src, str) and os.path.isabs(
raw_src
):
meta["source"] = os.path.relpath(
raw_src, start=temp_dir
)
except Exception:
pass
vector_store.add_chunk(d.text, metadata=meta)
added += 1
logging.info(f"Added {added} chunks from {len(added_files)} new files")
logging.info(
f"Added {added} chunks from {len(added_files)} new files"
)
except Exception as e:
logging.error(f"Error during ingestion of new files: {e}", exc_info=True)
logging.error(
f"Error during ingestion of new files: {e}", exc_info=True
)
# 3) Update source directory structure timestamp
try:
total_tokens = sum(reader.file_token_counts.values())
sources_collection.update_one(
{"_id": ObjectId(source_id)},
{
"$set": {
"directory_structure": directory_structure,
"date": datetime.datetime.now(),
"tokens": total_tokens
"tokens": total_tokens,
}
},
)
except Exception as e:
logging.error(f"Error updating directory_structure in DB: {e}", exc_info=True)
logging.error(
f"Error updating directory_structure in DB: {e}", exc_info=True
)
self.update_state(state="PROGRESS", meta={"current": 100, "status": "Re-ingestion completed"})
self.update_state(
state="PROGRESS",
meta={"current": 100, "status": "Re-ingestion completed"},
)
return {
"source_id": source_id,
@@ -591,15 +667,16 @@ def reingest_source_worker(self, source_id, user):
"chunks_deleted": deleted,
}
except Exception as e:
logging.error(f"Error while processing file changes: {e}", exc_info=True)
logging.error(
f"Error while processing file changes: {e}", exc_info=True
)
raise
except Exception as e:
logging.error(f"Error in reingest_source_worker: {e}", exc_info=True)
raise
def remote_worker(
self,
source_data,
@@ -651,7 +728,7 @@ def remote_worker(
"id": str(id),
"type": loader,
"remote_data": source_data,
"sync_frequency": sync_frequency
"sync_frequency": sync_frequency,
}
if operation_mode == "sync":
@@ -664,7 +741,13 @@ def remote_worker(
if os.path.exists(full_path):
shutil.rmtree(full_path)
logging.info("remote_worker task completed successfully")
return {"urls": source_data, "name_job": name_job, "user": user, "limited": False}
return {
"id": str(id),
"urls": source_data,
"name_job": name_job,
"user": user,
"limited": False,
}
def sync(
@@ -712,7 +795,7 @@ def sync_worker(self, frequency):
self, source_data, name, user, source_type, frequency, retriever, doc_id
)
sync_counts["total_sync_count"] += 1
sync_counts[
sync_counts[
"sync_success" if resp["status"] == "success" else "sync_failure"
] += 1
return {
@@ -749,15 +832,14 @@ def attachment_worker(self, file_info, user):
input_files=[local_path], exclude_hidden=True, errors="ignore"
)
.load_data()[0]
.text,
.text,
)
token_count = num_tokens_from_string(content)
if token_count > 100000:
content = content[:250000]
token_count = num_tokens_from_string(content)
self.update_state(
state="PROGRESS", meta={"current": 80, "status": "Storing in database"}
)
@@ -872,37 +954,49 @@ def ingest_connector(
doc_id: Document ID for sync operations (required when operation_mode="sync")
sync_frequency: How often to sync ("never", "daily", "weekly", "monthly")
"""
logging.info(f"Starting remote ingestion from {source_type} for user: {user}, job: {job_name}")
logging.info(
f"Starting remote ingestion from {source_type} for user: {user}, job: {job_name}"
)
self.update_state(state="PROGRESS", meta={"current": 1})
with tempfile.TemporaryDirectory() as temp_dir:
try:
# Step 1: Initialize the appropriate loader
self.update_state(state="PROGRESS", meta={"current": 10, "status": "Initializing connector"})
self.update_state(
state="PROGRESS",
meta={"current": 10, "status": "Initializing connector"},
)
if not session_token:
raise ValueError(f"{source_type} connector requires session_token")
if not ConnectorCreator.is_supported(source_type):
raise ValueError(f"Unsupported connector type: {source_type}. Supported types: {ConnectorCreator.get_supported_connectors()}")
raise ValueError(
f"Unsupported connector type: {source_type}. Supported types: {ConnectorCreator.get_supported_connectors()}"
)
remote_loader = ConnectorCreator.create_connector(source_type, session_token)
remote_loader = ConnectorCreator.create_connector(
source_type, session_token
)
# Create a clean config for storage
api_source_config = {
"file_ids": file_ids or [],
"folder_ids": folder_ids or [],
"recursive": recursive
"recursive": recursive,
}
# Step 2: Download files to temp directory
self.update_state(state="PROGRESS", meta={"current": 20, "status": "Downloading files"})
download_info = remote_loader.download_to_directory(
temp_dir,
api_source_config
self.update_state(
state="PROGRESS", meta={"current": 20, "status": "Downloading files"}
)
if download_info.get("empty_result", False) or not download_info.get("files_downloaded", 0):
download_info = remote_loader.download_to_directory(
temp_dir, api_source_config
)
if download_info.get("empty_result", False) or not download_info.get(
"files_downloaded", 0
):
logging.warning(f"No files were downloaded from {source_type}")
# Create empty result directly instead of calling a separate method
return {
@@ -913,28 +1007,42 @@ def ingest_connector(
"source_config": api_source_config,
"directory_structure": "{}",
}
# Step 3: Use SimpleDirectoryReader to process downloaded files
self.update_state(state="PROGRESS", meta={"current": 40, "status": "Processing files"})
self.update_state(
state="PROGRESS", meta={"current": 40, "status": "Processing files"}
)
reader = SimpleDirectoryReader(
input_dir=temp_dir,
recursive=True,
required_exts=[
".rst", ".md", ".pdf", ".txt", ".docx", ".csv", ".epub",
".html", ".mdx", ".json", ".xlsx", ".pptx", ".png",
".jpg", ".jpeg",
".rst",
".md",
".pdf",
".txt",
".docx",
".csv",
".epub",
".html",
".mdx",
".json",
".xlsx",
".pptx",
".png",
".jpg",
".jpeg",
],
exclude_hidden=True,
file_metadata=metadata_from_filename,
)
raw_docs = reader.load_data()
directory_structure = getattr(reader, 'directory_structure', {})
directory_structure = getattr(reader, "directory_structure", {})
# Step 4: Process documents (chunking, embedding, etc.)
self.update_state(state="PROGRESS", meta={"current": 60, "status": "Processing documents"})
self.update_state(
state="PROGRESS", meta={"current": 60, "status": "Processing documents"}
)
chunker = Chunker(
chunking_strategy="classic_chunk",
max_tokens=MAX_TOKENS,
@@ -942,22 +1050,26 @@ def ingest_connector(
duplicate_headers=False,
)
raw_docs = chunker.chunk(documents=raw_docs)
# Preserve source information in document metadata
for doc in raw_docs:
if hasattr(doc, 'extra_info') and doc.extra_info:
source = doc.extra_info.get('source')
if hasattr(doc, "extra_info") and doc.extra_info:
source = doc.extra_info.get("source")
if source and os.path.isabs(source):
# Convert absolute path to relative path
doc.extra_info['source'] = os.path.relpath(source, start=temp_dir)
doc.extra_info["source"] = os.path.relpath(
source, start=temp_dir
)
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
if operation_mode == "upload":
id = ObjectId()
elif operation_mode == "sync":
if not doc_id or not ObjectId.is_valid(doc_id):
logging.error("Invalid doc_id provided for sync operation: %s", doc_id)
logging.error(
"Invalid doc_id provided for sync operation: %s", doc_id
)
raise ValueError("doc_id must be provided for sync operation.")
id = ObjectId(doc_id)
else:
@@ -966,7 +1078,9 @@ def ingest_connector(
vector_store_path = os.path.join(temp_dir, "vector_store")
os.makedirs(vector_store_path, exist_ok=True)
self.update_state(state="PROGRESS", meta={"current": 80, "status": "Storing documents"})
self.update_state(
state="PROGRESS", meta={"current": 80, "status": "Storing documents"}
)
embed_and_store_documents(docs, vector_store_path, id, self)
tokens = count_tokens_docs(docs)
@@ -978,13 +1092,12 @@ def ingest_connector(
"tokens": tokens,
"retriever": retriever,
"id": str(id),
"type": "connector",
"remote_data": json.dumps({
"provider": source_type,
**api_source_config
}),
"type": "connector:file",
"remote_data": json.dumps(
{"provider": source_type, **api_source_config}
),
"directory_structure": json.dumps(directory_structure),
"sync_frequency": sync_frequency
"sync_frequency": sync_frequency,
}
if operation_mode == "sync":
@@ -995,7 +1108,9 @@ def ingest_connector(
upload_index(vector_store_path, file_data)
# Ensure we mark the task as complete
self.update_state(state="PROGRESS", meta={"current": 100, "status": "Complete"})
self.update_state(
state="PROGRESS", meta={"current": 100, "status": "Complete"}
)
logging.info(f"Remote ingestion completed: {job_name}")
@@ -1005,9 +1120,136 @@ def ingest_connector(
"tokens": tokens,
"type": source_type,
"id": str(id),
"status": "complete"
"status": "complete",
}
except Exception as e:
logging.error(f"Error during remote ingestion: {e}", exc_info=True)
raise
def mcp_oauth(self, config: Dict[str, Any], user_id: str = None) -> Dict[str, Any]:
"""Worker to handle MCP OAuth flow asynchronously."""
logging.info(
"[MCP OAuth] Worker started for user_id=%s, config=%s", user_id, config
)
try:
import asyncio
from application.agents.tools.mcp_tool import MCPTool
task_id = self.request.id
logging.info("[MCP OAuth] Task ID: %s", task_id)
redis_client = get_redis_instance()
def update_status(status_data: Dict[str, Any]):
logging.info("[MCP OAuth] Updating status: %s", status_data)
status_key = f"mcp_oauth_status:{task_id}"
redis_client.setex(status_key, 600, json.dumps(status_data))
update_status(
{
"status": "in_progress",
"message": "Starting OAuth flow...",
"task_id": task_id,
}
)
tool_config = config.copy()
tool_config["oauth_task_id"] = task_id
logging.info("[MCP OAuth] Initializing MCPTool with config: %s", tool_config)
mcp_tool = MCPTool(tool_config, user_id)
async def run_oauth_discovery():
if not mcp_tool._client:
mcp_tool._setup_client()
return await mcp_tool._execute_with_client("list_tools")
update_status(
{
"status": "awaiting_redirect",
"message": "Waiting for OAuth redirect...",
"task_id": task_id,
}
)
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
logging.info("[MCP OAuth] Starting event loop for OAuth discovery...")
tools_response = loop.run_until_complete(run_oauth_discovery())
logging.info(
"[MCP OAuth] Tools response after async call: %s", tools_response
)
status_key = f"mcp_oauth_status:{task_id}"
redis_status = redis_client.get(status_key)
if redis_status:
logging.info(
"[MCP OAuth] Redis status after async call: %s", redis_status
)
else:
logging.warning(
"[MCP OAuth] No Redis status found after async call for key: %s",
status_key,
)
tools = mcp_tool.get_actions_metadata()
update_status(
{
"status": "completed",
"message": f"OAuth completed successfully. Found {len(tools)} tools.",
"tools": tools,
"tools_count": len(tools),
"task_id": task_id,
}
)
logging.info(
"[MCP OAuth] OAuth flow completed successfully for task_id=%s", task_id
)
return {"success": True, "tools": tools, "tools_count": len(tools)}
except Exception as e:
error_msg = f"OAuth flow failed: {str(e)}"
logging.error(
"[MCP OAuth] Exception in OAuth discovery: %s", error_msg, exc_info=True
)
update_status(
{
"status": "error",
"message": error_msg,
"error": str(e),
"task_id": task_id,
}
)
return {"success": False, "error": error_msg}
finally:
logging.info("[MCP OAuth] Closing event loop for task_id=%s", task_id)
loop.close()
except Exception as e:
error_msg = f"Failed to initialize OAuth flow: {str(e)}"
logging.error(
"[MCP OAuth] Exception during initialization: %s", error_msg, exc_info=True
)
update_status(
{
"status": "error",
"message": error_msg,
"error": str(e),
"task_id": task_id,
}
)
return {"success": False, "error": error_msg}
def mcp_oauth_status(self, task_id: str) -> Dict[str, Any]:
"""Check the status of an MCP OAuth flow."""
redis_client = get_redis_instance()
status_key = f"mcp_oauth_status:{task_id}"
status_data = redis_client.get(status_key)
if status_data:
return json.loads(status_data)
return {"status": "not_found", "message": "Status not found"}

View File

@@ -6,6 +6,7 @@ services:
environment:
- VITE_API_HOST=http://localhost:7091
- VITE_API_STREAMING=$VITE_API_STREAMING
- VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID
ports:
- "5173:5173"
depends_on:
@@ -71,4 +72,4 @@ services:
- mongodb_data_container:/data/db
volumes:
mongodb_data_container:
mongodb_data_container:

View File

@@ -7,6 +7,7 @@ services:
environment:
- VITE_API_HOST=http://localhost:7091
- VITE_API_STREAMING=$VITE_API_STREAMING
- VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID
ports:
- "5173:5173"
depends_on:

View File

@@ -107,3 +107,13 @@ Once an agent is created, you can:
* Modify any of its configuration settings (name, description, source, prompt, tools, type).
* **Generate a Public Link:** From the edit screen, you can create a shareable public link that allows others to import and use your agent.
* **Get a Webhook URL:** You can also obtain a Webhook URL for the agent. This allows external applications or services to trigger the agent and receive responses programmatically, enabling powerful integrations and automations.
## Seeding Premade Agents from YAML
You can bootstrap a fresh DocsGPT deployment with a curated set of agents by seeding them directly into MongoDB.
1. **Customize the configuration** edit `application/seed/config/premade_agents.yaml` (or copy from `application/seed/config/agents_template.yaml`) to describe the agents you want to provision. Each entry lets you define prompts, tools, and optional data sources.
2. **Ensure dependencies are running** MongoDB must be reachable using the credentials in `.env`, and a Celery worker should be available if any agent sources need to be ingested via `ingest_remote`.
3. **Execute the seeder** run `python -m application.seed.commands init`. Add `--force` when you need to reseed an existing environment.
The seeder keeps templates under the `system` user so they appear in the UI for anyone to clone or customize. Environment variable placeholders such as `${MY_TOKEN}` inside tool configs are resolved during the seeding process.

View File

@@ -42,7 +42,7 @@ To run the DocsGPT backend locally, you'll need to set up a Python environment a
* **Option 1: Using a `.env` file (Recommended):**
* If you haven't already, create a file named `.env` in the **root directory** of your DocsGPT project.
* Modify the `.env` file to adjust settings as needed. You can find a comprehensive list of configurable options in [`application/core/settings.py`](application/core/settings.py).
* Modify the `.env` file to adjust settings as needed. You can find a comprehensive list of configurable options in [`application/core/settings.py`](https://github.com/arc53/DocsGPT/blob/main/application/core/settings.py).
* **Option 2: Exporting Environment Variables:**
* Alternatively, you can export environment variables directly in your terminal. However, using a `.env` file is generally more organized for development.
@@ -67,7 +67,7 @@ To run the DocsGPT backend locally, you'll need to set up a Python environment a
3. **Download Embedding Model:**
The backend requires an embedding model. Download the `mpnet-base-v2` model and place it in the `model/` directory within the project root. You can use the following script:
The backend requires an embedding model. Download the `mpnet-base-v2` model and place it in the `models/` directory within the project root. You can use the following script:
```bash
wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
@@ -75,7 +75,7 @@ To run the DocsGPT backend locally, you'll need to set up a Python environment a
rm mpnet-base-v2.zip
```
Alternatively, you can manually download the zip file from [here](https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip), unzip it, and place the extracted folder in `model/`.
Alternatively, you can manually download the zip file from [here](https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip), unzip it, and place the extracted folder in `models/`.
4. **Install Backend Dependencies:**
@@ -160,4 +160,4 @@ To run the DocsGPT frontend locally, you'll need Node.js and npm (Node Package M
This command will start the Vite development server. The frontend application will typically be accessible at [http://localhost:5173/](http://localhost:5173/). The terminal will display the exact URL where the frontend is running.
With both the backend and frontend running, you should now have a fully functional DocsGPT development environment. You can access the application in your browser at [http://localhost:5173/](http://localhost:5173/) and start developing!
With both the backend and frontend running, you should now have a fully functional DocsGPT development environment. You can access the application in your browser at [http://localhost:5173/](http://localhost:5173/) and start developing!

View File

@@ -1,49 +1,453 @@
---
title: Customizing Prompts
description: This guide will explain how to change prompts in DocsGPT and why it might be benefitial. Additionaly this article expains additional variables that can be used in prompts.
title: Customizing Prompts
description: This guide explains how to customize prompts in DocsGPT using the new template-based system with dynamic variable injection.
---
import Image from 'next/image'
# Customizing the Main Prompt
# Customizing Prompts in DocsGPT
Customizing the main prompt for DocsGPT gives you the ability to tailor the AI's responses to your specific requirements. By modifying the prompt text, you can achieve more accurate and relevant answers. Here's how you can do it:
Customizing prompts for DocsGPT gives you powerful control over the AI's behavior and responses. With the new template-based system, you can inject dynamic context through organized namespaces, making prompts flexible and maintainable without hardcoding values.
## Quick Start
1. Navigate to `SideBar -> Settings`.
2.In Settings select the `Active Prompt` now you will be able to see various prompts style.x
3.Click on the `edit icon` on the prompt of your choice and you will be able to see the current prompt for it,you can now customise the prompt as per your choice.
2. In Settings, select the `Active Prompt` to see various prompt styles.
3. Click on the `edit icon` on your chosen prompt to customize it.
### Video Demo
<Image src="/prompts.gif" alt="prompts" width={800} height={500} />
---
## Template-Based Prompt System
## Example Prompt Modification
DocsGPT now uses **Jinja2 templating** with four organized namespaces for dynamic variable injection:
### Available Namespaces
#### 1. **`system`** - System Metadata
Access system-level information:
```jinja
{{ system.date }} # Current date (YYYY-MM-DD)
{{ system.time }} # Current time (HH:MM:SS)
{{ system.timestamp }} # ISO 8601 timestamp
{{ system.request_id }} # Unique request identifier
{{ system.user_id }} # Current user ID
```
#### 2. **`source`** - Retrieved Documents
Access RAG (Retrieval-Augmented Generation) document context:
```jinja
{{ source.content }} # Concatenated document content
{{ source.summaries }} # Alias for content (backward compatible)
{{ source.documents }} # List of document objects
{{ source.count }} # Number of retrieved documents
```
#### 3. **`passthrough`** - Request Parameters
Access custom parameters passed in the API request:
```jinja
{{ passthrough.company }} # Custom field from request
{{ passthrough.user_name }} # User-provided data
{{ passthrough.context }} # Any custom parameter
```
To use passthrough data, send it in your API request:
```json
{
"question": "What is the pricing?",
"passthrough": {
"company": "Acme Corp",
"user_name": "Alice",
"plan_type": "enterprise"
}
}
```
#### 4. **`tools`** - Pre-fetched Tool Data
Access results from tools that run before the agent (like memory tool):
```jinja
{{ tools.memory.root }} # Memory tool directory listing
{{ tools.memory.available }} # Boolean: is memory available
```
---
## Example Prompts
### Basic Prompt with Documents
```jinja
You are a helpful AI assistant for DocsGPT.
Current date: {{ system.date }}
Use the following documents to answer the question:
{{ source.content }}
Provide accurate, helpful answers with code examples when relevant.
```
### Advanced Prompt with All Namespaces
```jinja
You are an AI assistant for {{ passthrough.company }}.
**System Info:**
- Date: {{ system.date }}
- Request ID: {{ system.request_id }}
**User Context:**
- User: {{ passthrough.user_name }}
- Role: {{ passthrough.role }}
**Available Documents ({{ source.count }}):**
{{ source.content }}
**Memory Context:**
{% if tools.memory.available %}
{{ tools.memory.root }}
{% else %}
No saved context available.
{% endif %}
Please provide detailed, accurate answers based on the documents above.
```
### Conditional Logic Example
```jinja
You are a DocsGPT assistant.
{% if source.count > 0 %}
I found {{ source.count }} relevant document(s):
{{ source.content }}
Base your answer on these documents.
{% else %}
No documents were found. Please answer based on your general knowledge.
{% endif %}
```
---
## Migration Guide
### Legacy Format (Still Supported)
The old `{summaries}` format continues to work for backward compatibility:
**Original Prompt:**
```markdown
You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible.
Use the following pieces of context to help answer the users question. If it's not relevant to the question, provide friendly responses.
You have access to chat history, and can use it to help answer the question.
When using code examples, use the following format:
You are a helpful assistant.
(code)
Documents:
{summaries}
```
Note that `{summaries}` allows model to see and respond to your upploaded documents. If you don't want this functionality you can safely remove it from the customized prompt.
This will automatically substitute `{summaries}` with document content.
Feel free to customize the prompt to align it with your specific use case or the kind of responses you want from the AI. For example, you can focus on specific document types, industries, or topics to get more targeted results.
### New Template Format (Recommended)
Migrate to the new template syntax for more flexibility:
```jinja
You are a helpful assistant.
Documents:
{{ source.content }}
```
**Migration mapping:**
- `{summaries}` → `{{ source.content }}` or `{{ source.summaries }}`
---
## Best Practices
### 1. **Use Descriptive Context**
```jinja
**Retrieved Documents:**
{{ source.content }}
**User Query Context:**
- Company: {{ passthrough.company }}
- Department: {{ passthrough.department }}
```
### 2. **Handle Missing Data Gracefully**
```jinja
{% if passthrough.user_name %}
Hello {{ passthrough.user_name }}!
{% endif %}
```
### 3. **Leverage Memory for Continuity**
```jinja
{% if tools.memory.available %}
**Previous Context:**
{{ tools.memory.root }}
{% endif %}
**Current Question:**
Please consider the above context when answering.
```
### 4. **Add Clear Instructions**
```jinja
You are a technical support assistant.
**Guidelines:**
1. Always reference the documents below
2. Provide step-by-step instructions
3. Include code examples when relevant
**Reference Documents:**
{{ source.content }}
```
---
## Advanced Features
### Looping Over Documents
```jinja
{% for doc in source.documents %}
**Source {{ loop.index }}:** {{ doc.filename }}
{{ doc.text }}
{% endfor %}
```
### Date-Based Behavior
```jinja
{% if system.date > "2025-01-01" %}
Note: This is information from 2025 or later.
{% endif %}
```
### Custom Formatting
```jinja
**Request Information**
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
• Request ID: {{ system.request_id }}
• User: {{ passthrough.user_name | default("Guest") }}
• Time: {{ system.time }}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
```
---
## Tool Pre-Fetching
### Memory Tool Configuration
Enable memory tool pre-fetching to inject saved context into prompts:
```python
# In your tool configuration
{
"name": "memory",
"config": {
"pre_fetch_enabled": true # Default: true
}
}
```
Control pre-fetching globally:
```bash
# .env file
ENABLE_TOOL_PREFETCH=true
```
Or per-request:
```json
{
"question": "What are the requirements?",
"disable_tool_prefetch": false
}
```
---
## Debugging Prompts
### View Rendered Prompts in Logs
Set log level to `INFO` to see the final rendered prompt sent to the LLM:
```bash
export LOG_LEVEL=INFO
```
You'll see output like:
```
INFO - Rendered system prompt for agent (length: 1234 chars):
================================================================================
You are a helpful assistant for Acme Corp.
Current date: 2025-10-30
Request ID: req_abc123
Documents:
Technical documentation about...
================================================================================
```
### Template Validation
Test your template syntax before saving:
```python
from application.api.answer.services.prompt_renderer import PromptRenderer
renderer = PromptRenderer()
is_valid = renderer.validate_template("Your prompt with {{ variables }}")
```
---
## Common Use Cases
### 1. Customer Support Bot
```jinja
You are a customer support assistant for {{ passthrough.company }}.
**Customer:** {{ passthrough.customer_name }}
**Ticket ID:** {{ system.request_id }}
**Date:** {{ system.date }}
**Knowledge Base:**
{{ source.content }}
**Previous Interactions:**
{{ tools.memory.root }}
Please provide helpful, friendly support based on the knowledge base above.
```
### 2. Technical Documentation Assistant
```jinja
You are a technical documentation expert.
**Available Documentation ({{ source.count }} documents):**
{{ source.content }}
**Requirements:**
- Provide code examples in {{ passthrough.language }}
- Focus on {{ passthrough.framework }} best practices
- Include relevant links when possible
```
### 3. Internal Knowledge Base
```jinja
You are an internal AI assistant for {{ passthrough.department }}.
**Employee:** {{ passthrough.employee_name }}
**Access Level:** {{ passthrough.access_level }}
**Relevant Documents:**
{{ source.content }}
Provide detailed answers appropriate for {{ passthrough.access_level }} access level.
```
---
## Template Syntax Reference
### Variables
```jinja
{{ variable_name }} # Output variable
{{ namespace.field }} # Access nested field
{{ variable | default("N/A") }} # Default value
```
### Conditionals
```jinja
{% if condition %}
Content
{% elif other_condition %}
Other content
{% else %}
Default content
{% endif %}
```
### Loops
```jinja
{% for item in list %}
{{ item.field }}
{% endfor %}
```
### Comments
```jinja
{# This is a comment and won't appear in output #}
```
---
## Security Considerations
1. **Input Sanitization**: Passthrough data is automatically sanitized to prevent injection attacks
2. **Type Filtering**: Only primitive types (string, int, float, bool, None) are allowed in passthrough
3. **Autoescaping**: Jinja2 autoescaping is enabled by default
4. **Size Limits**: Consider the token budget when including large documents
---
## Troubleshooting
### Problem: Variables Not Rendering
**Solution:** Ensure you're using the correct namespace:
```jinja
❌ {{ company }}
✅ {{ passthrough.company }}
```
### Problem: Empty Output for Tool Data
**Solution:** Check that tool pre-fetching is enabled and the tool is configured correctly.
### Problem: Syntax Errors
**Solution:** Validate template syntax. Common issues:
```jinja
❌ {{ variable } # Missing closing brace
❌ {% if x % # Missing closing %}
✅ {{ variable }}
✅ {% if x %}...{% endif %}
```
### Problem: Legacy Prompts Not Working
**Solution:** The system auto-detects template syntax. If your prompt uses `{summaries}`, it will work in legacy mode. To use new features, add `{{ }}` syntax.
---
## API Reference
### Render Prompt via API
```python
from application.api.answer.services.prompt_renderer import PromptRenderer
renderer = PromptRenderer()
rendered = renderer.render_prompt(
prompt_content="Your template with {{ passthrough.name }}",
user_id="user_123",
request_id="req_456",
passthrough_data={"name": "Alice"},
docs_together="Document content here",
tools_data={"memory": {"root": "Files: notes.txt"}}
)
```
---
## Conclusion
Customizing the main prompt for DocsGPT allows you to tailor the AI's responses to your unique requirements. Whether you need in-depth explanations, code examples, or specific insights, you can achieve it by modifying the main prompt. Remember to experiment and fine-tune your prompts to get the best results.
The new template-based prompt system provides powerful flexibility while maintaining backward compatibility. By leveraging namespaces, you can create dynamic, context-aware prompts that adapt to your specific use case.
**Key Benefits:**
- ✅ Dynamic variable injection
- ✅ Organized namespaces
- ✅ Backward compatible
- ✅ Security built-in
- ✅ Easy to debug
Start with simple templates and gradually add complexity as needed. Happy prompting! 🚀

View File

@@ -0,0 +1,6 @@
{
"google-drive-connector": {
"title": "🔗 Google Drive",
"href": "/Guides/Integrations/google-drive-connector"
}
}

View File

@@ -0,0 +1,212 @@
---
title: Google Drive Connector
description: Connect your Google Drive as an external knowledge base to upload and process files directly from your Google Drive account.
---
import { Callout } from 'nextra/components'
import { Steps } from 'nextra/components'
# Google Drive Connector
The Google Drive Connector allows you to seamlessly connect your Google Drive account as an external knowledge base. This integration enables you to upload and process files directly from your Google Drive without manually downloading and uploading them to DocsGPT.
## Features
- **Direct File Access**: Browse and select files directly from your Google Drive
- **Comprehensive File Support**: Supports all major document formats including:
- Google Workspace files (Docs, Sheets, Slides)
- Microsoft Office files (.docx, .xlsx, .pptx, .doc, .ppt, .xls)
- PDF documents
- Text files (.txt, .md, .rst, .html, .rtf)
- Data files (.csv, .json)
- Image files (.png, .jpg, .jpeg)
- E-books (.epub)
- **Secure Authentication**: Uses OAuth 2.0 for secure access to your Google Drive
- **Real-time Sync**: Process files directly from Google Drive without local downloads
<Callout type="info" emoji="">
The Google Drive Connector requires proper configuration of Google API credentials. Follow the setup instructions below to enable this feature.
</Callout>
## Prerequisites
Before setting up the Google Drive Connector, you'll need:
1. A Google Cloud Platform (GCP) project
2. Google Drive API enabled
3. OAuth 2.0 credentials configured
4. DocsGPT instance with proper environment variables
## Setup Instructions
<Steps>
### Step 1: Create a Google Cloud Project
1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
2. Create a new project or select an existing one
3. Note down your Project ID for later use
### Step 2: Enable Google Drive API
1. In the Google Cloud Console, navigate to **APIs & Services** > **Library**
2. Search for "Google Drive API"
3. Click on "Google Drive API" and click **Enable**
### Step 3: Create OAuth 2.0 Credentials
1. Go to **APIs & Services** > **Credentials**
2. Click **Create Credentials** > **OAuth client ID**
3. If prompted, configure the OAuth consent screen:
- Choose **External** user type (unless you're using Google Workspace)
- Fill in the required fields (App name, User support email, Developer contact)
- Add your domain to **Authorized domains** if deploying publicly
4. For Application type, select **Web application**
5. Add your DocsGPT frontend URL to **Authorized JavaScript origins**:
- For local development: `http://localhost:3000`
- For production: `https://yourdomain.com`
6. Add your DocsGPT callback URL to **Authorized redirect URIs**:
- For local development: `http://localhost:7091/api/connectors/callback?provider=google_drive`
- For production: `https://yourdomain.com/api/connectors/callback?provider=google_drive`
7. Click **Create** and note down the **Client ID** and **Client Secret**
### Step 4: Configure Backend Environment Variables
Add the following environment variables to your backend configuration:
**For Docker deployment**, add to your `.env` file in the root directory:
```env
# Google Drive Connector Configuration
GOOGLE_CLIENT_ID=your_google_client_id_here
GOOGLE_CLIENT_SECRET=your_google_client_secret_here
```
**For manual deployment**, set these environment variables in your system or application configuration.
### Step 5: Configure Frontend Environment Variables
Add the following environment variables to your frontend `.env` file:
```env
# Google Drive Frontend Configuration
VITE_GOOGLE_CLIENT_ID=your_google_client_id_here
```
<Callout type="warning" emoji="⚠️">
Make sure to use the same Google Client ID in both backend and frontend configurations.
</Callout>
### Step 6: Restart Your Application
After configuring the environment variables:
1. **For Docker**: Restart your Docker containers
```bash
docker-compose down
docker-compose up -d
```
2. **For manual deployment**: Restart both backend and frontend services
</Steps>
## Using the Google Drive Connector
Once configured, you can use the Google Drive Connector to upload files:
<Steps>
### Step 1: Access the Upload Interface
1. Navigate to the DocsGPT interface
2. Go to the upload/training section
3. You should now see "Google Drive" as an available upload option
### Step 2: Connect Your Google Account
1. Select "Google Drive" as your upload method
2. Click "Connect to Google Drive"
3. You'll be redirected to Google's OAuth consent screen
4. Grant the necessary permissions to DocsGPT
5. You'll be redirected back to DocsGPT with a successful connection
### Step 3: Select Files
1. Once connected, click "Select Files"
2. The Google Drive picker will open
3. Browse your Google Drive and select the files you want to process
4. Click "Select" to confirm your choices
### Step 4: Process Files
1. Review your selected files
2. Click "Train" or "Upload" to process the files
3. DocsGPT will download and process the files from your Google Drive
4. Once processing is complete, the files will be available in your knowledge base
</Steps>
## Supported File Types
The Google Drive Connector supports the following file types:
| File Type | Extensions | Description |
|-----------|------------|-------------|
| **Google Workspace** | - | Google Docs, Sheets, Slides (automatically converted) |
| **Microsoft Office** | .docx, .xlsx, .pptx | Modern Office formats |
| **Legacy Office** | .doc, .ppt, .xls | Older Office formats |
| **PDF Documents** | .pdf | Portable Document Format |
| **Text Files** | .txt, .md, .rst, .html, .rtf | Various text formats |
| **Data Files** | .csv, .json | Structured data formats |
| **Images** | .png, .jpg, .jpeg | Image files (with OCR if enabled) |
| **E-books** | .epub | Electronic publication format |
## Troubleshooting
### Common Issues
**"Google Drive option not appearing"**
- Verify that `VITE_GOOGLE_CLIENT_ID` is set in frontend environment
- Check that `VITE_GOOGLE_CLIENT_ID` environment variable is present in your frontend configuration
- Check browser console for any JavaScript errors
- Ensure the frontend has been restarted after adding environment variables
**"Authentication failed"**
- Verify that your OAuth 2.0 credentials are correctly configured
- Check that the redirect URI `http://<your-domain>/api/connectors/callback?provider=google_drive` is correctly added in GCP console
- Ensure the Google Drive API is enabled in your GCP project
**"Permission denied" errors**
- Verify that the OAuth consent screen is properly configured
- Check that your Google account has access to the files you're trying to select
- Ensure the required scopes are granted during authentication
**"Files not processing"**
- Check that the backend environment variables are correctly set
- Verify that the OAuth credentials have the necessary permissions
- Check the backend logs for any error messages
### Environment Variable Checklist
**Backend (.env in root directory):**
- ✅ `GOOGLE_CLIENT_ID`
- ✅ `GOOGLE_CLIENT_SECRET`
**Frontend (.env in frontend directory):**
- ✅ `VITE_GOOGLE_CLIENT_ID`
### Security Considerations
- Keep your Google Client Secret secure and never expose it in frontend code
- Regularly rotate your OAuth credentials
- Use HTTPS in production to protect authentication tokens
- Ensure proper OAuth consent screen configuration for production use
<Callout type="tip" emoji="💡">
For production deployments, make sure to add your actual domain to the OAuth consent screen and authorized origins/redirect URIs.
</Callout>

View File

@@ -20,5 +20,8 @@
"Architecture": {
"title": "🏗️ Architecture",
"href": "/Guides/Architecture"
},
"Integrations": {
"title": "🔗 Integrations"
}
}

View File

@@ -43,7 +43,8 @@ The easiest way to launch DocsGPT is using the provided `setup.sh` script. This
2) Serve Local (with Ollama)
3) Connect Local Inference Engine
4) Connect Cloud API Provider
Choose option (1-4):
5) Advanced: Build images locally (for developers)
Choose option (1-5):
```
Let's break down each option:
@@ -56,6 +57,8 @@ The easiest way to launch DocsGPT is using the provided `setup.sh` script. This
* **4) Connect Cloud API Provider:** This option lets you connect DocsGPT to a commercial Cloud API provider such as OpenAI, Google (Vertex AI/Gemini), Anthropic (Claude), Groq, HuggingFace Inference API, or Azure OpenAI. You will need an API key from your chosen provider. Select this if you prefer to use a powerful cloud-based LLM.
* **5) Modify DocsGPT's source code and rebuild the Docker images locally. Instead of pulling prebuilt images from Docker Hub or using the hosted/public API, you build the entire backend and frontend from source, customizing how DocsGPT works internally, or run it in an environment without internet access.
After selecting an option and providing any required information (like API keys or model names), the script will configure your `.env` file and start DocsGPT using Docker Compose.
4. **Access DocsGPT in your browser:**

View File

@@ -1,4 +1,6 @@
# Please put appropriate value
VITE_BASE_URL=http://localhost:5173
VITE_API_HOST=http://127.0.0.1:7091
VITE_API_STREAMING=true
VITE_API_STREAMING=true
VITE_NOTIFICATION_TEXT="What's new in 0.14.0 — Changelog"
VITE_NOTIFICATION_LINK="#"

File diff suppressed because it is too large Load Diff

View File

@@ -23,15 +23,16 @@
"chart.js": "^4.4.4",
"clsx": "^2.1.1",
"copy-to-clipboard": "^3.3.3",
"i18next": "^24.2.0",
"i18next-browser-languagedetector": "^8.0.2",
"i18next": "^25.5.3",
"i18next-browser-languagedetector": "^8.2.0",
"lodash": "^4.17.21",
"mermaid": "^11.6.0",
"mermaid": "^11.12.1",
"prop-types": "^15.8.1",
"react": "^19.1.0",
"react-chartjs-2": "^5.3.0",
"react-dom": "^19.0.0",
"react-dom": "^19.1.1",
"react-dropzone": "^14.3.8",
"react-google-drive-picker": "^1.2.2",
"react-i18next": "^15.4.0",
"react-markdown": "^9.0.1",
"react-redux": "^9.2.0",
@@ -47,17 +48,17 @@
"@types/lodash": "^4.17.20",
"@types/mermaid": "^9.1.0",
"@types/react": "^19.1.8",
"@types/react-dom": "^19.0.0",
"@types/react-dom": "^19.1.7",
"@types/react-syntax-highlighter": "^15.5.13",
"@typescript-eslint/eslint-plugin": "^5.51.0",
"@typescript-eslint/parser": "^5.62.0",
"@typescript-eslint/eslint-plugin": "^6.21.0",
"@typescript-eslint/parser": "^6.21.0",
"@vitejs/plugin-react": "^4.3.4",
"eslint": "^8.57.1",
"eslint-config-prettier": "^10.1.5",
"eslint-config-standard-with-typescript": "^34.0.0",
"eslint-config-standard-with-typescript": "^43.0.1",
"eslint-plugin-import": "^2.31.0",
"eslint-plugin-n": "^15.7.0",
"eslint-plugin-prettier": "^5.2.1",
"eslint-plugin-prettier": "^5.5.4",
"eslint-plugin-promise": "^6.6.0",
"eslint-plugin-react": "^7.37.5",
"eslint-plugin-unused-imports": "^4.1.4",

View File

@@ -0,0 +1,4 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="64" height="64" color="#000000" fill="none">
<path d="M3.49994 11.7501L11.6717 3.57855C12.7762 2.47398 14.5672 2.47398 15.6717 3.57855C16.7762 4.68312 16.7762 6.47398 15.6717 7.57855M15.6717 7.57855L9.49994 13.7501M15.6717 7.57855C16.7762 6.47398 18.5672 6.47398 19.6717 7.57855C20.7762 8.68312 20.7762 10.474 19.6717 11.5785L12.7072 18.543C12.3167 18.9335 12.3167 19.5667 12.7072 19.9572L13.9999 21.2499" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path>
<path d="M17.4999 9.74921L11.3282 15.921C10.2237 17.0255 8.43272 17.0255 7.32823 15.921C6.22373 14.8164 6.22373 13.0255 7.32823 11.921L13.4999 5.74939" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"></path>
</svg>

After

Width:  |  Height:  |  Size: 831 B

View File

@@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#e3e3e3">
<path d="M240-80q-33 0-56.5-23.5T160-160v-480q0-33 23.5-56.5T240-720h80v-80q0-17 11.5-28.5T360-840q17 0 28.5 11.5T400-800v80h40v-80q0-17 11.5-28.5T480-840q17 0 28.5 11.5T520-800v80h40v-80q0-17 11.5-28.5T600-840q17 0 28.5 11.5T640-800v80h80q33 0 56.5 23.5T800-640v480q0 33-23.5 56.5T720-80H240Zm0-80h480v-480H240v480Zm120-320v-80h240v80H360Zm0 120v-80h240v80H360Zm0 120v-80h160v80H360ZM240-160v-480 480Z"/>
</svg>

After

Width:  |  Height:  |  Size: 523 B

View File

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#e3e3e3"><path d="M320-240h320v-80H320v80Zm0-160h320v-80H320v80ZM240-80q-33 0-56.5-23.5T160-160v-640q0-33 23.5-56.5T240-880h320l240 240v480q0 33-23.5 56.5T720-80H240Zm280-520v-200H240v640h480v-440H520ZM240-800v200-200 640-640Z"/></svg>

After

Width:  |  Height:  |  Size: 334 B

View File

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#e3e3e3"><path d="M240-80q-33 0-56.5-23.5T160-160v-640q0-33 23.5-56.5T240-880h480q33 0 56.5 23.5T800-800v640q0 33-23.5 56.5T720-80H240Zm0-80h480v-640H240v640Zm88-104 56-56-56-56-56 56 56 56Zm0-160 56-56-56-56-56 56 56 56Zm0-160 56-56-56-56-56 56 56 56Zm120 280h232v-80H448v80Zm0-160h232v-80H448v80Zm0-160h232v-80H448v80ZM240-160v-640 640Z"/></svg>

After

Width:  |  Height:  |  Size: 446 B

View File

@@ -7,6 +7,7 @@ import Agents from './agents';
import SharedAgentGate from './agents/SharedAgentGate';
import ActionButtons from './components/ActionButtons';
import Spinner from './components/Spinner';
import UploadToast from './components/UploadToast';
import Conversation from './conversation/Conversation';
import { SharedConversation } from './conversation/SharedConversation';
import { useDarkTheme, useMediaQuery } from './hooks';
@@ -14,6 +15,7 @@ import useTokenAuth from './hooks/useTokenAuth';
import Navigation from './Navigation';
import PageNotFound from './PageNotFound';
import Setting from './settings';
import Notification from './components/Notification';
function AuthWrapper({ children }: { children: React.ReactNode }) {
const { isAuthLoading } = useTokenAuth();
@@ -33,28 +35,45 @@ function MainLayout() {
const [navOpen, setNavOpen] = useState(!(isMobile || isTablet));
return (
<div className="relative h-screen overflow-hidden dark:bg-raisin-black">
<div className="dark:bg-raisin-black relative h-screen overflow-hidden">
<Navigation navOpen={navOpen} setNavOpen={setNavOpen} />
<ActionButtons showNewChat={true} showShare={true} />
<div
className={`h-[calc(100dvh-64px)] overflow-auto lg:h-screen ${
className={`h-[calc(100dvh-64px)] overflow-auto transition-all duration-300 ease-in-out lg:h-screen ${
!(isMobile || isTablet)
? `ml-0 ${!navOpen ? 'lg:mx-auto' : 'lg:ml-72'}`
? `${navOpen ? 'lg:ml-72' : 'lg:ml-0'}`
: 'ml-0 lg:ml-16'
}`}
>
<Outlet />
</div>
<UploadToast />
</div>
);
}
export default function App() {
const [, , componentMounted] = useDarkTheme();
const [showNotification, setShowNotification] = useState<boolean>(() => {
const saved = localStorage.getItem('showNotification');
return saved ? JSON.parse(saved) : true;
});
const notificationText = import.meta.env.VITE_NOTIFICATION_TEXT;
const notificationLink = import.meta.env.VITE_NOTIFICATION_LINK;
if (!componentMounted) {
return <div />;
}
return (
<div className="relative h-full overflow-hidden">
{notificationLink && notificationText && showNotification && (
<Notification
notificationText={notificationText}
notificationLink={notificationLink}
handleCloseNotification={() => {
setShowNotification(false);
localStorage.setItem('showNotification', 'false');
}}
/>
)}
<Routes>
<Route
element={

Some files were not shown because too many files have changed in this diff Show More