Compare commits

...

45 Commits

Author SHA1 Message Date
Luis Pater
7223fee2de Merge branch 'pr-488'
# Conflicts:
#	README.md
#	README_CN.md
#	README_JA.md
2026-04-05 02:08:45 +08:00
Luis Pater
ada8e2905e feat(api): enhance proxy resolution for API key-based auth
Added comprehensive support for resolving proxy URLs from configuration based on API key and provider attributes. Introduced new helper functions and extended the test suite to validate fallback mechanisms and compatibility cases.
2026-04-05 01:56:34 +08:00
Luis Pater
4ba10531da feat(docs): add Poixe AI sponsorship details to README files
Added Poixe AI sponsorship information, including referral bonuses and platform capabilities, to README files in English, Japanese, and Chinese. Updated assets to include Poixe AI logo.
2026-04-05 01:20:50 +08:00
Luis Pater
3774b56e9f feat(misc): add background updater for Antigravity version caching
Introduce `StartAntigravityVersionUpdater` to periodically refresh the cached Antigravity version using a non-blocking background process. Updated main server flow to initialize the updater.
2026-04-04 22:09:11 +08:00
Luis Pater
c2d4137fb9 feat(executor): enhance Qwen system message handling with strict injection and merging rules
Closes: #2537
2026-04-04 21:51:02 +08:00
Luis Pater
2ee938acaf Merge pull request #2535 from rensumo/main
feat: 动态获取 Antigravity User-Agent 版本号
2026-04-04 21:00:47 +08:00
rensumo
8d5e470e1f feat: dynamically fetch antigravity UA version from releases API
Fetch the latest version from the antigravity auto-updater releases
endpoint and cache it for 6 hours. Falls back to 1.21.9 if the API
is unreachable or returns unexpected data.
2026-04-04 14:52:59 +08:00
Luis Pater
3882494878 Merge pull request #486 from router-for-me/plus
v6.9.14
2026-04-04 11:40:13 +08:00
Luis Pater
088c1d07f4 Merge branch 'main' into plus 2026-04-04 11:40:03 +08:00
Luis Pater
8430b28cfa Merge pull request #2526 from rensumo/main
feat: 升级反重力 (antigravity) UA 版本为 1.21.9
2026-04-04 11:32:16 +08:00
rensumo
f3ab8f4bc5 chore: update antigravity UA version to 1.21.9 2026-04-04 07:35:08 +08:00
Luis Pater
0e4f189c2e Merge pull request #1302 from dinhkarate/feat(vertex)/add-prefix-field
Feat(vertex): add prefix field
2026-04-04 04:17:12 +08:00
Luis Pater
98509f615c Merge pull request #485 from kunish/fix/copilot-premium-request-inflation
fix(copilot): reduce premium request inflation, enable thinking, and use dynamic API limits
2026-04-04 02:19:56 +08:00
Luis Pater
e7a66ae504 Merge branch 'router-for-me:main' into main 2026-04-04 02:18:06 +08:00
Luis Pater
754b126944 fix(executor): remove commented-out code in QwenExecutor 2026-04-04 02:14:48 +08:00
Luis Pater
ae37ccffbf Merge pull request #2520 from Arronlong/main
fix:qwen invalid_parameter_error
2026-04-04 02:13:09 +08:00
Luis Pater
42c062bb5b Merge pull request #2509 from adamhelfgott/fix-claude-thinking-temperature
Normalize Claude temperature when thinking is enabled
2026-04-03 23:55:50 +08:00
kunish
87bf0b73d5 fix(copilot): use dynamic API limits to prevent prompt token overflow
The Copilot API enforces per-account prompt token limits (128K individual,
168K business) that differ from the static 200K context length advertised
by the proxy. This mismatch caused Claude Code to accumulate context
beyond the actual limit, triggering "prompt token count exceeds the limit
of 128000" errors.

Changes:
- Extract max_prompt_tokens and max_output_tokens from the Copilot
  /models API response (capabilities.limits) and use them as the
  authoritative ContextLength and MaxCompletionTokens values
- Add CopilotModelLimits struct and Limits() helper to parse limits
  from the existing Capabilities map
- Fix GitLab Duo context-1m beta header not being set when routing
  through the Anthropic gateway (gitlab_duo_force_context_1m attr
  was set but only gin headers were checked)
- Fix flaky parallel tests that shared global model registry state
2026-04-03 23:54:17 +08:00
Luis Pater
f389667ec3 Merge pull request #2513 from lonr-6/codex/fix-ws-custom-tool-repair-v2
fix: repair responses websocket custom tool call pairing
2026-04-03 23:45:38 +08:00
Arronlong
29dba0399b Comment out system message check in Qwen executor
fix qwen invalid_parameter_error
2026-04-03 23:07:33 +08:00
Luis Pater
a824e7cd0b feat(models): add GPT-5.3, GPT-5.4, and GPT-5.4-mini with enhanced "thinking" levels 2026-04-03 23:05:10 +08:00
Luis Pater
140faef7dc Merge branch 'router-for-me:main' into main 2026-04-03 21:48:23 +08:00
Luis Pater
adb580b344 feat(security): add configuration to toggle Gemini CLI endpoint access
Closes: #2445
2026-04-03 21:46:49 +08:00
Luis Pater
06405f2129 fix(security): enforce stricter localhost validation for GeminiCLIAPIHandler
Closes: #2445
2026-04-03 21:22:03 +08:00
kunish
b849bf79d6 fix(copilot): address code review — SSE reasoning, multi-choice, agent detection
- Strip SSE `data:` prefix before normalizing reasoning_text→reasoning_content
  in streaming mode; re-wrap afterward for the translator
- Iterate all choices in normalizeGitHubCopilotReasoningField (not just
  choices[0]) to support n>1 requests
- Remove over-broad tool-role fallback in isAgentInitiated that scanned
  all messages for role:"tool", aligning with opencode's approach of only
  detecting active tool loops — genuine user follow-ups after tool use are
  no longer mis-classified as agent-initiated
- Add 5 reasoning normalization tests; update 2 X-Initiator tests to match
  refined semantics
2026-04-03 20:51:19 +08:00
kunish
59af2c57b1 fix(copilot): reduce premium request inflation and enable thinking
This commit addresses three issues with Claude Code through GitHub
Copilot:

1. **Premium request inflation**: Responses API requests were missing
   Openai-Intent headers and proper defaults, causing Copilot to bill
   each tool-loop continuation as a new premium request. Fixed by adding
   isAgentInitiated() heuristic (checks for tool_result content or
   preceding assistant tool_use), applying Responses API defaults
   (store, include, reasoning.summary), and local tiktoken-based token
   counting to avoid extra API calls.

2. **Context overflow**: Claude Code's modelSupports1M() hardcodes
   opus-4-6 as 1M-capable, but Copilot only supports ~128K-200K.
   Fixed by stripping the context-1m-2025-08-07 beta from translated
   request bodies. Also forwards response headers in non-streaming
   Execute() and registers the GET /copilot-quota management API route.

3. **Thinking not working**: Add ThinkingSupport with level-based
   reasoning to Claude models in the static definitions. Normalize
   Copilot's non-standard 'reasoning_text' response field to
   'reasoning_content' before passing to the SDK translator. Use
   caller-provided context in CountTokens instead of Background().
2026-04-03 20:24:30 +08:00
Kai Wang
d1fd2c4ad4 fix: repair websocket custom tool calls 2026-04-03 17:11:44 +08:00
Kai Wang
b6c6379bfa fix: repair websocket custom tool calls 2026-04-03 17:11:42 +08:00
Kai Wang
8f0e66b72e fix: repair websocket custom tool calls 2026-04-03 17:11:41 +08:00
Adam Helfgott
f63cf6ff7a Normalize Claude temperature for thinking 2026-04-03 03:45:51 -04:00
Luis Pater
d2419ed49d feat(executor): ensure default system message in QwenExecutor payload 2026-04-03 11:18:48 +08:00
Luis Pater
516d22c695 Merge pull request #484 from Ve-ria/main
更新CodeBuddy CN的模型列表
2026-04-03 11:10:32 +08:00
rensumo
73cda6e836 Update CodeBuddy DeepSeek model description 2026-04-03 11:03:33 +08:00
rensumo
0805989ee5 更新CodeBuddy CN的模型列表 2026-04-03 10:59:27 +08:00
Luis Pater
75da02af55 Merge branch 'router-for-me:main' into main 2026-04-02 22:34:47 +08:00
Luis Pater
ab9ebea592 Merge PR #2474
# Conflicts:
#	internal/api/modules/amp/response_rewriter.go
#	internal/api/modules/amp/response_rewriter_test.go
2026-04-02 22:31:12 +08:00
Luis Pater
7ee37ee4b9 feat: add /healthz endpoint and test coverage for health check
Closes: #2493
2026-04-02 21:56:27 +08:00
Luis Pater
837afffb31 docs: remove README_JA.md and clean up related links from README files 2026-04-02 21:37:47 +08:00
Aikins Laryea
f5e9f01811 test(amp): update tests to expect thinking blocks to pass through during streaming 2026-04-01 20:35:23 +00:00
Aikins Laryea
ff7dbb5867 test(amp): update tests to expect thinking blocks to pass through during streaming 2026-04-01 20:21:39 +00:00
Aikins Laryea
e34b2b4f1d fix(gemini): clean tool schemas and eager_input_streaming
delegate schema sanitization to util.CleanJSONSchemaForGemini and drop the top-level eager_input_streaming key to prevent validation errors when sending claude tools to the gemini api
2026-04-01 19:49:38 +00:00
dinhkarate
36efcc6e28 fix(vertex): include prefix in auth filename and validate at import
Address two blocking issues from PR review:
- Auth file now named vertex-{prefix}-{project}.json so importing the
  same project with different prefixes no longer overwrites credentials
- Prefix containing "/" is rejected at import time instead of being
  silently ignored at runtime
- Add prefix to in-memory metadata map for consistency

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 15:06:04 +07:00
Pham Quang Dinh
a337ecf35c Merge branch 'router-for-me:main' into feat(vertex)/add-prefix-field 2026-03-17 11:48:40 +07:00
dinhkarate
14cb2b95c6 feat(vertex): add --vertex-import-prefix flag for model namespacing 2026-01-29 13:32:38 +07:00
dinhkarate
fdeef48498 feat(vertex): Add Prefix field to VertexCredentialStorage for per-file model namespacing 2026-01-29 13:32:38 +07:00
33 changed files with 2078 additions and 586 deletions

6
.gitignore vendored
View File

@@ -54,4 +54,10 @@ _bmad-output/*
# macOS # macOS
.DS_Store .DS_Store
._* ._*
# Opencode
.beads/
.opencode/
.cli-proxy-api/
.venv/
*.bak *.bak

View File

@@ -14,99 +14,6 @@ This project only accepts pull requests that relate to third-party provider supp
If you need to submit any non-third-party provider changes, please open them against the [mainline](https://github.com/router-for-me/CLIProxyAPI) repository. If you need to submit any non-third-party provider changes, please open them against the [mainline](https://github.com/router-for-me/CLIProxyAPI) repository.
1. Fork the repository
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
4. Push to the branch (`git push origin feature/amazing-feature`)
5. Open a Pull Request
## Who is with us?
Those projects are based on CLIProxyAPI:
### [vibeproxy](https://github.com/automazeio/vibeproxy)
Native macOS menu bar app to use your Claude Code & ChatGPT subscriptions with AI coding tools - no API keys needed
### [Subtitle Translator](https://github.com/VjayC/SRT-Subtitle-Translator-Validator)
Browser-based tool to translate SRT subtitles using your Gemini subscription via CLIProxyAPI with automatic validation/error correction - no API keys needed
### [CCS (Claude Code Switch)](https://github.com/kaitranntt/ccs)
CLI wrapper for instant switching between multiple Claude accounts and alternative models (Gemini, Codex, Antigravity) via CLIProxyAPI OAuth - no API keys needed
### [Quotio](https://github.com/nguyenphutrong/quotio)
Native macOS menu bar app that unifies Claude, Gemini, OpenAI, Qwen, and Antigravity subscriptions with real-time quota tracking and smart auto-failover for AI coding tools like Claude Code, OpenCode, and Droid - no API keys needed.
### [CodMate](https://github.com/loocor/CodMate)
Native macOS SwiftUI app for managing CLI AI sessions (Codex, Claude Code, Gemini CLI) with unified provider management, Git review, project organization, global search, and terminal integration. Integrates CLIProxyAPI to provide OAuth authentication for Codex, Claude, Gemini, Antigravity, and Qwen Code, with built-in and third-party provider rerouting through a single proxy endpoint - no API keys needed for OAuth providers.
### [ProxyPilot](https://github.com/Finesssee/ProxyPilot)
Windows-native CLIProxyAPI fork with TUI, system tray, and multi-provider OAuth for AI coding tools - no API keys needed.
### [Claude Proxy VSCode](https://github.com/uzhao/claude-proxy-vscode)
VSCode extension for quick switching between Claude Code models, featuring integrated CLIProxyAPI as its backend with automatic background lifecycle management.
### [ZeroLimit](https://github.com/0xtbug/zero-limit)
Windows desktop app built with Tauri + React for monitoring AI coding assistant quotas via CLIProxyAPI. Track usage across Gemini, Claude, OpenAI Codex, and Antigravity accounts with real-time dashboard, system tray integration, and one-click proxy control - no API keys needed.
### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
A lightweight web admin panel for CLIProxyAPI with health checks, resource monitoring, real-time logs, auto-update, request statistics and pricing display. Supports one-click installation and systemd service.
### [CLIProxyAPI Tray](https://github.com/kitephp/CLIProxyAPI_Tray)
A Windows tray application implemented using PowerShell scripts, without relying on any third-party libraries. The main features include: automatic creation of shortcuts, silent running, password management, channel switching (Main / Plus), and automatic downloading and updating.
### [霖君](https://github.com/wangdabaoqq/LinJun)
霖君 is a cross-platform desktop application for managing AI programming assistants, supporting macOS, Windows, and Linux systems. Unified management of Claude Code, Gemini CLI, OpenAI Codex, Qwen Code, and other AI coding tools, with local proxy for multi-account quota tracking and one-click configuration.
### [CLIProxyAPI Dashboard](https://github.com/itsmylife44/cliproxyapi-dashboard)
A modern web-based management dashboard for CLIProxyAPI built with Next.js, React, and PostgreSQL. Features real-time log streaming, structured configuration editing, API key management, OAuth provider integration for Claude/Gemini/Codex, usage analytics, container management, and config sync with OpenCode via companion plugin - no manual YAML editing needed.
### [All API Hub](https://github.com/qixing-jk/all-api-hub)
Browser extension for one-stop management of New API-compatible relay site accounts, featuring balance and usage dashboards, auto check-in, one-click key export to common apps, in-page API availability testing, and channel/model sync and redirection. It integrates with CLIProxyAPI through the Management API for one-click provider import and config sync.
### [Shadow AI](https://github.com/HEUDavid/shadow-ai)
Shadow AI is an AI assistant tool designed specifically for restricted environments. It provides a stealthy operation
mode without windows or traces, and enables cross-device AI Q&A interaction and control via the local area network (
LAN). Essentially, it is an automated collaboration layer of "screen/audio capture + AI inference + low-friction delivery",
helping users to immersively use AI assistants across applications on controlled devices or in restricted environments.
### [ProxyPal](https://github.com/buddingnewinsights/proxypal)
Cross-platform desktop app (macOS, Windows, Linux) wrapping CLIProxyAPI with a native GUI. Connects Claude, ChatGPT, Gemini, GitHub Copilot, Qwen, iFlow, and custom OpenAI-compatible endpoints with usage analytics, request monitoring, and auto-configuration for popular coding tools - no API keys needed.
> [!NOTE]
> If you developed a project based on CLIProxyAPI, please open a PR to add it to this list.
## More choices
Those projects are ports of CLIProxyAPI or inspired by it:
### [9Router](https://github.com/decolua/9router)
A Next.js implementation inspired by CLIProxyAPI, easy to install and use, built from scratch with format translation (OpenAI/Claude/Gemini/Ollama), combo system with auto-fallback, multi-account management with exponential backoff, a Next.js web dashboard, and support for CLI tools (Cursor, Claude Code, Cline, RooCode) - no API keys needed.
### [OmniRoute](https://github.com/diegosouzapw/OmniRoute)
Never stop coding. Smart routing to FREE & low-cost AI models with automatic fallback.
OmniRoute is an AI gateway for multi-provider LLMs: an OpenAI-compatible endpoint with smart routing, load balancing, retries, and fallbacks. Add policies, rate limits, caching, and observability for reliable, cost-aware inference.
> [!NOTE]
> If you have developed a port of CLIProxyAPI or a project inspired by it, please open a PR to add it to this list.
## License ## License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

View File

@@ -1,6 +1,6 @@
# CLIProxyAPI Plus # CLIProxyAPI Plus
[English](README.md) | 中文 | [日本語](README_JA.md) [English](README.md) | 中文
这是 [CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI) 的 Plus 版本,在原有基础上增加了第三方供应商的支持。 这是 [CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI) 的 Plus 版本,在原有基础上增加了第三方供应商的支持。
@@ -12,96 +12,6 @@
如果需要提交任何非第三方供应商支持的 Pull Request请提交到[主线](https://github.com/router-for-me/CLIProxyAPI)版本。 如果需要提交任何非第三方供应商支持的 Pull Request请提交到[主线](https://github.com/router-for-me/CLIProxyAPI)版本。
1. Fork 仓库
2. 创建您的功能分支(`git checkout -b feature/amazing-feature`
3. 提交您的更改(`git commit -m 'Add some amazing feature'`
4. 推送到分支(`git push origin feature/amazing-feature`
5. 打开 Pull Request
## 谁与我们在一起?
这些项目基于 CLIProxyAPI:
### [vibeproxy](https://github.com/automazeio/vibeproxy)
一个原生 macOS 菜单栏应用,让您可以使用 Claude Code & ChatGPT 订阅服务和 AI 编程工具,无需 API 密钥。
### [Subtitle Translator](https://github.com/VjayC/SRT-Subtitle-Translator-Validator)
一款基于浏览器的 SRT 字幕翻译工具,可通过 CLI 代理 API 使用您的 Gemini 订阅。内置自动验证与错误修正功能,无需 API 密钥。
### [CCS (Claude Code Switch)](https://github.com/kaitranntt/ccs)
CLI 封装器,用于通过 CLIProxyAPI OAuth 即时切换多个 Claude 账户和替代模型Gemini, Codex, Antigravity无需 API 密钥。
### [Quotio](https://github.com/nguyenphutrong/quotio)
原生 macOS 菜单栏应用,统一管理 Claude、Gemini、OpenAI、Qwen 和 Antigravity 订阅,提供实时配额追踪和智能自动故障转移,支持 Claude Code、OpenCode 和 Droid 等 AI 编程工具,无需 API 密钥。
### [CodMate](https://github.com/loocor/CodMate)
原生 macOS SwiftUI 应用,用于管理 CLI AI 会话Claude Code、Codex、Gemini CLI提供统一的提供商管理、Git 审查、项目组织、全局搜索和终端集成。集成 CLIProxyAPI 为 Codex、Claude、Gemini、Antigravity 和 Qwen Code 提供统一的 OAuth 认证,支持内置和第三方提供商通过单一代理端点重路由 - OAuth 提供商无需 API 密钥。
### [ProxyPilot](https://github.com/Finesssee/ProxyPilot)
原生 Windows CLIProxyAPI 分支,集成 TUI、系统托盘及多服务商 OAuth 认证,专为 AI 编程工具打造,无需 API 密钥。
### [Claude Proxy VSCode](https://github.com/uzhao/claude-proxy-vscode)
一款 VSCode 扩展,提供了在 VSCode 中快速切换 Claude Code 模型的功能,内置 CLIProxyAPI 作为其后端,支持后台自动启动和关闭。
### [ZeroLimit](https://github.com/0xtbug/zero-limit)
Windows 桌面应用,基于 Tauri + React 构建,用于通过 CLIProxyAPI 监控 AI 编程助手配额。支持跨 Gemini、Claude、OpenAI Codex 和 Antigravity 账户的使用量追踪,提供实时仪表盘、系统托盘集成和一键代理控制,无需 API 密钥。
### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
面向 CLIProxyAPI 的 Web 管理面板,提供健康检查、资源监控、日志查看、自动更新、请求统计与定价展示,支持一键安装与 systemd 服务。
### [CLIProxyAPI Tray](https://github.com/kitephp/CLIProxyAPI_Tray)
Windows 托盘应用,基于 PowerShell 脚本实现不依赖任何第三方库。主要功能包括自动创建快捷方式、静默运行、密码管理、通道切换Main / Plus以及自动下载与更新。
### [霖君](https://github.com/wangdabaoqq/LinJun)
霖君是一款用于管理AI编程助手的跨平台桌面应用支持macOS、Windows、Linux系统。统一管理Claude Code、Gemini CLI、OpenAI Codex、Qwen Code等AI编程工具本地代理实现多账户配额跟踪和一键配置。
### [CLIProxyAPI Dashboard](https://github.com/itsmylife44/cliproxyapi-dashboard)
一个面向 CLIProxyAPI 的现代化 Web 管理仪表盘,基于 Next.js、React 和 PostgreSQL 构建。支持实时日志流、结构化配置编辑、API Key 管理、Claude/Gemini/Codex 的 OAuth 提供方集成、使用量分析、容器管理,并可通过配套插件与 OpenCode 同步配置,无需手动编辑 YAML。
### [All API Hub](https://github.com/qixing-jk/all-api-hub)
用于一站式管理 New API 兼容中转站账号的浏览器扩展,提供余额与用量看板、自动签到、密钥一键导出到常用应用、网页内 API 可用性测试,以及渠道与模型同步和重定向。支持通过 CLIProxyAPI Management API 一键导入 Provider 与同步配置。
### [Shadow AI](https://github.com/HEUDavid/shadow-ai)
Shadow AI 是一款专为受限环境设计的 AI 辅助工具。提供无窗口、无痕迹的隐蔽运行方式,并通过局域网实现跨设备的 AI 问答交互与控制。本质上是一个「屏幕/音频采集 + AI 推理 + 低摩擦投送」的自动化协作层,帮助用户在受控设备/受限环境下沉浸式跨应用地使用 AI 助手。
### [ProxyPal](https://github.com/buddingnewinsights/proxypal)
跨平台桌面应用macOS、Windows、Linux以原生 GUI 封装 CLIProxyAPI。支持连接 Claude、ChatGPT、Gemini、GitHub Copilot、Qwen、iFlow 及自定义 OpenAI 兼容端点,具备使用分析、请求监控和热门编程工具自动配置功能,无需 API 密钥。
> [!NOTE]
> 如果你开发了基于 CLIProxyAPI 的项目,请提交一个 PR拉取请求将其添加到此列表中。
## 更多选择
以下项目是 CLIProxyAPI 的移植版或受其启发:
### [9Router](https://github.com/decolua/9router)
基于 Next.js 的实现,灵感来自 CLIProxyAPI易于安装使用自研格式转换OpenAI/Claude/Gemini/Ollama、组合系统与自动回退、多账户管理指数退避、Next.js Web 控制台,并支持 Cursor、Claude Code、Cline、RooCode 等 CLI 工具,无需 API 密钥。
### [OmniRoute](https://github.com/diegosouzapw/OmniRoute)
代码不止,创新不停。智能路由至免费及低成本 AI 模型,并支持自动故障转移。
OmniRoute 是一个面向多供应商大语言模型的 AI 网关:它提供兼容 OpenAI 的端点,具备智能路由、负载均衡、重试及回退机制。通过添加策略、速率限制、缓存和可观测性,确保推理过程既可靠又具备成本意识。
> [!NOTE]
> 如果你开发了 CLIProxyAPI 的移植或衍生项目,请提交 PR 将其添加到此列表中。
## 许可证 ## 许可证
此项目根据 MIT 许可证授权 - 有关详细信息,请参阅 [LICENSE](LICENSE) 文件。 此项目根据 MIT 许可证授权 - 有关详细信息,请参阅 [LICENSE](LICENSE) 文件。

View File

@@ -1,199 +0,0 @@
# CLI Proxy API
[English](README.md) | [中文](README_CN.md) | 日本語
CLI向けのOpenAI/Gemini/Claude/Codex互換APIインターフェースを提供するプロキシサーバーです。
OAuth経由でOpenAI CodexGPTモデルおよびClaude Codeもサポートしています。
ローカルまたはマルチアカウントのCLIアクセスを、OpenAIResponses含む/Gemini/Claude互換のクライアントやSDKで利用できます。
## スポンサー
[![z.ai](https://assets.router-for.me/english-5-0.jpg)](https://z.ai/subscribe?ic=8JVLJQFSKB)
本プロジェクトはZ.aiにスポンサーされており、GLM CODING PLANの提供を受けています。
GLM CODING PLANはAIコーディング向けに設計されたサブスクリプションサービスで、月額わずか$10から利用可能です。フラッグシップのGLM-4.7およびGLM-5はProユーザーのみ利用可能モデルを10以上の人気AIコーディングツールClaude Code、Cline、Roo Codeなどで利用でき、開発者にトップクラスの高速かつ安定したコーディング体験を提供します。
GLM CODING PLANを10%割引で取得https://z.ai/subscribe?ic=8JVLJQFSKB
---
<table>
<tbody>
<tr>
<td width="180"><a href="https://www.packyapi.com/register?aff=cliproxyapi"><img src="./assets/packycode.png" alt="PackyCode" width="150"></a></td>
<td>PackyCodeのスポンサーシップに感謝しますPackyCodeは信頼性が高く効率的なAPIリレーサービスプロバイダーで、Claude Code、Codex、Geminiなどのリレーサービスを提供しています。PackyCodeは当ソフトウェアのユーザーに特別割引を提供しています<a href="https://www.packyapi.com/register?aff=cliproxyapi">こちらのリンク</a>から登録し、チャージ時にプロモーションコード「cliproxyapi」を入力すると10%割引になります。</td>
</tr>
<tr>
<td width="180"><a href="https://www.aicodemirror.com/register?invitecode=TJNAIF"><img src="./assets/aicodemirror.png" alt="AICodeMirror" width="150"></a></td>
<td>AICodeMirrorのスポンサーシップに感謝しますAICodeMirrorはClaude Code / Codex / Gemini CLI向けの公式高安定性リレーサービスを提供しており、エンタープライズグレードの同時接続、迅速な請求書発行、24時間365日の専任技術サポートを備えています。Claude Code / Codex / Geminiの公式チャネルが元の価格の38% / 2% / 9%で利用でき、チャージ時にはさらに割引がありますCLIProxyAPIユーザー向けの特別特典<a href="https://www.aicodemirror.com/register?invitecode=TJNAIF">こちらのリンク</a>から登録すると、初回チャージが20%割引になり、エンタープライズのお客様は最大25%割引を受けられます!</td>
</tr>
<tr>
<td width="180"><a href="https://shop.bmoplus.com/?utm_source=github"><img src="./assets/bmoplus.png" alt="BmoPlus" width="150"></a></td>
<td>本プロジェクトにご支援いただいた BmoPlus に感謝いたしますBmoPlusは、AIサブスクリプションのヘビーユーザー向けに特化した信頼性の高いAIアカウントサービスプロバイダーであり、安定した ChatGPT Plus / ChatGPT Pro (完全保証) / Claude Pro / Super Grok / Gemini Pro の公式代行チャージおよび即納アカウントを提供しています。こちらの<a href="https://shop.bmoplus.com/?utm_source=github">BmoPlus AIアカウント専門店/代行チャージ</a>経由でご登録・ご注文いただいたユーザー様は、GPTを <b>公式サイト価格の約1割90% OFF</b> という驚異的な価格でご利用いただけます!</td>
</tr>
<tr>
<td width="180"><a href="https://www.lingtrue.com/register"><img src="./assets/lingtrue.png" alt="LingtrueAPI" width="150"></a></td>
<td>LingtrueAPIのスポンサーシップに感謝しますLingtrueAPIはグローバルな大規模モデルAPIリレーサービスプラットフォームで、Claude Code、Codex、GeminiなどのトップモデルAPI呼び出しサービスを提供し、ユーザーが低コストかつ高い安定性で世界中のAI能力に接続できるよう支援しています。LingtrueAPIは本ソフトウェアのユーザーに特別割引を提供しています<a href="https://www.lingtrue.com/register">こちらのリンク</a>から登録し、初回チャージ時にプロモーションコード「LingtrueAPI」を入力すると10%割引になります。</td>
</tr>
</tbody>
</table>
## 概要
- CLIモデル向けのOpenAI/Gemini/Claude互換APIエンドポイント
- OAuthログインによるOpenAI CodexサポートGPTモデル
- OAuthログインによるClaude Codeサポート
- OAuthログインによるQwen Codeサポート
- OAuthログインによるiFlowサポート
- プロバイダールーティングによるAmp CLIおよびIDE拡張機能のサポート
- ストリーミングおよび非ストリーミングレスポンス
- 関数呼び出し/ツールのサポート
- マルチモーダル入力サポート(テキストと画像)
- ラウンドロビン負荷分散による複数アカウント対応Gemini、OpenAI、Claude、QwenおよびiFlow
- シンプルなCLI認証フローGemini、OpenAI、Claude、QwenおよびiFlow
- Generative Language APIキーのサポート
- AI Studioビルドのマルチアカウント負荷分散
- Gemini CLIのマルチアカウント負荷分散
- Claude Codeのマルチアカウント負荷分散
- Qwen Codeのマルチアカウント負荷分散
- iFlowのマルチアカウント負荷分散
- OpenAI Codexのマルチアカウント負荷分散
- 設定によるOpenAI互換アップストリームプロバイダーOpenRouter
- プロキシ埋め込み用の再利用可能なGo SDK`docs/sdk-usage.md`を参照)
## はじめに
CLIProxyAPIガイド[https://help.router-for.me/](https://help.router-for.me/)
## 管理API
[MANAGEMENT_API.md](https://help.router-for.me/management/api)を参照
## Amp CLIサポート
CLIProxyAPIは[Amp CLI](https://ampcode.com)およびAmp IDE拡張機能の統合サポートを含んでおり、Google/ChatGPT/ClaudeのOAuthサブスクリプションをAmpのコーディングツールで使用できます
- Ampの APIパターン用のプロバイダールートエイリアス`/api/provider/{provider}/v1...`
- OAuth認証およびアカウント機能用の管理プロキシ
- 自動ルーティングによるスマートモデルフォールバック
- 利用できないモデルを代替モデルにルーティングする**モデルマッピング**(例:`claude-opus-4.5``claude-sonnet-4`
- localhostのみの管理エンドポイントによるセキュリティファーストの設計
特定のバックエンド系統のリクエスト/レスポンス形状が必要な場合は、統合された `/v1/...` エンドポイントよりも provider-specific のパスを優先してください。
- messages 系のバックエンドには `/api/provider/{provider}/v1/messages`
- モデル単位の generate 系エンドポイントには `/api/provider/{provider}/v1beta/models/...`
- chat-completions 系のバックエンドには `/api/provider/{provider}/v1/chat/completions`
これらのパスはプロトコル面の選択には役立ちますが、同じクライアント向けモデル名が複数バックエンドで再利用されている場合、それだけで推論実行系が一意に固定されるわけではありません。実際の推論ルーティングは、引き続きリクエスト内の model/alias 解決に従います。厳密にバックエンドを固定したい場合は、一意な alias や prefix を使うか、クライアント向けモデル名の重複自体を避けてください。
**→ [Amp CLI統合ガイドの完全版](https://help.router-for.me/agent-client/amp-cli.html)**
## SDKドキュメント
- 使い方:[docs/sdk-usage.md](docs/sdk-usage.md)
- 上級(エグゼキューターとトランスレーター):[docs/sdk-advanced.md](docs/sdk-advanced.md)
- アクセス:[docs/sdk-access.md](docs/sdk-access.md)
- ウォッチャー:[docs/sdk-watcher.md](docs/sdk-watcher.md)
- カスタムプロバイダーの例:`examples/custom-provider`
## コントリビューション
コントリビューションを歓迎しますお気軽にPull Requestを送ってください。
1. リポジトリをフォーク
2. フィーチャーブランチを作成(`git checkout -b feature/amazing-feature`
3. 変更をコミット(`git commit -m 'Add some amazing feature'`
4. ブランチにプッシュ(`git push origin feature/amazing-feature`
5. Pull Requestを作成
## 関連プロジェクト
CLIProxyAPIをベースにした以下のプロジェクトがあります
### [vibeproxy](https://github.com/automazeio/vibeproxy)
macOSネイティブのメニューバーアプリで、Claude CodeとChatGPTのサブスクリプションをAIコーディングツールで使用可能 - APIキー不要
### [Subtitle Translator](https://github.com/VjayC/SRT-Subtitle-Translator-Validator)
CLIProxyAPI経由でGeminiサブスクリプションを使用してSRT字幕を翻訳するブラウザベースのツール。自動検証/エラー修正機能付き - APIキー不要
### [CCS (Claude Code Switch)](https://github.com/kaitranntt/ccs)
CLIProxyAPI OAuthを使用して複数のClaudeアカウントや代替モデルGemini、Codex、Antigravityを即座に切り替えるCLIラッパー - APIキー不要
### [Quotio](https://github.com/nguyenphutrong/quotio)
Claude、Gemini、OpenAI、Qwen、Antigravityのサブスクリプションを統合し、リアルタイムのクォータ追跡とスマート自動フェイルオーバーを備えたmacOSネイティブのメニューバーアプリ。Claude Code、OpenCode、Droidなどのコーディングツール向け - APIキー不要
### [CodMate](https://github.com/loocor/CodMate)
CLI AIセッションCodex、Claude Code、Gemini CLIを管理するmacOS SwiftUIネイティブアプリ。統合プロバイダー管理、Gitレビュー、プロジェクト整理、グローバル検索、ターミナル統合機能を搭載。CLIProxyAPIと統合し、Codex、Claude、Gemini、Antigravity、Qwen CodeのOAuth認証を提供。単一のプロキシエンドポイントを通じた組み込みおよびサードパーティプロバイダーの再ルーティングに対応 - OAuthプロバイダーではAPIキー不要
### [ProxyPilot](https://github.com/Finesssee/ProxyPilot)
TUI、システムトレイ、マルチプロバイダーOAuthを備えたWindows向けCLIProxyAPIフォーク - AIコーディングツール用、APIキー不要
### [Claude Proxy VSCode](https://github.com/uzhao/claude-proxy-vscode)
Claude Codeモデルを素早く切り替えるVSCode拡張機能。バックエンドとしてCLIProxyAPIを統合し、バックグラウンドでの自動ライフサイクル管理を搭載
### [ZeroLimit](https://github.com/0xtbug/zero-limit)
CLIProxyAPIを使用してAIコーディングアシスタントのクォータを監視するTauri + React製のWindowsデスクトップアプリ。Gemini、Claude、OpenAI Codex、Antigravityアカウントの使用量をリアルタイムダッシュボード、システムトレイ統合、ワンクリックプロキシコントロールで追跡 - APIキー不要
### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
CLIProxyAPI向けの軽量Web管理パネル。ヘルスチェック、リソース監視、リアルタイムログ、自動更新、リクエスト統計、料金表示機能を搭載。ワンクリックインストールとsystemdサービスに対応
### [CLIProxyAPI Tray](https://github.com/kitephp/CLIProxyAPI_Tray)
PowerShellスクリプトで実装されたWindowsトレイアプリケーション。サードパーティライブラリに依存せず、ショートカットの自動作成、サイレント実行、パスワード管理、チャネル切り替えMain / Plus、自動ダウンロードおよび自動更新に対応
### [霖君](https://github.com/wangdabaoqq/LinJun)
霖君はAIプログラミングアシスタントを管理するクロスプラットフォームデスクトップアプリケーションで、macOS、Windows、Linuxシステムに対応。Claude Code、Gemini CLI、OpenAI Codex、Qwen Codeなどのコーディングツールを統合管理し、ローカルプロキシによるマルチアカウントクォータ追跡とワンクリック設定が可能
### [CLIProxyAPI Dashboard](https://github.com/itsmylife44/cliproxyapi-dashboard)
Next.js、React、PostgreSQLで構築されたCLIProxyAPI用のモダンなWebベース管理ダッシュボード。リアルタイムログストリーミング、構造化された設定編集、APIキー管理、Claude/Gemini/Codex向けOAuthプロバイダー統合、使用量分析、コンテナ管理、コンパニオンプラグインによるOpenCodeとの設定同期機能を搭載 - 手動でのYAML編集は不要
### [All API Hub](https://github.com/qixing-jk/all-api-hub)
New API互換リレーサイトアカウントをワンストップで管理するブラウザ拡張機能。残高と使用量のダッシュボード、自動チェックイン、一般的なアプリへのワンクリックキーエクスポート、ページ内API可用性テスト、チャネル/モデルの同期とリダイレクト機能を搭載。Management APIを通じてCLIProxyAPIと統合し、ワンクリックでプロバイダーのインポートと設定同期が可能
### [Shadow AI](https://github.com/HEUDavid/shadow-ai)
Shadow AIは制限された環境向けに特別に設計されたAIアシスタントツールです。ウィンドウや痕跡のないステルス動作モードを提供し、LANローカルエリアネットワークを介したクロスデバイスAI質疑応答のインタラクションと制御を可能にします。本質的には「画面/音声キャプチャ + AI推論 + 低摩擦デリバリー」の自動化コラボレーションレイヤーであり、制御されたデバイスや制限された環境でアプリケーション横断的にAIアシスタントを没入的に使用できるようユーザーを支援します。
### [ProxyPal](https://github.com/buddingnewinsights/proxypal)
CLIProxyAPIをネイティブGUIでラップしたクロスプラットフォームデスクトップアプリmacOS、Windows、Linux。Claude、ChatGPT、Gemini、GitHub Copilot、Qwen、iFlow、カスタムOpenAI互換エンドポイントに対応し、使用状況分析、リクエスト監視、人気コーディングツールの自動設定機能を搭載 - APIキー不要
> [!NOTE]
> CLIProxyAPIをベースにプロジェクトを開発した場合は、PRを送ってこのリストに追加してください。
## その他の選択肢
以下のプロジェクトはCLIProxyAPIの移植版またはそれに触発されたものです
### [9Router](https://github.com/decolua/9router)
CLIProxyAPIに触発されたNext.js実装。インストールと使用が簡単で、フォーマット変換OpenAI/Claude/Gemini/Ollama、自動フォールバック付きコンボシステム、指数バックオフ付きマルチアカウント管理、Next.js Webダッシュボード、CLIツールCursor、Claude Code、Cline、RooCodeのサポートをゼロから構築 - APIキー不要
### [OmniRoute](https://github.com/diegosouzapw/OmniRoute)
コーディングを止めない。無料および低コストのAIモデルへのスマートルーティングと自動フォールバック。
OmniRouteはマルチプロバイダーLLM向けのAIゲートウェイですスマートルーティング、負荷分散、リトライ、フォールバックを備えたOpenAI互換エンドポイント。ポリシー、レート制限、キャッシュ、可観測性を追加して、信頼性が高くコストを意識した推論を実現します。
> [!NOTE]
> CLIProxyAPIの移植版またはそれに触発されたプロジェクトを開発した場合は、PRを送ってこのリストに追加してください。
## ライセンス
本プロジェクトはMITライセンスの下でライセンスされています - 詳細は[LICENSE](LICENSE)ファイルを参照してください。

View File

@@ -26,6 +26,7 @@ import (
"time" "time"
"github.com/router-for-me/CLIProxyAPI/v6/internal/logging" "github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
sdkauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth" sdkauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil" "github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
@@ -188,7 +189,7 @@ func fetchModels(ctx context.Context, auth *coreauth.Auth) []modelEntry {
httpReq.Close = true httpReq.Close = true
httpReq.Header.Set("Content-Type", "application/json") httpReq.Header.Set("Content-Type", "application/json")
httpReq.Header.Set("Authorization", "Bearer "+accessToken) httpReq.Header.Set("Authorization", "Bearer "+accessToken)
httpReq.Header.Set("User-Agent", "antigravity/1.19.6 darwin/arm64") httpReq.Header.Set("User-Agent", misc.AntigravityUserAgent())
httpClient := &http.Client{Timeout: 30 * time.Second} httpClient := &http.Client{Timeout: 30 * time.Second}
if transport, _, errProxy := proxyutil.BuildHTTPTransport(auth.ProxyURL); errProxy == nil && transport != nil { if transport, _, errProxy := proxyutil.BuildHTTPTransport(auth.ProxyURL); errProxy == nil && transport != nil {

View File

@@ -99,6 +99,7 @@ func main() {
var codeBuddyLogin bool var codeBuddyLogin bool
var projectID string var projectID string
var vertexImport string var vertexImport string
var vertexImportPrefix string
var configPath string var configPath string
var password string var password string
var tuiMode bool var tuiMode bool
@@ -139,6 +140,7 @@ func main() {
flag.StringVar(&projectID, "project_id", "", "Project ID (Gemini only, not required)") flag.StringVar(&projectID, "project_id", "", "Project ID (Gemini only, not required)")
flag.StringVar(&configPath, "config", DefaultConfigPath, "Configure File Path") flag.StringVar(&configPath, "config", DefaultConfigPath, "Configure File Path")
flag.StringVar(&vertexImport, "vertex-import", "", "Import Vertex service account key JSON file") flag.StringVar(&vertexImport, "vertex-import", "", "Import Vertex service account key JSON file")
flag.StringVar(&vertexImportPrefix, "vertex-import-prefix", "", "Prefix for Vertex model namespacing (use with -vertex-import)")
flag.StringVar(&password, "password", "", "") flag.StringVar(&password, "password", "", "")
flag.BoolVar(&tuiMode, "tui", false, "Start with terminal management UI") flag.BoolVar(&tuiMode, "tui", false, "Start with terminal management UI")
flag.BoolVar(&standalone, "standalone", false, "In TUI mode, start an embedded local server") flag.BoolVar(&standalone, "standalone", false, "In TUI mode, start an embedded local server")
@@ -510,7 +512,7 @@ func main() {
if vertexImport != "" { if vertexImport != "" {
// Handle Vertex service account import // Handle Vertex service account import
cmd.DoVertexImport(cfg, vertexImport) cmd.DoVertexImport(cfg, vertexImport, vertexImportPrefix)
} else if login { } else if login {
// Handle Google/Gemini login // Handle Google/Gemini login
cmd.DoLogin(cfg, projectID, options) cmd.DoLogin(cfg, projectID, options)
@@ -596,6 +598,7 @@ func main() {
if standalone { if standalone {
// Standalone mode: start an embedded local server and connect TUI client to it. // Standalone mode: start an embedded local server and connect TUI client to it.
managementasset.StartAutoUpdater(context.Background(), configFilePath) managementasset.StartAutoUpdater(context.Background(), configFilePath)
misc.StartAntigravityVersionUpdater(context.Background())
if !localModel { if !localModel {
registry.StartModelsUpdater(context.Background()) registry.StartModelsUpdater(context.Background())
} }
@@ -671,6 +674,7 @@ func main() {
} else { } else {
// Start the main proxy service // Start the main proxy service
managementasset.StartAutoUpdater(context.Background(), configFilePath) managementasset.StartAutoUpdater(context.Background(), configFilePath)
misc.StartAntigravityVersionUpdater(context.Background())
if !localModel { if !localModel {
registry.StartModelsUpdater(context.Background()) registry.StartModelsUpdater(context.Background())
} }

View File

@@ -105,6 +105,10 @@ routing:
# When true, enable authentication for the WebSocket API (/v1/ws). # When true, enable authentication for the WebSocket API (/v1/ws).
ws-auth: false ws-auth: false
# When true, enable Gemini CLI internal endpoints (/v1internal:*).
# Default is false for safety.
enable-gemini-cli-endpoint: false
# When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts. # When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
nonstream-keepalive-interval: 0 nonstream-keepalive-interval: 0

View File

@@ -13,6 +13,7 @@ import (
"github.com/fxamacker/cbor/v2" "github.com/fxamacker/cbor/v2"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli" "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil" "github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
@@ -700,6 +701,11 @@ func (h *Handler) apiCallTransport(auth *coreauth.Auth) http.RoundTripper {
if proxyStr := strings.TrimSpace(auth.ProxyURL); proxyStr != "" { if proxyStr := strings.TrimSpace(auth.ProxyURL); proxyStr != "" {
proxyCandidates = append(proxyCandidates, proxyStr) proxyCandidates = append(proxyCandidates, proxyStr)
} }
if h != nil && h.cfg != nil {
if proxyStr := strings.TrimSpace(proxyURLFromAPIKeyConfig(h.cfg, auth)); proxyStr != "" {
proxyCandidates = append(proxyCandidates, proxyStr)
}
}
} }
if h != nil && h.cfg != nil { if h != nil && h.cfg != nil {
if proxyStr := strings.TrimSpace(h.cfg.ProxyURL); proxyStr != "" { if proxyStr := strings.TrimSpace(h.cfg.ProxyURL); proxyStr != "" {
@@ -722,6 +728,123 @@ func (h *Handler) apiCallTransport(auth *coreauth.Auth) http.RoundTripper {
return clone return clone
} }
type apiKeyConfigEntry interface {
GetAPIKey() string
GetBaseURL() string
}
func resolveAPIKeyConfig[T apiKeyConfigEntry](entries []T, auth *coreauth.Auth) *T {
if auth == nil || len(entries) == 0 {
return nil
}
attrKey, attrBase := "", ""
if auth.Attributes != nil {
attrKey = strings.TrimSpace(auth.Attributes["api_key"])
attrBase = strings.TrimSpace(auth.Attributes["base_url"])
}
for i := range entries {
entry := &entries[i]
cfgKey := strings.TrimSpace((*entry).GetAPIKey())
cfgBase := strings.TrimSpace((*entry).GetBaseURL())
if attrKey != "" && attrBase != "" {
if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) {
return entry
}
continue
}
if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
return entry
}
}
if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) {
return entry
}
}
if attrKey != "" {
for i := range entries {
entry := &entries[i]
if strings.EqualFold(strings.TrimSpace((*entry).GetAPIKey()), attrKey) {
return entry
}
}
}
return nil
}
func proxyURLFromAPIKeyConfig(cfg *config.Config, auth *coreauth.Auth) string {
if cfg == nil || auth == nil {
return ""
}
authKind, authAccount := auth.AccountInfo()
if !strings.EqualFold(strings.TrimSpace(authKind), "api_key") {
return ""
}
attrs := auth.Attributes
compatName := ""
providerKey := ""
if len(attrs) > 0 {
compatName = strings.TrimSpace(attrs["compat_name"])
providerKey = strings.TrimSpace(attrs["provider_key"])
}
if compatName != "" || strings.EqualFold(strings.TrimSpace(auth.Provider), "openai-compatibility") {
return resolveOpenAICompatAPIKeyProxyURL(cfg, auth, strings.TrimSpace(authAccount), providerKey, compatName)
}
switch strings.ToLower(strings.TrimSpace(auth.Provider)) {
case "gemini":
if entry := resolveAPIKeyConfig(cfg.GeminiKey, auth); entry != nil {
return strings.TrimSpace(entry.ProxyURL)
}
case "claude":
if entry := resolveAPIKeyConfig(cfg.ClaudeKey, auth); entry != nil {
return strings.TrimSpace(entry.ProxyURL)
}
case "codex":
if entry := resolveAPIKeyConfig(cfg.CodexKey, auth); entry != nil {
return strings.TrimSpace(entry.ProxyURL)
}
}
return ""
}
func resolveOpenAICompatAPIKeyProxyURL(cfg *config.Config, auth *coreauth.Auth, apiKey, providerKey, compatName string) string {
if cfg == nil || auth == nil {
return ""
}
apiKey = strings.TrimSpace(apiKey)
if apiKey == "" {
return ""
}
candidates := make([]string, 0, 3)
if v := strings.TrimSpace(compatName); v != "" {
candidates = append(candidates, v)
}
if v := strings.TrimSpace(providerKey); v != "" {
candidates = append(candidates, v)
}
if v := strings.TrimSpace(auth.Provider); v != "" {
candidates = append(candidates, v)
}
for i := range cfg.OpenAICompatibility {
compat := &cfg.OpenAICompatibility[i]
for _, candidate := range candidates {
if candidate != "" && strings.EqualFold(strings.TrimSpace(candidate), compat.Name) {
for j := range compat.APIKeyEntries {
entry := &compat.APIKeyEntries[j]
if strings.EqualFold(strings.TrimSpace(entry.APIKey), apiKey) {
return strings.TrimSpace(entry.ProxyURL)
}
}
return ""
}
}
}
return ""
}
func buildProxyTransport(proxyStr string) *http.Transport { func buildProxyTransport(proxyStr string) *http.Transport {
transport, _, errBuild := proxyutil.BuildHTTPTransport(proxyStr) transport, _, errBuild := proxyutil.BuildHTTPTransport(proxyStr)
if errBuild != nil { if errBuild != nil {

View File

@@ -58,6 +58,105 @@ func TestAPICallTransportInvalidAuthFallsBackToGlobalProxy(t *testing.T) {
} }
} }
func TestAPICallTransportAPIKeyAuthFallsBackToConfigProxyURL(t *testing.T) {
t.Parallel()
h := &Handler{
cfg: &config.Config{
SDKConfig: sdkconfig.SDKConfig{ProxyURL: "http://global-proxy.example.com:8080"},
GeminiKey: []config.GeminiKey{{
APIKey: "gemini-key",
ProxyURL: "http://gemini-proxy.example.com:8080",
}},
ClaudeKey: []config.ClaudeKey{{
APIKey: "claude-key",
ProxyURL: "http://claude-proxy.example.com:8080",
}},
CodexKey: []config.CodexKey{{
APIKey: "codex-key",
ProxyURL: "http://codex-proxy.example.com:8080",
}},
OpenAICompatibility: []config.OpenAICompatibility{{
Name: "bohe",
BaseURL: "https://bohe.example.com",
APIKeyEntries: []config.OpenAICompatibilityAPIKey{{
APIKey: "compat-key",
ProxyURL: "http://compat-proxy.example.com:8080",
}},
}},
},
}
cases := []struct {
name string
auth *coreauth.Auth
wantProxy string
}{
{
name: "gemini",
auth: &coreauth.Auth{
Provider: "gemini",
Attributes: map[string]string{"api_key": "gemini-key"},
},
wantProxy: "http://gemini-proxy.example.com:8080",
},
{
name: "claude",
auth: &coreauth.Auth{
Provider: "claude",
Attributes: map[string]string{"api_key": "claude-key"},
},
wantProxy: "http://claude-proxy.example.com:8080",
},
{
name: "codex",
auth: &coreauth.Auth{
Provider: "codex",
Attributes: map[string]string{"api_key": "codex-key"},
},
wantProxy: "http://codex-proxy.example.com:8080",
},
{
name: "openai-compatibility",
auth: &coreauth.Auth{
Provider: "bohe",
Attributes: map[string]string{
"api_key": "compat-key",
"compat_name": "bohe",
"provider_key": "bohe",
},
},
wantProxy: "http://compat-proxy.example.com:8080",
},
}
for _, tc := range cases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
transport := h.apiCallTransport(tc.auth)
httpTransport, ok := transport.(*http.Transport)
if !ok {
t.Fatalf("transport type = %T, want *http.Transport", transport)
}
req, errRequest := http.NewRequest(http.MethodGet, "https://example.com", nil)
if errRequest != nil {
t.Fatalf("http.NewRequest returned error: %v", errRequest)
}
proxyURL, errProxy := httpTransport.Proxy(req)
if errProxy != nil {
t.Fatalf("httpTransport.Proxy returned error: %v", errProxy)
}
if proxyURL == nil || proxyURL.String() != tc.wantProxy {
t.Fatalf("proxy URL = %v, want %s", proxyURL, tc.wantProxy)
}
})
}
}
func TestAuthByIndexDistinguishesSharedAPIKeysAcrossProviders(t *testing.T) { func TestAuthByIndexDistinguishesSharedAPIKeysAcrossProviders(t *testing.T) {
t.Parallel() t.Parallel()

View File

@@ -253,6 +253,7 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
log.Debugf("amp model mapping: request %s -> %s", normalizedModel, resolvedModel) log.Debugf("amp model mapping: request %s -> %s", normalizedModel, resolvedModel)
logAmpRouting(RouteTypeModelMapping, modelName, resolvedModel, providerName, requestPath) logAmpRouting(RouteTypeModelMapping, modelName, resolvedModel, providerName, requestPath)
rewriter := NewResponseRewriter(c.Writer, modelName) rewriter := NewResponseRewriter(c.Writer, modelName)
rewriter.suppressThinking = true
c.Writer = rewriter c.Writer = rewriter
// Filter Anthropic-Beta header only for local handling paths // Filter Anthropic-Beta header only for local handling paths
filterAntropicBetaHeader(c) filterAntropicBetaHeader(c)
@@ -267,6 +268,7 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
// proxies (e.g. NewAPI) may return a different model name and lack // proxies (e.g. NewAPI) may return a different model name and lack
// Amp-required fields like thinking.signature. // Amp-required fields like thinking.signature.
rewriter := NewResponseRewriter(c.Writer, modelName) rewriter := NewResponseRewriter(c.Writer, modelName)
rewriter.suppressThinking = providerName != "claude"
c.Writer = rewriter c.Writer = rewriter
// Filter Anthropic-Beta header only for local handling paths // Filter Anthropic-Beta header only for local handling paths
filterAntropicBetaHeader(c) filterAntropicBetaHeader(c)

View File

@@ -17,19 +17,18 @@ import (
// and to keep Amp-compatible response shapes. // and to keep Amp-compatible response shapes.
type ResponseRewriter struct { type ResponseRewriter struct {
gin.ResponseWriter gin.ResponseWriter
body *bytes.Buffer body *bytes.Buffer
originalModel string originalModel string
isStreaming bool isStreaming bool
suppressedContentBlock map[int]struct{} suppressThinking bool
} }
// NewResponseRewriter creates a new response rewriter for model name substitution. // NewResponseRewriter creates a new response rewriter for model name substitution.
func NewResponseRewriter(w gin.ResponseWriter, originalModel string) *ResponseRewriter { func NewResponseRewriter(w gin.ResponseWriter, originalModel string) *ResponseRewriter {
return &ResponseRewriter{ return &ResponseRewriter{
ResponseWriter: w, ResponseWriter: w,
body: &bytes.Buffer{}, body: &bytes.Buffer{},
originalModel: originalModel, originalModel: originalModel,
suppressedContentBlock: make(map[int]struct{}),
} }
} }
@@ -99,7 +98,8 @@ func (rw *ResponseRewriter) Write(data []byte) (int, error) {
} }
if rw.isStreaming { if rw.isStreaming {
n, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(data)) rewritten := rw.rewriteStreamChunk(data)
n, err := rw.ResponseWriter.Write(rewritten)
if err == nil { if err == nil {
if flusher, ok := rw.ResponseWriter.(http.Flusher); ok { if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
flusher.Flush() flusher.Flush()
@@ -162,19 +162,10 @@ func ensureAmpSignature(data []byte) []byte {
return data return data
} }
func (rw *ResponseRewriter) markSuppressedContentBlock(index int) {
if rw.suppressedContentBlock == nil {
rw.suppressedContentBlock = make(map[int]struct{})
}
rw.suppressedContentBlock[index] = struct{}{}
}
func (rw *ResponseRewriter) isSuppressedContentBlock(index int) bool {
_, ok := rw.suppressedContentBlock[index]
return ok
}
func (rw *ResponseRewriter) suppressAmpThinking(data []byte) []byte { func (rw *ResponseRewriter) suppressAmpThinking(data []byte) []byte {
if !rw.suppressThinking {
return data
}
if gjson.GetBytes(data, `content.#(type=="tool_use")`).Exists() { if gjson.GetBytes(data, `content.#(type=="tool_use")`).Exists() {
filtered := gjson.GetBytes(data, `content.#(type!="thinking")#`) filtered := gjson.GetBytes(data, `content.#(type!="thinking")#`)
if filtered.Exists() { if filtered.Exists() {
@@ -185,33 +176,11 @@ func (rw *ResponseRewriter) suppressAmpThinking(data []byte) []byte {
data, err = sjson.SetBytes(data, "content", filtered.Value()) data, err = sjson.SetBytes(data, "content", filtered.Value())
if err != nil { if err != nil {
log.Warnf("Amp ResponseRewriter: failed to suppress thinking blocks: %v", err) log.Warnf("Amp ResponseRewriter: failed to suppress thinking blocks: %v", err)
} else {
log.Debugf("Amp ResponseRewriter: Suppressed %d thinking blocks due to tool usage", originalCount-filteredCount)
} }
} }
} }
} }
eventType := gjson.GetBytes(data, "type").String()
indexResult := gjson.GetBytes(data, "index")
if eventType == "content_block_start" && gjson.GetBytes(data, "content_block.type").String() == "thinking" && indexResult.Exists() {
rw.markSuppressedContentBlock(int(indexResult.Int()))
return nil
}
if gjson.GetBytes(data, "delta.type").String() == "thinking_delta" {
if indexResult.Exists() {
rw.markSuppressedContentBlock(int(indexResult.Int()))
}
return nil
}
if eventType == "content_block_stop" && indexResult.Exists() {
index := int(indexResult.Int())
if rw.isSuppressedContentBlock(index) {
delete(rw.suppressedContentBlock, index)
return nil
}
}
return data return data
} }
@@ -262,6 +231,10 @@ func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte {
jsonData := bytes.TrimPrefix(bytes.TrimSpace(lines[dataIdx]), []byte("data: ")) jsonData := bytes.TrimPrefix(bytes.TrimSpace(lines[dataIdx]), []byte("data: "))
if len(jsonData) > 0 && jsonData[0] == '{' { if len(jsonData) > 0 && jsonData[0] == '{' {
rewritten := rw.rewriteStreamEvent(jsonData) rewritten := rw.rewriteStreamEvent(jsonData)
if rewritten == nil {
i = dataIdx + 1
continue
}
// Emit event line // Emit event line
out = append(out, line) out = append(out, line)
// Emit blank lines between event and data // Emit blank lines between event and data
@@ -287,7 +260,9 @@ func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte {
jsonData := bytes.TrimPrefix(trimmed, []byte("data: ")) jsonData := bytes.TrimPrefix(trimmed, []byte("data: "))
if len(jsonData) > 0 && jsonData[0] == '{' { if len(jsonData) > 0 && jsonData[0] == '{' {
rewritten := rw.rewriteStreamEvent(jsonData) rewritten := rw.rewriteStreamEvent(jsonData)
out = append(out, append([]byte("data: "), rewritten...)) if rewritten != nil {
out = append(out, append([]byte("data: "), rewritten...))
}
i++ i++
continue continue
} }

View File

@@ -102,7 +102,7 @@ func TestRewriteStreamChunk_MessageModel(t *testing.T) {
} }
func TestRewriteStreamChunk_PreservesThinkingWithSignatureInjection(t *testing.T) { func TestRewriteStreamChunk_PreservesThinkingWithSignatureInjection(t *testing.T) {
rw := &ResponseRewriter{suppressedContentBlock: make(map[int]struct{})} rw := &ResponseRewriter{}
chunk := []byte("event: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"abc\"}}\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":1,\"content_block\":{\"type\":\"tool_use\",\"name\":\"bash\",\"input\":{}}}\n\n") chunk := []byte("event: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"abc\"}}\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":1,\"content_block\":{\"type\":\"tool_use\",\"name\":\"bash\",\"input\":{}}}\n\n")
result := rw.rewriteStreamChunk(chunk) result := rw.rewriteStreamChunk(chunk)

View File

@@ -323,6 +323,10 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
// setupRoutes configures the API routes for the server. // setupRoutes configures the API routes for the server.
// It defines the endpoints and associates them with their respective handlers. // It defines the endpoints and associates them with their respective handlers.
func (s *Server) setupRoutes() { func (s *Server) setupRoutes() {
s.engine.GET("/healthz", func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"status": "ok"})
})
s.engine.GET("/management.html", s.serveManagementControlPanel) s.engine.GET("/management.html", s.serveManagementControlPanel)
openaiHandlers := openai.NewOpenAIAPIHandler(s.handlers) openaiHandlers := openai.NewOpenAIAPIHandler(s.handlers)
geminiHandlers := gemini.NewGeminiAPIHandler(s.handlers) geminiHandlers := gemini.NewGeminiAPIHandler(s.handlers)
@@ -569,6 +573,8 @@ func (s *Server) registerManagementRoutes() {
mgmt.PUT("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel) mgmt.PUT("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
mgmt.PATCH("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel) mgmt.PATCH("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
mgmt.GET("/copilot-quota", s.mgmt.GetCopilotQuota)
mgmt.GET("/api-keys", s.mgmt.GetAPIKeys) mgmt.GET("/api-keys", s.mgmt.GetAPIKeys)
mgmt.PUT("/api-keys", s.mgmt.PutAPIKeys) mgmt.PUT("/api-keys", s.mgmt.PutAPIKeys)
mgmt.PATCH("/api-keys", s.mgmt.PatchAPIKeys) mgmt.PATCH("/api-keys", s.mgmt.PatchAPIKeys)

View File

@@ -1,6 +1,7 @@
package api package api
import ( import (
"encoding/json"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"os" "os"
@@ -46,6 +47,28 @@ func newTestServer(t *testing.T) *Server {
return NewServer(cfg, authManager, accessManager, configPath) return NewServer(cfg, authManager, accessManager, configPath)
} }
func TestHealthz(t *testing.T) {
server := newTestServer(t)
req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
rr := httptest.NewRecorder()
server.engine.ServeHTTP(rr, req)
if rr.Code != http.StatusOK {
t.Fatalf("unexpected status code: got %d want %d; body=%s", rr.Code, http.StatusOK, rr.Body.String())
}
var resp struct {
Status string `json:"status"`
}
if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
t.Fatalf("failed to parse response JSON: %v; body=%s", err, rr.Body.String())
}
if resp.Status != "ok" {
t.Fatalf("unexpected response status: got %q want %q", resp.Status, "ok")
}
}
func TestAmpProviderModelRoutes(t *testing.T) { func TestAmpProviderModelRoutes(t *testing.T) {
testCases := []struct { testCases := []struct {
name string name string

View File

@@ -235,6 +235,74 @@ type CopilotModelEntry struct {
Capabilities map[string]any `json:"capabilities,omitempty"` Capabilities map[string]any `json:"capabilities,omitempty"`
} }
// CopilotModelLimits holds the token limits returned by the Copilot /models API
// under capabilities.limits. These limits vary by account type (individual vs
// business) and are the authoritative source for enforcing prompt size.
type CopilotModelLimits struct {
// MaxContextWindowTokens is the total context window (prompt + output).
MaxContextWindowTokens int
// MaxPromptTokens is the hard limit on input/prompt tokens.
// Exceeding this triggers a 400 error from the Copilot API.
MaxPromptTokens int
// MaxOutputTokens is the maximum number of output/completion tokens.
MaxOutputTokens int
}
// Limits extracts the token limits from the model's capabilities map.
// Returns nil if no limits are available or the structure is unexpected.
//
// Expected Copilot API shape:
//
// "capabilities": {
// "limits": {
// "max_context_window_tokens": 200000,
// "max_prompt_tokens": 168000,
// "max_output_tokens": 32000
// }
// }
func (e *CopilotModelEntry) Limits() *CopilotModelLimits {
if e.Capabilities == nil {
return nil
}
limitsRaw, ok := e.Capabilities["limits"]
if !ok {
return nil
}
limitsMap, ok := limitsRaw.(map[string]any)
if !ok {
return nil
}
result := &CopilotModelLimits{
MaxContextWindowTokens: anyToInt(limitsMap["max_context_window_tokens"]),
MaxPromptTokens: anyToInt(limitsMap["max_prompt_tokens"]),
MaxOutputTokens: anyToInt(limitsMap["max_output_tokens"]),
}
// Only return if at least one field is populated.
if result.MaxContextWindowTokens == 0 && result.MaxPromptTokens == 0 && result.MaxOutputTokens == 0 {
return nil
}
return result
}
// anyToInt converts a JSON-decoded numeric value to int.
// Go's encoding/json decodes numbers into float64 when the target is any/interface{}.
func anyToInt(v any) int {
switch n := v.(type) {
case float64:
return int(n)
case float32:
return int(n)
case int:
return n
case int64:
return int(n)
default:
return 0
}
}
// CopilotModelsResponse represents the response from the Copilot /models endpoint. // CopilotModelsResponse represents the response from the Copilot /models endpoint.
type CopilotModelsResponse struct { type CopilotModelsResponse struct {
Data []CopilotModelEntry `json:"data"` Data []CopilotModelEntry `json:"data"`

View File

@@ -30,6 +30,10 @@ type VertexCredentialStorage struct {
// Type is the provider identifier stored alongside credentials. Always "vertex". // Type is the provider identifier stored alongside credentials. Always "vertex".
Type string `json:"type"` Type string `json:"type"`
// Prefix optionally namespaces models for this credential (e.g., "teamA").
// This results in model names like "teamA/gemini-2.0-flash".
Prefix string `json:"prefix,omitempty"`
} }
// SaveTokenToFile writes the credential payload to the given file path in JSON format. // SaveTokenToFile writes the credential payload to the given file path in JSON format.

View File

@@ -20,7 +20,7 @@ import (
// DoVertexImport imports a Google Cloud service account key JSON and persists // DoVertexImport imports a Google Cloud service account key JSON and persists
// it as a "vertex" provider credential. The file content is embedded in the auth // it as a "vertex" provider credential. The file content is embedded in the auth
// file to allow portable deployment across stores. // file to allow portable deployment across stores.
func DoVertexImport(cfg *config.Config, keyPath string) { func DoVertexImport(cfg *config.Config, keyPath string, prefix string) {
if cfg == nil { if cfg == nil {
cfg = &config.Config{} cfg = &config.Config{}
} }
@@ -62,13 +62,28 @@ func DoVertexImport(cfg *config.Config, keyPath string) {
// Default location if not provided by user. Can be edited in the saved file later. // Default location if not provided by user. Can be edited in the saved file later.
location := "us-central1" location := "us-central1"
fileName := fmt.Sprintf("vertex-%s.json", sanitizeFilePart(projectID)) // Normalize and validate prefix: must be a single segment (no "/" allowed).
prefix = strings.TrimSpace(prefix)
prefix = strings.Trim(prefix, "/")
if prefix != "" && strings.Contains(prefix, "/") {
log.Errorf("vertex-import: prefix must be a single segment (no '/' allowed): %q", prefix)
return
}
// Include prefix in filename so importing the same project with different
// prefixes creates separate credential files instead of overwriting.
baseName := sanitizeFilePart(projectID)
if prefix != "" {
baseName = sanitizeFilePart(prefix) + "-" + baseName
}
fileName := fmt.Sprintf("vertex-%s.json", baseName)
// Build auth record // Build auth record
storage := &vertex.VertexCredentialStorage{ storage := &vertex.VertexCredentialStorage{
ServiceAccount: sa, ServiceAccount: sa,
ProjectID: projectID, ProjectID: projectID,
Email: email, Email: email,
Location: location, Location: location,
Prefix: prefix,
} }
metadata := map[string]any{ metadata := map[string]any{
"service_account": sa, "service_account": sa,
@@ -76,6 +91,7 @@ func DoVertexImport(cfg *config.Config, keyPath string) {
"email": email, "email": email,
"location": location, "location": location,
"type": "vertex", "type": "vertex",
"prefix": prefix,
"label": labelForVertex(projectID, email), "label": labelForVertex(projectID, email),
} }
record := &coreauth.Auth{ record := &coreauth.Auth{

View File

@@ -9,6 +9,10 @@ type SDKConfig struct {
// ProxyURL is the URL of an optional proxy server to use for outbound requests. // ProxyURL is the URL of an optional proxy server to use for outbound requests.
ProxyURL string `yaml:"proxy-url" json:"proxy-url"` ProxyURL string `yaml:"proxy-url" json:"proxy-url"`
// EnableGeminiCLIEndpoint controls whether Gemini CLI internal endpoints (/v1internal:*) are enabled.
// Default is false for safety; when false, /v1internal:* requests are rejected.
EnableGeminiCLIEndpoint bool `yaml:"enable-gemini-cli-endpoint" json:"enable-gemini-cli-endpoint"`
// ForceModelPrefix requires explicit model prefixes (e.g., "teamA/gemini-3-pro-preview") // ForceModelPrefix requires explicit model prefixes (e.g., "teamA/gemini-3-pro-preview")
// to target prefixed credentials. When false, unprefixed model requests may use prefixed // to target prefixed credentials. When false, unprefixed model requests may use prefixed
// credentials as well. // credentials as well.

View File

@@ -0,0 +1,151 @@
// Package misc provides miscellaneous utility functions for the CLI Proxy API server.
package misc
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"sync"
"time"
log "github.com/sirupsen/logrus"
)
const (
antigravityReleasesURL = "https://antigravity-auto-updater-974169037036.us-central1.run.app/releases"
antigravityFallbackVersion = "1.21.9"
antigravityVersionCacheTTL = 6 * time.Hour
antigravityFetchTimeout = 10 * time.Second
)
type antigravityRelease struct {
Version string `json:"version"`
ExecutionID string `json:"execution_id"`
}
var (
cachedAntigravityVersion = antigravityFallbackVersion
antigravityVersionMu sync.RWMutex
antigravityVersionExpiry time.Time
antigravityUpdaterOnce sync.Once
)
// StartAntigravityVersionUpdater starts a background goroutine that periodically refreshes the cached antigravity version.
// This is intentionally decoupled from request execution to avoid blocking executors on version lookups.
func StartAntigravityVersionUpdater(ctx context.Context) {
antigravityUpdaterOnce.Do(func() {
go runAntigravityVersionUpdater(ctx)
})
}
func runAntigravityVersionUpdater(ctx context.Context) {
if ctx == nil {
ctx = context.Background()
}
ticker := time.NewTicker(antigravityVersionCacheTTL / 2)
defer ticker.Stop()
log.Infof("periodic antigravity version refresh started (interval=%s)", antigravityVersionCacheTTL/2)
refreshAntigravityVersion(ctx)
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
refreshAntigravityVersion(ctx)
}
}
}
func refreshAntigravityVersion(ctx context.Context) {
version, errFetch := fetchAntigravityLatestVersion(ctx)
antigravityVersionMu.Lock()
defer antigravityVersionMu.Unlock()
now := time.Now()
if errFetch == nil {
cachedAntigravityVersion = version
antigravityVersionExpiry = now.Add(antigravityVersionCacheTTL)
log.WithField("version", version).Info("fetched latest antigravity version")
return
}
if cachedAntigravityVersion == "" || now.After(antigravityVersionExpiry) {
cachedAntigravityVersion = antigravityFallbackVersion
antigravityVersionExpiry = now.Add(antigravityVersionCacheTTL)
log.WithError(errFetch).Warn("failed to refresh antigravity version, using fallback version")
return
}
log.WithError(errFetch).Debug("failed to refresh antigravity version, keeping cached value")
}
// AntigravityLatestVersion returns the cached antigravity version refreshed by StartAntigravityVersionUpdater.
// It falls back to antigravityFallbackVersion if the cache is empty or stale.
func AntigravityLatestVersion() string {
antigravityVersionMu.RLock()
if cachedAntigravityVersion != "" && time.Now().Before(antigravityVersionExpiry) {
v := cachedAntigravityVersion
antigravityVersionMu.RUnlock()
return v
}
antigravityVersionMu.RUnlock()
return antigravityFallbackVersion
}
// AntigravityUserAgent returns the User-Agent string for antigravity requests
// using the latest version fetched from the releases API.
func AntigravityUserAgent() string {
return fmt.Sprintf("antigravity/%s darwin/arm64", AntigravityLatestVersion())
}
func fetchAntigravityLatestVersion(ctx context.Context) (string, error) {
if ctx == nil {
ctx = context.Background()
}
client := &http.Client{Timeout: antigravityFetchTimeout}
httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodGet, antigravityReleasesURL, nil)
if errReq != nil {
return "", fmt.Errorf("build antigravity releases request: %w", errReq)
}
resp, errDo := client.Do(httpReq)
if errDo != nil {
return "", fmt.Errorf("fetch antigravity releases: %w", errDo)
}
defer func() {
if errClose := resp.Body.Close(); errClose != nil {
log.WithError(errClose).Warn("antigravity releases response body close error")
}
}()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("antigravity releases API returned status %d", resp.StatusCode)
}
var releases []antigravityRelease
if errDecode := json.NewDecoder(resp.Body).Decode(&releases); errDecode != nil {
return "", fmt.Errorf("decode antigravity releases response: %w", errDecode)
}
if len(releases) == 0 {
return "", errors.New("antigravity releases API returned empty list")
}
version := releases[0].Version
if version == "" {
return "", errors.New("antigravity releases API returned empty version")
}
return version, nil
}

View File

@@ -93,6 +93,30 @@ func GetAntigravityModels() []*ModelInfo {
func GetCodeBuddyModels() []*ModelInfo { func GetCodeBuddyModels() []*ModelInfo {
now := int64(1748044800) // 2025-05-24 now := int64(1748044800) // 2025-05-24
return []*ModelInfo{ return []*ModelInfo{
{
ID: "auto",
Object: "model",
Created: now,
OwnedBy: "tencent",
Type: "codebuddy",
DisplayName: "Auto",
Description: "Automatic model selection via CodeBuddy",
ContextLength: 128000,
MaxCompletionTokens: 32768,
SupportedEndpoints: []string{"/chat/completions"},
},
{
ID: "glm-5.0-turbo",
Object: "model",
Created: now,
OwnedBy: "tencent",
Type: "codebuddy",
DisplayName: "GLM-5.0 Turbo",
Description: "GLM-5.0 Turbo via CodeBuddy",
ContextLength: 128000,
MaxCompletionTokens: 32768,
SupportedEndpoints: []string{"/chat/completions"},
},
{ {
ID: "glm-5.0", ID: "glm-5.0",
Object: "model", Object: "model",
@@ -118,13 +142,13 @@ func GetCodeBuddyModels() []*ModelInfo {
SupportedEndpoints: []string{"/chat/completions"}, SupportedEndpoints: []string{"/chat/completions"},
}, },
{ {
ID: "minimax-m2.5", ID: "minimax-m2.7",
Object: "model", Object: "model",
Created: now, Created: now,
OwnedBy: "tencent", OwnedBy: "tencent",
Type: "codebuddy", Type: "codebuddy",
DisplayName: "MiniMax M2.5", DisplayName: "MiniMax M2.7",
Description: "MiniMax M2.5 via CodeBuddy", Description: "MiniMax M2.7 via CodeBuddy",
ContextLength: 200000, ContextLength: 200000,
MaxCompletionTokens: 32768, MaxCompletionTokens: 32768,
SupportedEndpoints: []string{"/chat/completions"}, SupportedEndpoints: []string{"/chat/completions"},
@@ -141,6 +165,19 @@ func GetCodeBuddyModels() []*ModelInfo {
MaxCompletionTokens: 32768, MaxCompletionTokens: 32768,
SupportedEndpoints: []string{"/chat/completions"}, SupportedEndpoints: []string{"/chat/completions"},
}, },
{
ID: "kimi-k2-thinking",
Object: "model",
Created: now,
OwnedBy: "tencent",
Type: "codebuddy",
DisplayName: "Kimi K2 Thinking",
Description: "Kimi K2 Thinking via CodeBuddy",
ContextLength: 128000,
MaxCompletionTokens: 32768,
Thinking: &ThinkingSupport{ZeroAllowed: true},
SupportedEndpoints: []string{"/chat/completions"},
},
{ {
ID: "deepseek-v3-2-volc", ID: "deepseek-v3-2-volc",
Object: "model", Object: "model",
@@ -148,24 +185,11 @@ func GetCodeBuddyModels() []*ModelInfo {
OwnedBy: "tencent", OwnedBy: "tencent",
Type: "codebuddy", Type: "codebuddy",
DisplayName: "DeepSeek V3.2 (Volc)", DisplayName: "DeepSeek V3.2 (Volc)",
Description: "DeepSeek V3.2 via CodeBuddy (Volcano Engine)", Description: "DeepSeek V3.2 via CodeBuddy",
ContextLength: 128000, ContextLength: 128000,
MaxCompletionTokens: 32768, MaxCompletionTokens: 32768,
SupportedEndpoints: []string{"/chat/completions"}, SupportedEndpoints: []string{"/chat/completions"},
}, },
{
ID: "hunyuan-2.0-thinking",
Object: "model",
Created: now,
OwnedBy: "tencent",
Type: "codebuddy",
DisplayName: "Hunyuan 2.0 Thinking",
Description: "Tencent Hunyuan 2.0 Thinking via CodeBuddy",
ContextLength: 128000,
MaxCompletionTokens: 32768,
Thinking: &ThinkingSupport{ZeroAllowed: true},
SupportedEndpoints: []string{"/chat/completions"},
},
} }
} }
@@ -525,6 +549,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000, ContextLength: 200000,
MaxCompletionTokens: 64000, MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"}, SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
}, },
{ {
ID: "claude-opus-4.6", ID: "claude-opus-4.6",
@@ -537,6 +562,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000, ContextLength: 200000,
MaxCompletionTokens: 64000, MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"}, SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
}, },
{ {
ID: "claude-sonnet-4", ID: "claude-sonnet-4",
@@ -549,6 +575,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000, ContextLength: 200000,
MaxCompletionTokens: 64000, MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"}, SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
}, },
{ {
ID: "claude-sonnet-4.5", ID: "claude-sonnet-4.5",
@@ -561,6 +588,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000, ContextLength: 200000,
MaxCompletionTokens: 64000, MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"}, SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
}, },
{ {
ID: "claude-sonnet-4.6", ID: "claude-sonnet-4.6",
@@ -573,6 +601,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000, ContextLength: 200000,
MaxCompletionTokens: 64000, MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"}, SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
}, },
{ {
ID: "gemini-2.5-pro", ID: "gemini-2.5-pro",

View File

@@ -280,6 +280,7 @@
"dynamic_allowed": true, "dynamic_allowed": true,
"levels": [ "levels": [
"low", "low",
"medium",
"high" "high"
] ]
} }
@@ -554,6 +555,7 @@
"dynamic_allowed": true, "dynamic_allowed": true,
"levels": [ "levels": [
"low", "low",
"medium",
"high" "high"
] ]
} }
@@ -610,6 +612,8 @@
"dynamic_allowed": true, "dynamic_allowed": true,
"levels": [ "levels": [
"minimal", "minimal",
"low",
"medium",
"high" "high"
] ]
} }
@@ -838,6 +842,7 @@
"dynamic_allowed": true, "dynamic_allowed": true,
"levels": [ "levels": [
"low", "low",
"medium",
"high" "high"
] ]
} }
@@ -896,6 +901,8 @@
"dynamic_allowed": true, "dynamic_allowed": true,
"levels": [ "levels": [
"minimal", "minimal",
"low",
"medium",
"high" "high"
] ]
} }
@@ -1070,6 +1077,8 @@
"dynamic_allowed": true, "dynamic_allowed": true,
"levels": [ "levels": [
"minimal", "minimal",
"low",
"medium",
"high" "high"
] ]
} }
@@ -1371,6 +1380,75 @@
"xhigh" "xhigh"
] ]
} }
},
{
"id": "gpt-5.3-codex",
"object": "model",
"created": 1770307200,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.3 Codex",
"version": "gpt-5.3",
"description": "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
},
{
"id": "gpt-5.4",
"object": "model",
"created": 1772668800,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4",
"version": "gpt-5.4",
"description": "Stable version of GPT 5.4",
"context_length": 1050000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
},
{
"id": "gpt-5.4-mini",
"object": "model",
"created": 1773705600,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4 Mini",
"version": "gpt-5.4-mini",
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
} }
], ],
"codex-team": [ "codex-team": [
@@ -1623,6 +1701,29 @@
"xhigh" "xhigh"
] ]
} }
},
{
"id": "gpt-5.4-mini",
"object": "model",
"created": 1773705600,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4 Mini",
"version": "gpt-5.4-mini",
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
} }
], ],
"codex-plus": [ "codex-plus": [
@@ -1898,6 +1999,29 @@
"xhigh" "xhigh"
] ]
} }
},
{
"id": "gpt-5.4-mini",
"object": "model",
"created": 1773705600,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4 Mini",
"version": "gpt-5.4-mini",
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
} }
], ],
"codex-pro": [ "codex-pro": [
@@ -2173,55 +2297,40 @@
"xhigh" "xhigh"
] ]
} }
},
{
"id": "gpt-5.4-mini",
"object": "model",
"created": 1773705600,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4 Mini",
"version": "gpt-5.4-mini",
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
} }
], ],
"qwen": [ "qwen": [
{
"id": "qwen3-coder-plus",
"object": "model",
"created": 1753228800,
"owned_by": "qwen",
"type": "qwen",
"display_name": "Qwen3 Coder Plus",
"version": "3.0",
"description": "Advanced code generation and understanding model",
"context_length": 32768,
"max_completion_tokens": 8192,
"supported_parameters": [
"temperature",
"top_p",
"max_tokens",
"stream",
"stop"
]
},
{
"id": "qwen3-coder-flash",
"object": "model",
"created": 1753228800,
"owned_by": "qwen",
"type": "qwen",
"display_name": "Qwen3 Coder Flash",
"version": "3.0",
"description": "Fast code generation model",
"context_length": 8192,
"max_completion_tokens": 2048,
"supported_parameters": [
"temperature",
"top_p",
"max_tokens",
"stream",
"stop"
]
},
{ {
"id": "coder-model", "id": "coder-model",
"object": "model", "object": "model",
"created": 1771171200, "created": 1771171200,
"owned_by": "qwen", "owned_by": "qwen",
"type": "qwen", "type": "qwen",
"display_name": "Qwen 3.5 Plus", "display_name": "Qwen 3.6 Plus",
"version": "3.5", "version": "3.6",
"description": "efficient hybrid model with leading coding performance", "description": "efficient hybrid model with leading coding performance",
"context_length": 1048576, "context_length": 1048576,
"max_completion_tokens": 65536, "max_completion_tokens": 65536,
@@ -2232,25 +2341,6 @@
"stream", "stream",
"stop" "stop"
] ]
},
{
"id": "vision-model",
"object": "model",
"created": 1758672000,
"owned_by": "qwen",
"type": "qwen",
"display_name": "Qwen3 Vision Model",
"version": "3.0",
"description": "Vision model model",
"context_length": 32768,
"max_completion_tokens": 2048,
"supported_parameters": [
"temperature",
"top_p",
"max_tokens",
"stream",
"stop"
]
} }
], ],
"iflow": [ "iflow": [
@@ -2639,11 +2729,12 @@
"context_length": 1048576, "context_length": 1048576,
"max_completion_tokens": 65535, "max_completion_tokens": 65535,
"thinking": { "thinking": {
"min": 128, "min": 1,
"max": 32768, "max": 65535,
"dynamic_allowed": true, "dynamic_allowed": true,
"levels": [ "levels": [
"low", "low",
"medium",
"high" "high"
] ]
} }
@@ -2659,11 +2750,12 @@
"context_length": 1048576, "context_length": 1048576,
"max_completion_tokens": 65535, "max_completion_tokens": 65535,
"thinking": { "thinking": {
"min": 128, "min": 1,
"max": 32768, "max": 65535,
"dynamic_allowed": true, "dynamic_allowed": true,
"levels": [ "levels": [
"low", "low",
"medium",
"high" "high"
] ]
} }

View File

@@ -24,6 +24,7 @@ import (
"github.com/google/uuid" "github.com/google/uuid"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps" "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/router-for-me/CLIProxyAPI/v6/internal/util"
@@ -45,7 +46,7 @@ const (
antigravityGeneratePath = "/v1internal:generateContent" antigravityGeneratePath = "/v1internal:generateContent"
antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com" antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
antigravityClientSecret = "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" antigravityClientSecret = "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"
defaultAntigravityAgent = "antigravity/1.19.6 darwin/arm64" defaultAntigravityAgent = "antigravity/1.21.9 darwin/arm64" // fallback only; overridden at runtime by misc.AntigravityUserAgent()
antigravityAuthType = "antigravity" antigravityAuthType = "antigravity"
refreshSkew = 3000 * time.Second refreshSkew = 3000 * time.Second
antigravityCreditsRetryTTL = 5 * time.Hour antigravityCreditsRetryTTL = 5 * time.Hour
@@ -1739,7 +1740,7 @@ func resolveUserAgent(auth *cliproxyauth.Auth) string {
} }
} }
} }
return defaultAntigravityAgent return misc.AntigravityUserAgent()
} }
func antigravityRetryAttempts(auth *cliproxyauth.Auth, cfg *config.Config) int { func antigravityRetryAttempts(auth *cliproxyauth.Auth, cfg *config.Config) int {

View File

@@ -137,6 +137,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
// Disable thinking if tool_choice forces tool use (Anthropic API constraint) // Disable thinking if tool_choice forces tool use (Anthropic API constraint)
body = disableThinkingIfToolChoiceForced(body) body = disableThinkingIfToolChoiceForced(body)
body = normalizeClaudeTemperatureForThinking(body)
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support) // Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
if countCacheControls(body) == 0 { if countCacheControls(body) == 0 {
@@ -307,6 +308,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
// Disable thinking if tool_choice forces tool use (Anthropic API constraint) // Disable thinking if tool_choice forces tool use (Anthropic API constraint)
body = disableThinkingIfToolChoiceForced(body) body = disableThinkingIfToolChoiceForced(body)
body = normalizeClaudeTemperatureForThinking(body)
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support) // Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
if countCacheControls(body) == 0 { if countCacheControls(body) == 0 {
@@ -651,6 +653,25 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte {
return body return body
} }
// normalizeClaudeTemperatureForThinking keeps Anthropic message requests valid when
// thinking is enabled. Anthropic rejects temperatures other than 1 when
// thinking.type is enabled/adaptive/auto.
func normalizeClaudeTemperatureForThinking(body []byte) []byte {
if !gjson.GetBytes(body, "temperature").Exists() {
return body
}
thinkingType := strings.ToLower(strings.TrimSpace(gjson.GetBytes(body, "thinking.type").String()))
switch thinkingType {
case "enabled", "adaptive", "auto":
if temp := gjson.GetBytes(body, "temperature"); temp.Exists() && temp.Type == gjson.Number && temp.Float() == 1 {
return body
}
body, _ = sjson.SetBytes(body, "temperature", 1)
}
return body
}
type compositeReadCloser struct { type compositeReadCloser struct {
io.Reader io.Reader
closers []func() error closers []func() error
@@ -827,6 +848,14 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
hasClaude1MHeader = true hasClaude1MHeader = true
} }
} }
// Also check auth attributes — GitLab Duo sets gitlab_duo_force_context_1m
// when routing through the Anthropic gateway, but the gin headers won't have
// X-CPA-CLAUDE-1M because the request is internally constructed.
if !hasClaude1MHeader && auth != nil && auth.Attributes != nil {
if auth.Attributes["gitlab_duo_force_context_1m"] == "true" {
hasClaude1MHeader = true
}
}
// Merge extra betas from request body and request flags. // Merge extra betas from request body and request flags.
if len(extraBetas) > 0 || hasClaude1MHeader { if len(extraBetas) > 0 || hasClaude1MHeader {

View File

@@ -1833,3 +1833,43 @@ func TestApplyCloaking_PreservesConfiguredStrictModeAndSensitiveWordsWhenModeOmi
t.Fatalf("expected configured sensitive word obfuscation to apply, got %q", got) t.Fatalf("expected configured sensitive word obfuscation to apply, got %q", got)
} }
} }
func TestNormalizeClaudeTemperatureForThinking_AdaptiveCoercesToOne(t *testing.T) {
payload := []byte(`{"temperature":0,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"}}`)
out := normalizeClaudeTemperatureForThinking(payload)
if got := gjson.GetBytes(out, "temperature").Float(); got != 1 {
t.Fatalf("temperature = %v, want 1", got)
}
}
func TestNormalizeClaudeTemperatureForThinking_EnabledCoercesToOne(t *testing.T) {
payload := []byte(`{"temperature":0.2,"thinking":{"type":"enabled","budget_tokens":2048}}`)
out := normalizeClaudeTemperatureForThinking(payload)
if got := gjson.GetBytes(out, "temperature").Float(); got != 1 {
t.Fatalf("temperature = %v, want 1", got)
}
}
func TestNormalizeClaudeTemperatureForThinking_NoThinkingLeavesTemperatureAlone(t *testing.T) {
payload := []byte(`{"temperature":0,"messages":[{"role":"user","content":"hi"}]}`)
out := normalizeClaudeTemperatureForThinking(payload)
if got := gjson.GetBytes(out, "temperature").Float(); got != 0 {
t.Fatalf("temperature = %v, want 0", got)
}
}
func TestNormalizeClaudeTemperatureForThinking_AfterForcedToolChoiceKeepsOriginalTemperature(t *testing.T) {
payload := []byte(`{"temperature":0,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"},"tool_choice":{"type":"any"}}`)
out := disableThinkingIfToolChoiceForced(payload)
out = normalizeClaudeTemperatureForThinking(out)
if gjson.GetBytes(out, "thinking").Exists() {
t.Fatalf("thinking should be removed when tool_choice forces tool use")
}
if got := gjson.GetBytes(out, "temperature").Float(); got != 0 {
t.Fatalf("temperature = %v, want 0", got)
}
}

View File

@@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"slices"
"strings" "strings"
"sync" "sync"
"time" "time"
@@ -17,6 +18,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
@@ -40,7 +42,7 @@ const (
copilotEditorVersion = "vscode/1.107.0" copilotEditorVersion = "vscode/1.107.0"
copilotPluginVersion = "copilot-chat/0.35.0" copilotPluginVersion = "copilot-chat/0.35.0"
copilotIntegrationID = "vscode-chat" copilotIntegrationID = "vscode-chat"
copilotOpenAIIntent = "conversation-panel" copilotOpenAIIntent = "conversation-edits"
copilotGitHubAPIVer = "2025-04-01" copilotGitHubAPIVer = "2025-04-01"
) )
@@ -126,6 +128,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = e.normalizeModel(req.Model, body) body = e.normalizeModel(req.Model, body)
body = flattenAssistantContent(body) body = flattenAssistantContent(body)
body = stripUnsupportedBetas(body)
// Detect vision content before input normalization removes messages // Detect vision content before input normalization removes messages
hasVision := detectVisionContent(body) hasVision := detectVisionContent(body)
@@ -142,6 +145,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
if useResponses { if useResponses {
body = normalizeGitHubCopilotResponsesInput(body) body = normalizeGitHubCopilotResponsesInput(body)
body = normalizeGitHubCopilotResponsesTools(body) body = normalizeGitHubCopilotResponsesTools(body)
body = applyGitHubCopilotResponsesDefaults(body)
} else { } else {
body = normalizeGitHubCopilotChatTools(body) body = normalizeGitHubCopilotChatTools(body)
} }
@@ -225,9 +229,10 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
if useResponses && from.String() == "claude" { if useResponses && from.String() == "claude" {
converted = translateGitHubCopilotResponsesNonStreamToClaude(data) converted = translateGitHubCopilotResponsesNonStreamToClaude(data)
} else { } else {
data = normalizeGitHubCopilotReasoningField(data)
converted = sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param) converted = sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
} }
resp = cliproxyexecutor.Response{Payload: converted} resp = cliproxyexecutor.Response{Payload: converted, Headers: httpResp.Header.Clone()}
reporter.ensurePublished(ctx) reporter.ensurePublished(ctx)
return resp, nil return resp, nil
} }
@@ -256,6 +261,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = e.normalizeModel(req.Model, body) body = e.normalizeModel(req.Model, body)
body = flattenAssistantContent(body) body = flattenAssistantContent(body)
body = stripUnsupportedBetas(body)
// Detect vision content before input normalization removes messages // Detect vision content before input normalization removes messages
hasVision := detectVisionContent(body) hasVision := detectVisionContent(body)
@@ -272,6 +278,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
if useResponses { if useResponses {
body = normalizeGitHubCopilotResponsesInput(body) body = normalizeGitHubCopilotResponsesInput(body)
body = normalizeGitHubCopilotResponsesTools(body) body = normalizeGitHubCopilotResponsesTools(body)
body = applyGitHubCopilotResponsesDefaults(body)
} else { } else {
body = normalizeGitHubCopilotChatTools(body) body = normalizeGitHubCopilotChatTools(body)
} }
@@ -378,7 +385,20 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
if useResponses && from.String() == "claude" { if useResponses && from.String() == "claude" {
chunks = translateGitHubCopilotResponsesStreamToClaude(bytes.Clone(line), &param) chunks = translateGitHubCopilotResponsesStreamToClaude(bytes.Clone(line), &param)
} else { } else {
chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param) // Strip SSE "data: " prefix before reasoning field normalization,
// since normalizeGitHubCopilotReasoningField expects pure JSON.
// Re-wrap with the prefix afterward for the translator.
normalizedLine := bytes.Clone(line)
if bytes.HasPrefix(line, dataTag) {
sseData := bytes.TrimSpace(line[len(dataTag):])
if !bytes.Equal(sseData, []byte("[DONE]")) && gjson.ValidBytes(sseData) {
normalized := normalizeGitHubCopilotReasoningField(bytes.Clone(sseData))
if !bytes.Equal(normalized, sseData) {
normalizedLine = append(append([]byte(nil), dataTag...), normalized...)
}
}
}
chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, normalizedLine, &param)
} }
for i := range chunks { for i := range chunks {
out <- cliproxyexecutor.StreamChunk{Payload: bytes.Clone(chunks[i])} out <- cliproxyexecutor.StreamChunk{Payload: bytes.Clone(chunks[i])}
@@ -400,9 +420,28 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
}, nil }, nil
} }
// CountTokens is not supported for GitHub Copilot. // CountTokens estimates token count locally using tiktoken, since the GitHub
func (e *GitHubCopilotExecutor) CountTokens(_ context.Context, _ *cliproxyauth.Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { // Copilot API does not expose a dedicated token counting endpoint.
return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported for github-copilot"} func (e *GitHubCopilotExecutor) CountTokens(ctx context.Context, _ *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
baseModel := thinking.ParseSuffix(req.Model).ModelName
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
enc, err := helps.TokenizerForModel(baseModel)
if err != nil {
return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: tokenizer init failed: %w", err)
}
count, err := helps.CountOpenAIChatTokens(enc, translated)
if err != nil {
return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: token counting failed: %w", err)
}
usageJSON := helps.BuildOpenAIUsageJSON(count)
translatedUsage := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON)
return cliproxyexecutor.Response{Payload: translatedUsage}, nil
} }
// Refresh validates the GitHub token is still working. // Refresh validates the GitHub token is still working.
@@ -491,46 +530,127 @@ func (e *GitHubCopilotExecutor) applyHeaders(r *http.Request, apiToken string, b
r.Header.Set("X-Request-Id", uuid.NewString()) r.Header.Set("X-Request-Id", uuid.NewString())
initiator := "user" initiator := "user"
if role := detectLastConversationRole(body); role == "assistant" || role == "tool" { if isAgentInitiated(body) {
initiator = "agent" initiator = "agent"
} }
r.Header.Set("X-Initiator", initiator) r.Header.Set("X-Initiator", initiator)
} }
func detectLastConversationRole(body []byte) string { // isAgentInitiated determines whether the current request is agent-initiated
// (tool callbacks, continuations) rather than user-initiated (new user prompt).
//
// GitHub Copilot uses the X-Initiator header for billing:
// - "user" → consumes premium request quota
// - "agent" → free (tool loops, continuations)
//
// The challenge: Claude Code sends tool results as role:"user" messages with
// content type "tool_result". After translation to OpenAI format, the tool_result
// part becomes a separate role:"tool" message, but if the original Claude message
// also contained text content (e.g. skill invocations, attachment descriptions),
// a role:"user" message is emitted AFTER the tool message, making the last message
// appear user-initiated when it's actually part of an agent tool loop.
//
// VSCode Copilot Chat solves this with explicit flags (iterationNumber,
// isContinuation, subAgentInvocationId). Since CPA doesn't have these flags,
// we infer agent status by checking whether the conversation contains prior
// assistant/tool messages — if it does, the current request is a continuation.
//
// References:
// - opencode#8030, opencode#15824: same root cause and fix approach
// - vscode-copilot-chat: toolCallingLoop.ts (iterationNumber === 0)
// - pi-ai: github-copilot-headers.ts (last message role check)
func isAgentInitiated(body []byte) bool {
if len(body) == 0 { if len(body) == 0 {
return "" return false
} }
// Chat Completions API: check messages array
if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() { if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
arr := messages.Array() arr := messages.Array()
if len(arr) == 0 {
return false
}
lastRole := ""
for i := len(arr) - 1; i >= 0; i-- { for i := len(arr) - 1; i >= 0; i-- {
if role := arr[i].Get("role").String(); role != "" { if r := arr[i].Get("role").String(); r != "" {
return role lastRole = r
break
} }
} }
// If last message is assistant or tool, clearly agent-initiated.
if lastRole == "assistant" || lastRole == "tool" {
return true
}
// If last message is "user", check whether it contains tool results
// (indicating a tool-loop continuation) or if the preceding message
// is an assistant tool_use. This is more precise than checking for
// any prior assistant message, which would false-positive on genuine
// multi-turn follow-ups.
if lastRole == "user" {
// Check if the last user message contains tool_result content
lastContent := arr[len(arr)-1].Get("content")
if lastContent.Exists() && lastContent.IsArray() {
for _, part := range lastContent.Array() {
if part.Get("type").String() == "tool_result" {
return true
}
}
}
// Check if the second-to-last message is an assistant with tool_use
if len(arr) >= 2 {
prev := arr[len(arr)-2]
if prev.Get("role").String() == "assistant" {
prevContent := prev.Get("content")
if prevContent.Exists() && prevContent.IsArray() {
for _, part := range prevContent.Array() {
if part.Get("type").String() == "tool_use" {
return true
}
}
}
}
}
}
return false
} }
// Responses API: check input array
if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() { if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() {
arr := inputs.Array() arr := inputs.Array()
for i := len(arr) - 1; i >= 0; i-- { if len(arr) == 0 {
item := arr[i] return false
}
// Most Responses input items carry a top-level role. // Check last item
if role := item.Get("role").String(); role != "" { last := arr[len(arr)-1]
return role if role := last.Get("role").String(); role == "assistant" {
return true
}
switch last.Get("type").String() {
case "function_call", "function_call_arguments", "computer_call":
return true
case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
return true
}
// If last item is user-role, check for prior non-user items
for _, item := range arr {
if role := item.Get("role").String(); role == "assistant" {
return true
} }
switch item.Get("type").String() { switch item.Get("type").String() {
case "function_call", "function_call_arguments", "computer_call": case "function_call", "function_call_output", "function_call_response",
return "assistant" "function_call_arguments", "computer_call", "computer_call_output":
case "function_call_output", "function_call_response", "tool_result", "computer_call_output": return true
return "tool"
} }
} }
} }
return "" return false
} }
// detectVisionContent checks if the request body contains vision/image content. // detectVisionContent checks if the request body contains vision/image content.
@@ -572,6 +692,85 @@ func (e *GitHubCopilotExecutor) normalizeModel(model string, body []byte) []byte
return body return body
} }
// copilotUnsupportedBetas lists beta headers that are Anthropic-specific and
// must not be forwarded to GitHub Copilot. The context-1m beta enables 1M
// context on Anthropic's API, but Copilot's Claude models are limited to
// ~128K-200K. Passing it through would not enable 1M on Copilot, but stripping
// it from the translated body avoids confusing downstream translators.
var copilotUnsupportedBetas = []string{
"context-1m-2025-08-07",
}
// stripUnsupportedBetas removes Anthropic-specific beta entries from the
// translated request body. In OpenAI format the betas may appear under
// "metadata.betas" or a top-level "betas" array; in Claude format they sit at
// "betas". This function checks all known locations.
func stripUnsupportedBetas(body []byte) []byte {
betaPaths := []string{"betas", "metadata.betas"}
for _, path := range betaPaths {
arr := gjson.GetBytes(body, path)
if !arr.Exists() || !arr.IsArray() {
continue
}
var filtered []string
changed := false
for _, item := range arr.Array() {
beta := item.String()
if isCopilotUnsupportedBeta(beta) {
changed = true
continue
}
filtered = append(filtered, beta)
}
if !changed {
continue
}
if len(filtered) == 0 {
body, _ = sjson.DeleteBytes(body, path)
} else {
body, _ = sjson.SetBytes(body, path, filtered)
}
}
return body
}
func isCopilotUnsupportedBeta(beta string) bool {
return slices.Contains(copilotUnsupportedBetas, beta)
}
// normalizeGitHubCopilotReasoningField maps Copilot's non-standard
// 'reasoning_text' field to the standard OpenAI 'reasoning_content' field
// that the SDK translator expects. This handles both streaming deltas
// (choices[].delta.reasoning_text) and non-streaming messages
// (choices[].message.reasoning_text). The field is only renamed when
// 'reasoning_content' is absent or null, preserving standard responses.
// All choices are processed to support n>1 requests.
func normalizeGitHubCopilotReasoningField(data []byte) []byte {
choices := gjson.GetBytes(data, "choices")
if !choices.Exists() || !choices.IsArray() {
return data
}
for i := range choices.Array() {
// Non-streaming: choices[i].message.reasoning_text
msgRT := fmt.Sprintf("choices.%d.message.reasoning_text", i)
msgRC := fmt.Sprintf("choices.%d.message.reasoning_content", i)
if rt := gjson.GetBytes(data, msgRT); rt.Exists() && rt.String() != "" {
if rc := gjson.GetBytes(data, msgRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
data, _ = sjson.SetBytes(data, msgRC, rt.String())
}
}
// Streaming: choices[i].delta.reasoning_text
deltaRT := fmt.Sprintf("choices.%d.delta.reasoning_text", i)
deltaRC := fmt.Sprintf("choices.%d.delta.reasoning_content", i)
if rt := gjson.GetBytes(data, deltaRT); rt.Exists() && rt.String() != "" {
if rc := gjson.GetBytes(data, deltaRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
data, _ = sjson.SetBytes(data, deltaRC, rt.String())
}
}
}
return data
}
func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format, model string) bool { func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format, model string) bool {
if sourceFormat.String() == "openai-response" { if sourceFormat.String() == "openai-response" {
return true return true
@@ -596,12 +795,7 @@ func lookupGitHubCopilotStaticModelInfo(model string) *registry.ModelInfo {
} }
func containsEndpoint(endpoints []string, endpoint string) bool { func containsEndpoint(endpoints []string, endpoint string) bool {
for _, item := range endpoints { return slices.Contains(endpoints, endpoint)
if item == endpoint {
return true
}
}
return false
} }
// flattenAssistantContent converts assistant message content from array format // flattenAssistantContent converts assistant message content from array format
@@ -856,6 +1050,32 @@ func stripGitHubCopilotResponsesUnsupportedFields(body []byte) []byte {
return body return body
} }
// applyGitHubCopilotResponsesDefaults sets required fields for the Responses API
// that both vscode-copilot-chat and pi-ai always include.
//
// References:
// - vscode-copilot-chat: src/platform/endpoint/node/responsesApi.ts
// - pi-ai (badlogic/pi-mono): packages/ai/src/providers/openai-responses.ts
func applyGitHubCopilotResponsesDefaults(body []byte) []byte {
// store: false — prevents request/response storage
if !gjson.GetBytes(body, "store").Exists() {
body, _ = sjson.SetBytes(body, "store", false)
}
// include: ["reasoning.encrypted_content"] — enables reasoning content
// reuse across turns, avoiding redundant computation
if !gjson.GetBytes(body, "include").Exists() {
body, _ = sjson.SetRawBytes(body, "include", []byte(`["reasoning.encrypted_content"]`))
}
// If reasoning.effort is set but reasoning.summary is not, default to "auto"
if gjson.GetBytes(body, "reasoning.effort").Exists() && !gjson.GetBytes(body, "reasoning.summary").Exists() {
body, _ = sjson.SetBytes(body, "reasoning.summary", "auto")
}
return body
}
func normalizeGitHubCopilotResponsesTools(body []byte) []byte { func normalizeGitHubCopilotResponsesTools(body []byte) []byte {
tools := gjson.GetBytes(body, "tools") tools := gjson.GetBytes(body, "tools")
if tools.Exists() { if tools.Exists() {
@@ -1406,6 +1626,21 @@ func FetchGitHubCopilotModels(ctx context.Context, auth *cliproxyauth.Auth, cfg
m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens
} }
// Override with real limits from the Copilot API when available.
// The API returns per-account limits (individual vs business) under
// capabilities.limits, which are more accurate than our static
// fallback values. We use max_prompt_tokens as ContextLength because
// that's the hard limit the Copilot API enforces on prompt size —
// exceeding it triggers "prompt token count exceeds the limit" errors.
if limits := entry.Limits(); limits != nil {
if limits.MaxPromptTokens > 0 {
m.ContextLength = limits.MaxPromptTokens
}
if limits.MaxOutputTokens > 0 {
m.MaxCompletionTokens = limits.MaxOutputTokens
}
}
models = append(models, m) models = append(models, m)
} }

View File

@@ -1,11 +1,14 @@
package executor package executor
import ( import (
"context"
"net/http" "net/http"
"strings" "strings"
"testing" "testing"
copilotauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
) )
@@ -72,7 +75,7 @@ func TestUseGitHubCopilotResponsesEndpoint_CodexModel(t *testing.T) {
} }
func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing.T) { func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing.T) {
t.Parallel() // Not parallel: shares global model registry with DynamicRegistryWinsOverStatic.
if !useGitHubCopilotResponsesEndpoint(sdktranslator.FromString("openai"), "gpt-5.4") { if !useGitHubCopilotResponsesEndpoint(sdktranslator.FromString("openai"), "gpt-5.4") {
t.Fatal("expected responses-only registry model to use /responses") t.Fatal("expected responses-only registry model to use /responses")
} }
@@ -82,7 +85,7 @@ func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing
} }
func TestUseGitHubCopilotResponsesEndpoint_DynamicRegistryWinsOverStatic(t *testing.T) { func TestUseGitHubCopilotResponsesEndpoint_DynamicRegistryWinsOverStatic(t *testing.T) {
t.Parallel() // Not parallel: mutates global model registry, conflicts with RegistryResponsesOnlyModel.
reg := registry.GetGlobalRegistry() reg := registry.GetGlobalRegistry()
clientID := "github-copilot-test-client" clientID := "github-copilot-test-client"
@@ -251,14 +254,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_TextMapping(t *testing
t.Parallel() t.Parallel()
resp := []byte(`{"id":"resp_1","model":"gpt-5-codex","output":[{"type":"message","content":[{"type":"output_text","text":"hello"}]}],"usage":{"input_tokens":3,"output_tokens":5}}`) resp := []byte(`{"id":"resp_1","model":"gpt-5-codex","output":[{"type":"message","content":[{"type":"output_text","text":"hello"}]}],"usage":{"input_tokens":3,"output_tokens":5}}`)
out := translateGitHubCopilotResponsesNonStreamToClaude(resp) out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
if gjson.Get(out, "type").String() != "message" { if gjson.GetBytes(out, "type").String() != "message" {
t.Fatalf("type = %q, want message", gjson.Get(out, "type").String()) t.Fatalf("type = %q, want message", gjson.GetBytes(out, "type").String())
} }
if gjson.Get(out, "content.0.type").String() != "text" { if gjson.GetBytes(out, "content.0.type").String() != "text" {
t.Fatalf("content.0.type = %q, want text", gjson.Get(out, "content.0.type").String()) t.Fatalf("content.0.type = %q, want text", gjson.GetBytes(out, "content.0.type").String())
} }
if gjson.Get(out, "content.0.text").String() != "hello" { if gjson.GetBytes(out, "content.0.text").String() != "hello" {
t.Fatalf("content.0.text = %q, want hello", gjson.Get(out, "content.0.text").String()) t.Fatalf("content.0.text = %q, want hello", gjson.GetBytes(out, "content.0.text").String())
} }
} }
@@ -266,14 +269,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_ToolUseMapping(t *test
t.Parallel() t.Parallel()
resp := []byte(`{"id":"resp_2","model":"gpt-5-codex","output":[{"type":"function_call","id":"fc_1","call_id":"call_1","name":"sum","arguments":"{\"a\":1}"}],"usage":{"input_tokens":1,"output_tokens":2}}`) resp := []byte(`{"id":"resp_2","model":"gpt-5-codex","output":[{"type":"function_call","id":"fc_1","call_id":"call_1","name":"sum","arguments":"{\"a\":1}"}],"usage":{"input_tokens":1,"output_tokens":2}}`)
out := translateGitHubCopilotResponsesNonStreamToClaude(resp) out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
if gjson.Get(out, "content.0.type").String() != "tool_use" { if gjson.GetBytes(out, "content.0.type").String() != "tool_use" {
t.Fatalf("content.0.type = %q, want tool_use", gjson.Get(out, "content.0.type").String()) t.Fatalf("content.0.type = %q, want tool_use", gjson.GetBytes(out, "content.0.type").String())
} }
if gjson.Get(out, "content.0.name").String() != "sum" { if gjson.GetBytes(out, "content.0.name").String() != "sum" {
t.Fatalf("content.0.name = %q, want sum", gjson.Get(out, "content.0.name").String()) t.Fatalf("content.0.name = %q, want sum", gjson.GetBytes(out, "content.0.name").String())
} }
if gjson.Get(out, "stop_reason").String() != "tool_use" { if gjson.GetBytes(out, "stop_reason").String() != "tool_use" {
t.Fatalf("stop_reason = %q, want tool_use", gjson.Get(out, "stop_reason").String()) t.Fatalf("stop_reason = %q, want tool_use", gjson.GetBytes(out, "stop_reason").String())
} }
} }
@@ -282,18 +285,24 @@ func TestTranslateGitHubCopilotResponsesStreamToClaude_TextLifecycle(t *testing.
var param any var param any
created := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.created","response":{"id":"resp_1","model":"gpt-5-codex"}}`), &param) created := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.created","response":{"id":"resp_1","model":"gpt-5-codex"}}`), &param)
if len(created) == 0 || !strings.Contains(created[0], "message_start") { if len(created) == 0 || !strings.Contains(string(created[0]), "message_start") {
t.Fatalf("created events = %#v, want message_start", created) t.Fatalf("created events = %#v, want message_start", created)
} }
delta := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.output_text.delta","delta":"he"}`), &param) delta := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.output_text.delta","delta":"he"}`), &param)
joinedDelta := strings.Join(delta, "") var joinedDelta string
for _, d := range delta {
joinedDelta += string(d)
}
if !strings.Contains(joinedDelta, "content_block_start") || !strings.Contains(joinedDelta, "text_delta") { if !strings.Contains(joinedDelta, "content_block_start") || !strings.Contains(joinedDelta, "text_delta") {
t.Fatalf("delta events = %#v, want content_block_start + text_delta", delta) t.Fatalf("delta events = %#v, want content_block_start + text_delta", delta)
} }
completed := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.completed","response":{"usage":{"input_tokens":7,"output_tokens":9}}}`), &param) completed := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.completed","response":{"usage":{"input_tokens":7,"output_tokens":9}}}`), &param)
joinedCompleted := strings.Join(completed, "") var joinedCompleted string
for _, c := range completed {
joinedCompleted += string(c)
}
if !strings.Contains(joinedCompleted, "message_delta") || !strings.Contains(joinedCompleted, "message_stop") { if !strings.Contains(joinedCompleted, "message_delta") || !strings.Contains(joinedCompleted, "message_stop") {
t.Fatalf("completed events = %#v, want message_delta + message_stop", completed) t.Fatalf("completed events = %#v, want message_delta + message_stop", completed)
} }
@@ -312,15 +321,17 @@ func TestApplyHeaders_XInitiator_UserOnly(t *testing.T) {
} }
} }
func TestApplyHeaders_XInitiator_UserWhenLastRoleIsUser(t *testing.T) { func TestApplyHeaders_XInitiator_AgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
t.Parallel() t.Parallel()
e := &GitHubCopilotExecutor{} e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil) req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Last role governs the initiator decision. // When the last role is "user" and the message contains tool_result content,
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":"tool result here"}]}`) // the request is a continuation (e.g. Claude tool result translated to a
// synthetic user message). Should be "agent".
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu1","content":"file contents..."}]}]}`)
e.applyHeaders(req, "token", body) e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" { if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want user (last role is user)", got) t.Fatalf("X-Initiator = %q, want agent (last user contains tool_result)", got)
} }
} }
@@ -328,10 +339,11 @@ func TestApplyHeaders_XInitiator_AgentWithToolRole(t *testing.T) {
t.Parallel() t.Parallel()
e := &GitHubCopilotExecutor{} e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil) req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// When the last message has role "tool", it's clearly agent-initiated.
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"tool","content":"result"}]}`) body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"tool","content":"result"}]}`)
e.applyHeaders(req, "token", body) e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "agent" { if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want agent (tool role exists)", got) t.Fatalf("X-Initiator = %q, want agent (last role is tool)", got)
} }
} }
@@ -346,14 +358,15 @@ func TestApplyHeaders_XInitiator_InputArrayLastAssistantMessage(t *testing.T) {
} }
} }
func TestApplyHeaders_XInitiator_InputArrayLastUserMessage(t *testing.T) { func TestApplyHeaders_XInitiator_InputArrayAgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
t.Parallel() t.Parallel()
e := &GitHubCopilotExecutor{} e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil) req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Responses API: last item is user-role but history contains assistant → agent.
body := []byte(`{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`) body := []byte(`{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`)
e.applyHeaders(req, "token", body) e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" { if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want user (last role is user)", got) t.Fatalf("X-Initiator = %q, want agent (history has assistant)", got)
} }
} }
@@ -368,6 +381,33 @@ func TestApplyHeaders_XInitiator_InputArrayLastFunctionCallOutput(t *testing.T)
} }
} }
func TestApplyHeaders_XInitiator_UserInMultiTurnNoTools(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Genuine multi-turn: user → assistant (plain text) → user follow-up.
// No tool messages → should be "user" (not a false-positive).
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"Hi there!"},{"role":"user","content":"what is 2+2?"}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (genuine multi-turn, no tools)", got)
}
}
func TestApplyHeaders_XInitiator_UserFollowUpAfterToolHistory(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// User follow-up after a completed tool-use conversation.
// The last message is a genuine user question — should be "user", not "agent".
// This aligns with opencode's behavior: only active tool loops are agent-initiated.
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":[{"type":"tool_use","id":"tu1","name":"Read","input":{}}]},{"role":"tool","tool_call_id":"tu1","content":"file data"},{"role":"assistant","content":"I read the file."},{"role":"user","content":"What did we do so far?"}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (genuine follow-up after tool history)", got)
}
}
// --- Tests for x-github-api-version header (Problem M) --- // --- Tests for x-github-api-version header (Problem M) ---
func TestApplyHeaders_GitHubAPIVersion(t *testing.T) { func TestApplyHeaders_GitHubAPIVersion(t *testing.T) {
@@ -414,3 +454,364 @@ func TestDetectVisionContent_NoMessages(t *testing.T) {
t.Fatal("expected no vision content when messages field is absent") t.Fatal("expected no vision content when messages field is absent")
} }
} }
// --- Tests for applyGitHubCopilotResponsesDefaults ---
func TestApplyGitHubCopilotResponsesDefaults_SetsAllDefaults(t *testing.T) {
t.Parallel()
body := []byte(`{"input":"hello","reasoning":{"effort":"medium"}}`)
got := applyGitHubCopilotResponsesDefaults(body)
if gjson.GetBytes(got, "store").Bool() != false {
t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
}
inc := gjson.GetBytes(got, "include")
if !inc.IsArray() || inc.Array()[0].String() != "reasoning.encrypted_content" {
t.Fatalf("include = %s, want [\"reasoning.encrypted_content\"]", inc.Raw)
}
if gjson.GetBytes(got, "reasoning.summary").String() != "auto" {
t.Fatalf("reasoning.summary = %q, want auto", gjson.GetBytes(got, "reasoning.summary").String())
}
}
func TestApplyGitHubCopilotResponsesDefaults_DoesNotOverrideExisting(t *testing.T) {
t.Parallel()
body := []byte(`{"input":"hello","store":true,"include":["other"],"reasoning":{"effort":"high","summary":"concise"}}`)
got := applyGitHubCopilotResponsesDefaults(body)
if gjson.GetBytes(got, "store").Bool() != true {
t.Fatalf("store should not be overridden, got %s", gjson.GetBytes(got, "store").Raw)
}
if gjson.GetBytes(got, "include").Array()[0].String() != "other" {
t.Fatalf("include should not be overridden, got %s", gjson.GetBytes(got, "include").Raw)
}
if gjson.GetBytes(got, "reasoning.summary").String() != "concise" {
t.Fatalf("reasoning.summary should not be overridden, got %q", gjson.GetBytes(got, "reasoning.summary").String())
}
}
func TestApplyGitHubCopilotResponsesDefaults_NoReasoningEffort(t *testing.T) {
t.Parallel()
body := []byte(`{"input":"hello"}`)
got := applyGitHubCopilotResponsesDefaults(body)
if gjson.GetBytes(got, "store").Bool() != false {
t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
}
// reasoning.summary should NOT be set when reasoning.effort is absent
if gjson.GetBytes(got, "reasoning.summary").Exists() {
t.Fatalf("reasoning.summary should not be set when reasoning.effort is absent, got %q", gjson.GetBytes(got, "reasoning.summary").String())
}
}
// --- Tests for normalizeGitHubCopilotReasoningField ---
func TestNormalizeReasoningField_NonStreaming(t *testing.T) {
t.Parallel()
data := []byte(`{"choices":[{"message":{"content":"hello","reasoning_text":"I think..."}}]}`)
got := normalizeGitHubCopilotReasoningField(data)
rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
if rc != "I think..." {
t.Fatalf("reasoning_content = %q, want %q", rc, "I think...")
}
}
func TestNormalizeReasoningField_Streaming(t *testing.T) {
t.Parallel()
data := []byte(`{"choices":[{"delta":{"reasoning_text":"thinking delta"}}]}`)
got := normalizeGitHubCopilotReasoningField(data)
rc := gjson.GetBytes(got, "choices.0.delta.reasoning_content").String()
if rc != "thinking delta" {
t.Fatalf("reasoning_content = %q, want %q", rc, "thinking delta")
}
}
func TestNormalizeReasoningField_PreservesExistingReasoningContent(t *testing.T) {
t.Parallel()
data := []byte(`{"choices":[{"message":{"reasoning_text":"old","reasoning_content":"existing"}}]}`)
got := normalizeGitHubCopilotReasoningField(data)
rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
if rc != "existing" {
t.Fatalf("reasoning_content = %q, want %q (should not overwrite)", rc, "existing")
}
}
func TestNormalizeReasoningField_MultiChoice(t *testing.T) {
t.Parallel()
data := []byte(`{"choices":[{"message":{"reasoning_text":"thought-0"}},{"message":{"reasoning_text":"thought-1"}}]}`)
got := normalizeGitHubCopilotReasoningField(data)
rc0 := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
rc1 := gjson.GetBytes(got, "choices.1.message.reasoning_content").String()
if rc0 != "thought-0" {
t.Fatalf("choices[0].reasoning_content = %q, want %q", rc0, "thought-0")
}
if rc1 != "thought-1" {
t.Fatalf("choices[1].reasoning_content = %q, want %q", rc1, "thought-1")
}
}
func TestNormalizeReasoningField_NoChoices(t *testing.T) {
t.Parallel()
data := []byte(`{"id":"chatcmpl-123"}`)
got := normalizeGitHubCopilotReasoningField(data)
if string(got) != string(data) {
t.Fatalf("expected no change, got %s", string(got))
}
}
func TestApplyHeaders_OpenAIIntentValue(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
e.applyHeaders(req, "token", nil)
if got := req.Header.Get("Openai-Intent"); got != "conversation-edits" {
t.Fatalf("Openai-Intent = %q, want conversation-edits", got)
}
}
// --- Tests for CountTokens (local tiktoken estimation) ---
func TestCountTokens_ReturnsPositiveCount(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
body := []byte(`{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, world!"}]}`)
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
Model: "gpt-4o",
Payload: body,
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("openai"),
})
if err != nil {
t.Fatalf("CountTokens() error: %v", err)
}
if len(resp.Payload) == 0 {
t.Fatal("CountTokens() returned empty payload")
}
// The response should contain a positive token count.
tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
if tokens <= 0 {
t.Fatalf("expected positive token count, got %d", tokens)
}
}
func TestCountTokens_ClaudeSourceFormatTranslates(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
body := []byte(`{"model":"claude-sonnet-4","messages":[{"role":"user","content":"Tell me a joke"}],"max_tokens":1024}`)
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
Model: "claude-sonnet-4",
Payload: body,
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("claude"),
})
if err != nil {
t.Fatalf("CountTokens() error: %v", err)
}
// Claude source format → should get input_tokens in response
inputTokens := gjson.GetBytes(resp.Payload, "input_tokens").Int()
if inputTokens <= 0 {
// Fallback: check usage.prompt_tokens (depends on translator registration)
promptTokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
if promptTokens <= 0 {
t.Fatalf("expected positive token count, got payload: %s", resp.Payload)
}
}
}
func TestCountTokens_EmptyPayload(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
Model: "gpt-4o",
Payload: []byte(`{"model":"gpt-4o","messages":[]}`),
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("openai"),
})
if err != nil {
t.Fatalf("CountTokens() error: %v", err)
}
tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
// Empty messages should return 0 tokens.
if tokens != 0 {
t.Fatalf("expected 0 tokens for empty messages, got %d", tokens)
}
}
func TestStripUnsupportedBetas_RemovesContext1M(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"claude-opus-4.6","betas":["interleaved-thinking-2025-05-14","context-1m-2025-08-07","claude-code-20250219"],"messages":[]}`)
result := stripUnsupportedBetas(body)
betas := gjson.GetBytes(result, "betas")
if !betas.Exists() {
t.Fatal("betas field should still exist after stripping")
}
for _, item := range betas.Array() {
if item.String() == "context-1m-2025-08-07" {
t.Fatal("context-1m-2025-08-07 should have been stripped")
}
}
// Other betas should be preserved
found := false
for _, item := range betas.Array() {
if item.String() == "interleaved-thinking-2025-05-14" {
found = true
}
}
if !found {
t.Fatal("other betas should be preserved")
}
}
func TestStripUnsupportedBetas_NoBetasField(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"gpt-4o","messages":[]}`)
result := stripUnsupportedBetas(body)
// Should be unchanged
if string(result) != string(body) {
t.Fatalf("body should be unchanged when no betas field exists, got %s", string(result))
}
}
func TestStripUnsupportedBetas_MetadataBetas(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"claude-opus-4.6","metadata":{"betas":["context-1m-2025-08-07","other-beta"]},"messages":[]}`)
result := stripUnsupportedBetas(body)
betas := gjson.GetBytes(result, "metadata.betas")
if !betas.Exists() {
t.Fatal("metadata.betas field should still exist after stripping")
}
for _, item := range betas.Array() {
if item.String() == "context-1m-2025-08-07" {
t.Fatal("context-1m-2025-08-07 should have been stripped from metadata.betas")
}
}
if betas.Array()[0].String() != "other-beta" {
t.Fatal("other betas in metadata.betas should be preserved")
}
}
func TestStripUnsupportedBetas_AllBetasStripped(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"claude-opus-4.6","betas":["context-1m-2025-08-07"],"messages":[]}`)
result := stripUnsupportedBetas(body)
betas := gjson.GetBytes(result, "betas")
if betas.Exists() {
t.Fatal("betas field should be deleted when all betas are stripped")
}
}
func TestCopilotModelEntry_Limits(t *testing.T) {
t.Parallel()
tests := []struct {
name string
capabilities map[string]any
wantNil bool
wantPrompt int
wantOutput int
wantContext int
}{
{
name: "nil capabilities",
capabilities: nil,
wantNil: true,
},
{
name: "no limits key",
capabilities: map[string]any{"family": "claude-opus-4.6"},
wantNil: true,
},
{
name: "limits is not a map",
capabilities: map[string]any{"limits": "invalid"},
wantNil: true,
},
{
name: "all zero values",
capabilities: map[string]any{
"limits": map[string]any{
"max_context_window_tokens": float64(0),
"max_prompt_tokens": float64(0),
"max_output_tokens": float64(0),
},
},
wantNil: true,
},
{
name: "individual account limits (128K prompt)",
capabilities: map[string]any{
"limits": map[string]any{
"max_context_window_tokens": float64(144000),
"max_prompt_tokens": float64(128000),
"max_output_tokens": float64(64000),
},
},
wantNil: false,
wantPrompt: 128000,
wantOutput: 64000,
wantContext: 144000,
},
{
name: "business account limits (168K prompt)",
capabilities: map[string]any{
"limits": map[string]any{
"max_context_window_tokens": float64(200000),
"max_prompt_tokens": float64(168000),
"max_output_tokens": float64(32000),
},
},
wantNil: false,
wantPrompt: 168000,
wantOutput: 32000,
wantContext: 200000,
},
{
name: "partial limits (only prompt)",
capabilities: map[string]any{
"limits": map[string]any{
"max_prompt_tokens": float64(128000),
},
},
wantNil: false,
wantPrompt: 128000,
wantOutput: 0,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
entry := copilotauth.CopilotModelEntry{
ID: "claude-opus-4.6",
Capabilities: tt.capabilities,
}
limits := entry.Limits()
if tt.wantNil {
if limits != nil {
t.Fatalf("expected nil limits, got %+v", limits)
}
return
}
if limits == nil {
t.Fatal("expected non-nil limits, got nil")
}
if limits.MaxPromptTokens != tt.wantPrompt {
t.Errorf("MaxPromptTokens = %d, want %d", limits.MaxPromptTokens, tt.wantPrompt)
}
if limits.MaxOutputTokens != tt.wantOutput {
t.Errorf("MaxOutputTokens = %d, want %d", limits.MaxOutputTokens, tt.wantOutput)
}
if tt.wantContext > 0 && limits.MaxContextWindowTokens != tt.wantContext {
t.Errorf("MaxContextWindowTokens = %d, want %d", limits.MaxContextWindowTokens, tt.wantContext)
}
})
}
}

View File

@@ -30,6 +30,8 @@ const (
qwenRateLimitWindow = time.Minute // sliding window duration qwenRateLimitWindow = time.Minute // sliding window duration
) )
var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls. // qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
var qwenBeijingLoc = func() *time.Location { var qwenBeijingLoc = func() *time.Location {
loc, err := time.LoadLocation("Asia/Shanghai") loc, err := time.LoadLocation("Asia/Shanghai")
@@ -170,6 +172,105 @@ func timeUntilNextDay() time.Duration {
return tomorrow.Sub(now) return tomorrow.Sub(now)
} }
// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
// It always injects the default system prompt and merges any user-provided system messages
// into the injected system message content to satisfy Qwen's strict message ordering rules.
func ensureQwenSystemMessage(payload []byte) ([]byte, error) {
isInjectedSystemPart := func(part gjson.Result) bool {
if !part.Exists() || !part.IsObject() {
return false
}
if !strings.EqualFold(part.Get("type").String(), "text") {
return false
}
if !strings.EqualFold(part.Get("cache_control.type").String(), "ephemeral") {
return false
}
text := part.Get("text").String()
return text == "" || text == "You are Qwen Code."
}
defaultParts := gjson.ParseBytes(qwenDefaultSystemMessage).Get("content")
var systemParts []any
if defaultParts.Exists() && defaultParts.IsArray() {
for _, part := range defaultParts.Array() {
systemParts = append(systemParts, part.Value())
}
}
if len(systemParts) == 0 {
systemParts = append(systemParts, map[string]any{
"type": "text",
"text": "You are Qwen Code.",
"cache_control": map[string]any{
"type": "ephemeral",
},
})
}
appendSystemContent := func(content gjson.Result) {
makeTextPart := func(text string) map[string]any {
return map[string]any{
"type": "text",
"text": text,
}
}
if !content.Exists() || content.Type == gjson.Null {
return
}
if content.IsArray() {
for _, part := range content.Array() {
if part.Type == gjson.String {
systemParts = append(systemParts, makeTextPart(part.String()))
continue
}
if isInjectedSystemPart(part) {
continue
}
systemParts = append(systemParts, part.Value())
}
return
}
if content.Type == gjson.String {
systemParts = append(systemParts, makeTextPart(content.String()))
return
}
if content.IsObject() {
if isInjectedSystemPart(content) {
return
}
systemParts = append(systemParts, content.Value())
return
}
systemParts = append(systemParts, makeTextPart(content.String()))
}
messages := gjson.GetBytes(payload, "messages")
var nonSystemMessages []any
if messages.Exists() && messages.IsArray() {
for _, msg := range messages.Array() {
if strings.EqualFold(msg.Get("role").String(), "system") {
appendSystemContent(msg.Get("content"))
continue
}
nonSystemMessages = append(nonSystemMessages, msg.Value())
}
}
newMessages := make([]any, 0, 1+len(nonSystemMessages))
newMessages = append(newMessages, map[string]any{
"role": "system",
"content": systemParts,
})
newMessages = append(newMessages, nonSystemMessages...)
updated, errSet := sjson.SetBytes(payload, "messages", newMessages)
if errSet != nil {
return nil, fmt.Errorf("qwen executor: set system message failed: %w", errSet)
}
return updated, nil
}
// QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions. // QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
// If access token is unavailable, it falls back to legacy via ClientAdapter. // If access token is unavailable, it falls back to legacy via ClientAdapter.
type QwenExecutor struct { type QwenExecutor struct {
@@ -251,6 +352,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
requestedModel := helps.PayloadRequestedModel(opts, req.Model) requestedModel := helps.PayloadRequestedModel(opts, req.Model)
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
body, err = ensureQwenSystemMessage(body)
if err != nil {
return resp, err
}
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -357,15 +462,19 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
return nil, err return nil, err
} }
toolsResult := gjson.GetBytes(body, "tools") // toolsResult := gjson.GetBytes(body, "tools")
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response. // I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
// This will have no real consequences. It's just to scare Qwen3. // This will have no real consequences. It's just to scare Qwen3.
if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() { // if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`)) // body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
} // }
body, _ = sjson.SetBytes(body, "stream_options.include_usage", true) body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
requestedModel := helps.PayloadRequestedModel(opts, req.Model) requestedModel := helps.PayloadRequestedModel(opts, req.Model)
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel) body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
body, err = ensureQwenSystemMessage(body)
if err != nil {
return nil, err
}
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))

View File

@@ -4,6 +4,7 @@ import (
"testing" "testing"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/tidwall/gjson"
) )
func TestQwenExecutorParseSuffix(t *testing.T) { func TestQwenExecutorParseSuffix(t *testing.T) {
@@ -28,3 +29,123 @@ func TestQwenExecutorParseSuffix(t *testing.T) {
}) })
} }
} }
func TestEnsureQwenSystemMessage_MergeStringSystem(t *testing.T) {
payload := []byte(`{
"model": "qwen3.6-plus",
"stream": true,
"messages": [
{ "role": "system", "content": "ABCDEFG" },
{ "role": "user", "content": [ { "type": "text", "text": "你好" } ] }
]
}`)
out, err := ensureQwenSystemMessage(payload)
if err != nil {
t.Fatalf("ensureQwenSystemMessage() error = %v", err)
}
msgs := gjson.GetBytes(out, "messages").Array()
if len(msgs) != 2 {
t.Fatalf("messages length = %d, want 2", len(msgs))
}
if msgs[0].Get("role").String() != "system" {
t.Fatalf("messages[0].role = %q, want %q", msgs[0].Get("role").String(), "system")
}
parts := msgs[0].Get("content").Array()
if len(parts) != 2 {
t.Fatalf("messages[0].content length = %d, want 2", len(parts))
}
if parts[0].Get("text").String() != "You are Qwen Code." || parts[0].Get("cache_control.type").String() != "ephemeral" {
t.Fatalf("messages[0].content[0] = %s, want injected system part", parts[0].Raw)
}
if parts[1].Get("type").String() != "text" || parts[1].Get("text").String() != "ABCDEFG" {
t.Fatalf("messages[0].content[1] = %s, want text part with ABCDEFG", parts[1].Raw)
}
if msgs[1].Get("role").String() != "user" {
t.Fatalf("messages[1].role = %q, want %q", msgs[1].Get("role").String(), "user")
}
}
func TestEnsureQwenSystemMessage_MergeObjectSystem(t *testing.T) {
payload := []byte(`{
"messages": [
{ "role": "system", "content": { "type": "text", "text": "ABCDEFG" } },
{ "role": "user", "content": [ { "type": "text", "text": "你好" } ] }
]
}`)
out, err := ensureQwenSystemMessage(payload)
if err != nil {
t.Fatalf("ensureQwenSystemMessage() error = %v", err)
}
msgs := gjson.GetBytes(out, "messages").Array()
if len(msgs) != 2 {
t.Fatalf("messages length = %d, want 2", len(msgs))
}
parts := msgs[0].Get("content").Array()
if len(parts) != 2 {
t.Fatalf("messages[0].content length = %d, want 2", len(parts))
}
if parts[1].Get("text").String() != "ABCDEFG" {
t.Fatalf("messages[0].content[1].text = %q, want %q", parts[1].Get("text").String(), "ABCDEFG")
}
}
func TestEnsureQwenSystemMessage_PrependsWhenMissing(t *testing.T) {
payload := []byte(`{
"messages": [
{ "role": "user", "content": [ { "type": "text", "text": "你好" } ] }
]
}`)
out, err := ensureQwenSystemMessage(payload)
if err != nil {
t.Fatalf("ensureQwenSystemMessage() error = %v", err)
}
msgs := gjson.GetBytes(out, "messages").Array()
if len(msgs) != 2 {
t.Fatalf("messages length = %d, want 2", len(msgs))
}
if msgs[0].Get("role").String() != "system" {
t.Fatalf("messages[0].role = %q, want %q", msgs[0].Get("role").String(), "system")
}
if !msgs[0].Get("content").IsArray() || len(msgs[0].Get("content").Array()) == 0 {
t.Fatalf("messages[0].content = %s, want non-empty array", msgs[0].Get("content").Raw)
}
if msgs[1].Get("role").String() != "user" {
t.Fatalf("messages[1].role = %q, want %q", msgs[1].Get("role").String(), "user")
}
}
func TestEnsureQwenSystemMessage_MergesMultipleSystemMessages(t *testing.T) {
payload := []byte(`{
"messages": [
{ "role": "system", "content": "A" },
{ "role": "user", "content": [ { "type": "text", "text": "hi" } ] },
{ "role": "system", "content": "B" }
]
}`)
out, err := ensureQwenSystemMessage(payload)
if err != nil {
t.Fatalf("ensureQwenSystemMessage() error = %v", err)
}
msgs := gjson.GetBytes(out, "messages").Array()
if len(msgs) != 2 {
t.Fatalf("messages length = %d, want 2", len(msgs))
}
parts := msgs[0].Get("content").Array()
if len(parts) != 3 {
t.Fatalf("messages[0].content length = %d, want 3", len(parts))
}
if parts[1].Get("text").String() != "A" {
t.Fatalf("messages[0].content[1].text = %q, want %q", parts[1].Get("text").String(), "A")
}
if parts[2].Get("text").String() != "B" {
t.Fatalf("messages[0].content[2].text = %q, want %q", parts[2].Get("text").String(), "B")
}
}

View File

@@ -6,7 +6,7 @@
package claude package claude
import ( import (
"bytes" "fmt"
"strings" "strings"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
@@ -31,8 +31,6 @@ const geminiClaudeThoughtSignature = "skip_thought_signature_validator"
// - []byte: The transformed request in Gemini CLI format. // - []byte: The transformed request in Gemini CLI format.
func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte { func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
rawJSON := inputRawJSON rawJSON := inputRawJSON
rawJSON = bytes.Replace(rawJSON, []byte(`"url":{"type":"string","format":"uri",`), []byte(`"url":{"type":"string",`), -1)
// Build output Gemini CLI request JSON // Build output Gemini CLI request JSON
out := []byte(`{"contents":[]}`) out := []byte(`{"contents":[]}`)
out, _ = sjson.SetBytes(out, "model", modelName) out, _ = sjson.SetBytes(out, "model", modelName)
@@ -146,13 +144,37 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
}) })
} }
// strip trailing model turn with unanswered function calls —
// Gemini returns empty responses when the last turn is a model
// functionCall with no corresponding user functionResponse.
contents := gjson.GetBytes(out, "contents")
if contents.Exists() && contents.IsArray() {
arr := contents.Array()
if len(arr) > 0 {
last := arr[len(arr)-1]
if last.Get("role").String() == "model" {
hasFC := false
last.Get("parts").ForEach(func(_, part gjson.Result) bool {
if part.Get("functionCall").Exists() {
hasFC = true
return false
}
return true
})
if hasFC {
out, _ = sjson.DeleteBytes(out, fmt.Sprintf("contents.%d", len(arr)-1))
}
}
}
}
// tools // tools
if toolsResult := gjson.GetBytes(rawJSON, "tools"); toolsResult.IsArray() { if toolsResult := gjson.GetBytes(rawJSON, "tools"); toolsResult.IsArray() {
hasTools := false hasTools := false
toolsResult.ForEach(func(_, toolResult gjson.Result) bool { toolsResult.ForEach(func(_, toolResult gjson.Result) bool {
inputSchemaResult := toolResult.Get("input_schema") inputSchemaResult := toolResult.Get("input_schema")
if inputSchemaResult.Exists() && inputSchemaResult.IsObject() { if inputSchemaResult.Exists() && inputSchemaResult.IsObject() {
inputSchema := inputSchemaResult.Raw inputSchema := util.CleanJSONSchemaForGemini(inputSchemaResult.Raw)
tool := []byte(toolResult.Raw) tool := []byte(toolResult.Raw)
var err error var err error
tool, err = sjson.DeleteBytes(tool, "input_schema") tool, err = sjson.DeleteBytes(tool, "input_schema")
@@ -168,6 +190,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
tool, _ = sjson.DeleteBytes(tool, "type") tool, _ = sjson.DeleteBytes(tool, "type")
tool, _ = sjson.DeleteBytes(tool, "cache_control") tool, _ = sjson.DeleteBytes(tool, "cache_control")
tool, _ = sjson.DeleteBytes(tool, "defer_loading") tool, _ = sjson.DeleteBytes(tool, "defer_loading")
tool, _ = sjson.DeleteBytes(tool, "eager_input_streaming")
tool, _ = sjson.SetBytes(tool, "name", util.SanitizeFunctionName(gjson.GetBytes(tool, "name").String())) tool, _ = sjson.SetBytes(tool, "name", util.SanitizeFunctionName(gjson.GetBytes(tool, "name").String()))
if gjson.ValidBytes(tool) && gjson.ParseBytes(tool).IsObject() { if gjson.ValidBytes(tool) && gjson.ParseBytes(tool).IsObject() {
if !hasTools { if !hasTools {

View File

@@ -9,6 +9,7 @@ import (
"context" "context"
"fmt" "fmt"
"io" "io"
"net"
"net/http" "net/http"
"strings" "strings"
"time" "time"
@@ -49,7 +50,23 @@ func (h *GeminiCLIAPIHandler) Models() []map[string]any {
// CLIHandler handles CLI-specific requests for Gemini API operations. // CLIHandler handles CLI-specific requests for Gemini API operations.
// It restricts access to localhost only and routes requests to appropriate internal handlers. // It restricts access to localhost only and routes requests to appropriate internal handlers.
func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) { func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") { if h.Cfg == nil || !h.Cfg.EnableGeminiCLIEndpoint {
c.JSON(http.StatusForbidden, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "Gemini CLI endpoint is disabled",
Type: "forbidden",
},
})
return
}
requestHost := c.Request.Host
requestHostname := requestHost
if hostname, _, errSplitHostPort := net.SplitHostPort(requestHost); errSplitHostPort == nil {
requestHostname = hostname
}
if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") || requestHostname != "127.0.0.1" {
c.JSON(http.StatusForbidden, handlers.ErrorResponse{ c.JSON(http.StatusForbidden, handlers.ErrorResponse{
Error: handlers.ErrorDetail{ Error: handlers.ErrorDetail{
Message: "CLI reply only allow local access", Message: "CLI reply only allow local access",

View File

@@ -379,7 +379,7 @@ func shouldReplaceWebsocketTranscript(rawJSON []byte, nextInput gjson.Result) bo
for _, item := range nextInput.Array() { for _, item := range nextInput.Array() {
switch strings.TrimSpace(item.Get("type").String()) { switch strings.TrimSpace(item.Get("type").String()) {
case "function_call": case "function_call", "custom_tool_call":
return true return true
case "message": case "message":
role := strings.TrimSpace(item.Get("role").String()) role := strings.TrimSpace(item.Get("role").String())
@@ -431,7 +431,7 @@ func dedupeFunctionCallsByCallID(rawArray string) (string, error) {
continue continue
} }
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String()) itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
if itemType == "function_call" { if isResponsesToolCallType(itemType) {
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String()) callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
if callID != "" { if callID != "" {
if _, ok := seenCallIDs[callID]; ok { if _, ok := seenCallIDs[callID]; ok {

View File

@@ -520,6 +520,92 @@ func TestRepairResponsesWebsocketToolCallsDropsOrphanOutputWhenCallMissing(t *te
} }
} }
func TestRepairResponsesWebsocketToolCallsInsertsCachedCustomToolOutput(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
cacheWarm := []byte(`{"previous_response_id":"resp-1","input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"}]}`)
warmed := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, cacheWarm)
if gjson.GetBytes(warmed, "input.0.call_id").String() != "call-1" {
t.Fatalf("expected warmup output to remain")
}
raw := []byte(`{"input":[{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"},{"type":"message","id":"msg-1"}]}`)
repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw)
input := gjson.GetBytes(repaired, "input").Array()
if len(input) != 3 {
t.Fatalf("repaired input len = %d, want 3", len(input))
}
if input[0].Get("type").String() != "custom_tool_call" || input[0].Get("call_id").String() != "call-1" {
t.Fatalf("unexpected first item: %s", input[0].Raw)
}
if input[1].Get("type").String() != "custom_tool_call_output" || input[1].Get("call_id").String() != "call-1" {
t.Fatalf("missing inserted output: %s", input[1].Raw)
}
if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" {
t.Fatalf("unexpected trailing item: %s", input[2].Raw)
}
}
func TestRepairResponsesWebsocketToolCallsDropsOrphanCustomToolCall(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
raw := []byte(`{"input":[{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"},{"type":"message","id":"msg-1"}]}`)
repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw)
input := gjson.GetBytes(repaired, "input").Array()
if len(input) != 1 {
t.Fatalf("repaired input len = %d, want 1", len(input))
}
if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" {
t.Fatalf("unexpected remaining item: %s", input[0].Raw)
}
}
func TestRepairResponsesWebsocketToolCallsInsertsCachedCustomToolCallForOrphanOutput(t *testing.T) {
outputCache := newWebsocketToolOutputCache(time.Minute, 10)
callCache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
callCache.record(sessionKey, "call-1", []byte(`{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"}`))
raw := []byte(`{"input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`)
repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw)
input := gjson.GetBytes(repaired, "input").Array()
if len(input) != 3 {
t.Fatalf("repaired input len = %d, want 3", len(input))
}
if input[0].Get("type").String() != "custom_tool_call" || input[0].Get("call_id").String() != "call-1" {
t.Fatalf("missing inserted call: %s", input[0].Raw)
}
if input[1].Get("type").String() != "custom_tool_call_output" || input[1].Get("call_id").String() != "call-1" {
t.Fatalf("unexpected output item: %s", input[1].Raw)
}
if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" {
t.Fatalf("unexpected trailing item: %s", input[2].Raw)
}
}
func TestRepairResponsesWebsocketToolCallsDropsOrphanCustomToolOutputWhenCallMissing(t *testing.T) {
outputCache := newWebsocketToolOutputCache(time.Minute, 10)
callCache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
raw := []byte(`{"input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`)
repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw)
input := gjson.GetBytes(repaired, "input").Array()
if len(input) != 1 {
t.Fatalf("repaired input len = %d, want 1", len(input))
}
if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" {
t.Fatalf("unexpected remaining item: %s", input[0].Raw)
}
}
func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) { func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10) cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1" sessionKey := "session-1"
@@ -536,6 +622,38 @@ func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
} }
} }
func TestRecordResponsesWebsocketCustomToolCallsFromCompletedPayloadWithCache(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
payload := []byte(`{"type":"response.completed","response":{"id":"resp-1","output":[{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch","input":"*** Begin Patch"}]}}`)
recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload)
cached, ok := cache.get(sessionKey, "call-1")
if !ok {
t.Fatalf("expected cached custom tool call")
}
if gjson.GetBytes(cached, "type").String() != "custom_tool_call" || gjson.GetBytes(cached, "call_id").String() != "call-1" {
t.Fatalf("unexpected cached custom tool call: %s", cached)
}
}
func TestRecordResponsesWebsocketCustomToolCallsFromOutputItemDoneWithCache(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
payload := []byte(`{"type":"response.output_item.done","item":{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch","input":"*** Begin Patch"}}`)
recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload)
cached, ok := cache.get(sessionKey, "call-1")
if !ok {
t.Fatalf("expected cached custom tool call")
}
if gjson.GetBytes(cached, "type").String() != "custom_tool_call" || gjson.GetBytes(cached, "call_id").String() != "call-1" {
t.Fatalf("unexpected cached custom tool call: %s", cached)
}
}
func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) { func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) {
gin.SetMode(gin.TestMode) gin.SetMode(gin.TestMode)
@@ -1023,6 +1141,161 @@ func TestNormalizeResponsesWebsocketRequestDropsDuplicateFunctionCallsByCallID(t
} }
} }
func TestNormalizeResponsesWebsocketRequestTreatsCustomToolTranscriptReplacementAsReset(t *testing.T) {
lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"message","id":"msg-1"},{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-1","call_id":"call-1"},{"type":"message","id":"assistant-1","role":"assistant"}]}`)
lastResponseOutput := []byte(`[
{"type":"message","id":"assistant-1","role":"assistant"}
]`)
raw := []byte(`{"type":"response.create","input":[{"type":"custom_tool_call","id":"ctc-compact","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-compact","call_id":"call-1"},{"type":"message","id":"msg-2"}]}`)
normalized, next, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput)
if errMsg != nil {
t.Fatalf("unexpected error: %v", errMsg.Error)
}
if gjson.GetBytes(normalized, "previous_response_id").Exists() {
t.Fatalf("previous_response_id must not exist in transcript replacement mode")
}
items := gjson.GetBytes(normalized, "input").Array()
if len(items) != 3 {
t.Fatalf("replacement input len = %d, want 3: %s", len(items), normalized)
}
if items[0].Get("id").String() != "ctc-compact" ||
items[1].Get("id").String() != "tool-out-compact" ||
items[2].Get("id").String() != "msg-2" {
t.Fatalf("replacement transcript was not preserved as-is: %s", normalized)
}
if !bytes.Equal(next, normalized) {
t.Fatalf("next request snapshot should match replacement request")
}
}
func TestNormalizeResponsesWebsocketRequestDropsDuplicateCustomToolCallsByCallID(t *testing.T) {
lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-1","call_id":"call-1"}]}`)
lastResponseOutput := []byte(`[
{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"}
]`)
raw := []byte(`{"type":"response.create","input":[{"type":"message","id":"msg-2"}]}`)
normalized, _, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput)
if errMsg != nil {
t.Fatalf("unexpected error: %v", errMsg.Error)
}
items := gjson.GetBytes(normalized, "input").Array()
if len(items) != 3 {
t.Fatalf("merged input len = %d, want 3: %s", len(items), normalized)
}
if items[0].Get("id").String() != "ctc-1" ||
items[1].Get("id").String() != "tool-out-1" ||
items[2].Get("id").String() != "msg-2" {
t.Fatalf("unexpected merged input order: %s", normalized)
}
}
func TestResponsesWebsocketCompactionResetsTurnStateOnCustomToolTranscriptReplacement(t *testing.T) {
gin.SetMode(gin.TestMode)
executor := &websocketCompactionCaptureExecutor{}
manager := coreauth.NewManager(nil, nil, nil)
manager.RegisterExecutor(executor)
auth := &coreauth.Auth{ID: "auth-sse", Provider: executor.Identifier(), Status: coreauth.StatusActive}
if _, err := manager.Register(context.Background(), auth); err != nil {
t.Fatalf("Register auth: %v", err)
}
registry.GetGlobalRegistry().RegisterClient(auth.ID, auth.Provider, []*registry.ModelInfo{{ID: "test-model"}})
t.Cleanup(func() {
registry.GetGlobalRegistry().UnregisterClient(auth.ID)
})
base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager)
h := NewOpenAIResponsesAPIHandler(base)
router := gin.New()
router.GET("/v1/responses/ws", h.ResponsesWebsocket)
router.POST("/v1/responses/compact", h.Compact)
server := httptest.NewServer(router)
defer server.Close()
wsURL := "ws" + strings.TrimPrefix(server.URL, "http") + "/v1/responses/ws"
conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
if err != nil {
t.Fatalf("dial websocket: %v", err)
}
defer func() {
if errClose := conn.Close(); errClose != nil {
t.Fatalf("close websocket: %v", errClose)
}
}()
requests := []string{
`{"type":"response.create","model":"test-model","input":[{"type":"message","id":"msg-1"}]}`,
`{"type":"response.create","input":[{"type":"custom_tool_call_output","call_id":"call-1","id":"tool-out-1"}]}`,
}
for i := range requests {
if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(requests[i])); errWrite != nil {
t.Fatalf("write websocket message %d: %v", i+1, errWrite)
}
_, payload, errReadMessage := conn.ReadMessage()
if errReadMessage != nil {
t.Fatalf("read websocket message %d: %v", i+1, errReadMessage)
}
if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted {
t.Fatalf("message %d payload type = %s, want %s", i+1, got, wsEventTypeCompleted)
}
}
compactResp, errPost := server.Client().Post(
server.URL+"/v1/responses/compact",
"application/json",
strings.NewReader(`{"model":"test-model","input":[{"type":"message","id":"summary-1"}]}`),
)
if errPost != nil {
t.Fatalf("compact request failed: %v", errPost)
}
if errClose := compactResp.Body.Close(); errClose != nil {
t.Fatalf("close compact response body: %v", errClose)
}
if compactResp.StatusCode != http.StatusOK {
t.Fatalf("compact status = %d, want %d", compactResp.StatusCode, http.StatusOK)
}
postCompact := `{"type":"response.create","input":[{"type":"custom_tool_call","id":"ctc-compact","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-compact","call_id":"call-1"},{"type":"message","id":"msg-2"}]}`
if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(postCompact)); errWrite != nil {
t.Fatalf("write post-compact websocket message: %v", errWrite)
}
_, payload, errReadMessage := conn.ReadMessage()
if errReadMessage != nil {
t.Fatalf("read post-compact websocket message: %v", errReadMessage)
}
if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted {
t.Fatalf("post-compact payload type = %s, want %s", got, wsEventTypeCompleted)
}
executor.mu.Lock()
defer executor.mu.Unlock()
if executor.compactPayload == nil {
t.Fatalf("compact payload was not captured")
}
if len(executor.streamPayloads) != 3 {
t.Fatalf("stream payload count = %d, want 3", len(executor.streamPayloads))
}
merged := executor.streamPayloads[2]
items := gjson.GetBytes(merged, "input").Array()
if len(items) != 3 {
t.Fatalf("merged input len = %d, want 3: %s", len(items), merged)
}
if items[0].Get("id").String() != "ctc-compact" ||
items[1].Get("id").String() != "tool-out-compact" ||
items[2].Get("id").String() != "msg-2" {
t.Fatalf("unexpected post-compact input order: %s", merged)
}
if items[0].Get("call_id").String() != "call-1" {
t.Fatalf("post-compact custom tool call id = %s, want call-1", items[0].Get("call_id").String())
}
}
func TestResponsesWebsocketCompactionResetsTurnStateOnTranscriptReplacement(t *testing.T) { func TestResponsesWebsocketCompactionResetsTurnStateOnTranscriptReplacement(t *testing.T) {
gin.SetMode(gin.TestMode) gin.SetMode(gin.TestMode)

View File

@@ -266,15 +266,15 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
continue continue
} }
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String()) itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
switch itemType { switch {
case "function_call_output": case isResponsesToolCallOutputType(itemType):
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String()) callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
if callID == "" { if callID == "" {
continue continue
} }
outputPresent[callID] = struct{}{} outputPresent[callID] = struct{}{}
outputCache.record(sessionKey, callID, item) outputCache.record(sessionKey, callID, item)
case "function_call": case isResponsesToolCallType(itemType):
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String()) callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
if callID == "" { if callID == "" {
continue continue
@@ -293,7 +293,7 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
continue continue
} }
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String()) itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
if itemType == "function_call_output" { if isResponsesToolCallOutputType(itemType) {
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String()) callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
if callID == "" { if callID == "" {
// Upstream rejects tool outputs without a call_id; drop it. // Upstream rejects tool outputs without a call_id; drop it.
@@ -325,7 +325,7 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
// Drop orphaned function_call_output items; upstream rejects transcripts with missing calls. // Drop orphaned function_call_output items; upstream rejects transcripts with missing calls.
continue continue
} }
if itemType != "function_call" { if !isResponsesToolCallType(itemType) {
filtered = append(filtered, item) filtered = append(filtered, item)
continue continue
} }
@@ -376,7 +376,7 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
return return
} }
for _, item := range output.Array() { for _, item := range output.Array() {
if strings.TrimSpace(item.Get("type").String()) != "function_call" { if !isResponsesToolCallType(item.Get("type").String()) {
continue continue
} }
callID := strings.TrimSpace(item.Get("call_id").String()) callID := strings.TrimSpace(item.Get("call_id").String())
@@ -390,7 +390,7 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
if !item.Exists() || !item.IsObject() { if !item.Exists() || !item.IsObject() {
return return
} }
if strings.TrimSpace(item.Get("type").String()) != "function_call" { if !isResponsesToolCallType(item.Get("type").String()) {
return return
} }
callID := strings.TrimSpace(item.Get("call_id").String()) callID := strings.TrimSpace(item.Get("call_id").String())
@@ -400,3 +400,21 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
cache.record(sessionKey, callID, json.RawMessage(item.Raw)) cache.record(sessionKey, callID, json.RawMessage(item.Raw))
} }
} }
func isResponsesToolCallType(itemType string) bool {
switch strings.TrimSpace(itemType) {
case "function_call", "custom_tool_call":
return true
default:
return false
}
}
func isResponsesToolCallOutputType(itemType string) bool {
switch strings.TrimSpace(itemType) {
case "function_call_output", "custom_tool_call_output":
return true
default:
return false
}
}