mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-04 19:51:18 +00:00
Compare commits
45 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7223fee2de | ||
|
|
ada8e2905e | ||
|
|
4ba10531da | ||
|
|
3774b56e9f | ||
|
|
c2d4137fb9 | ||
|
|
2ee938acaf | ||
|
|
8d5e470e1f | ||
|
|
3882494878 | ||
|
|
088c1d07f4 | ||
|
|
8430b28cfa | ||
|
|
f3ab8f4bc5 | ||
|
|
0e4f189c2e | ||
|
|
98509f615c | ||
|
|
e7a66ae504 | ||
|
|
754b126944 | ||
|
|
ae37ccffbf | ||
|
|
42c062bb5b | ||
|
|
87bf0b73d5 | ||
|
|
f389667ec3 | ||
|
|
29dba0399b | ||
|
|
a824e7cd0b | ||
|
|
140faef7dc | ||
|
|
adb580b344 | ||
|
|
06405f2129 | ||
|
|
b849bf79d6 | ||
|
|
59af2c57b1 | ||
|
|
d1fd2c4ad4 | ||
|
|
b6c6379bfa | ||
|
|
8f0e66b72e | ||
|
|
f63cf6ff7a | ||
|
|
d2419ed49d | ||
|
|
516d22c695 | ||
|
|
73cda6e836 | ||
|
|
0805989ee5 | ||
|
|
75da02af55 | ||
|
|
ab9ebea592 | ||
|
|
7ee37ee4b9 | ||
|
|
837afffb31 | ||
|
|
f5e9f01811 | ||
|
|
ff7dbb5867 | ||
|
|
e34b2b4f1d | ||
|
|
36efcc6e28 | ||
|
|
a337ecf35c | ||
|
|
14cb2b95c6 | ||
|
|
fdeef48498 |
6
.gitignore
vendored
6
.gitignore
vendored
@@ -54,4 +54,10 @@ _bmad-output/*
|
|||||||
# macOS
|
# macOS
|
||||||
.DS_Store
|
.DS_Store
|
||||||
._*
|
._*
|
||||||
|
|
||||||
|
# Opencode
|
||||||
|
.beads/
|
||||||
|
.opencode/
|
||||||
|
.cli-proxy-api/
|
||||||
|
.venv/
|
||||||
*.bak
|
*.bak
|
||||||
|
|||||||
93
README.md
93
README.md
@@ -14,99 +14,6 @@ This project only accepts pull requests that relate to third-party provider supp
|
|||||||
|
|
||||||
If you need to submit any non-third-party provider changes, please open them against the [mainline](https://github.com/router-for-me/CLIProxyAPI) repository.
|
If you need to submit any non-third-party provider changes, please open them against the [mainline](https://github.com/router-for-me/CLIProxyAPI) repository.
|
||||||
|
|
||||||
1. Fork the repository
|
|
||||||
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
|
||||||
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
|
||||||
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
||||||
5. Open a Pull Request
|
|
||||||
|
|
||||||
## Who is with us?
|
|
||||||
|
|
||||||
These projects are based on CLIProxyAPI:
|
|
||||||
|
|
||||||
### [vibeproxy](https://github.com/automazeio/vibeproxy)
|
|
||||||
|
|
||||||
Native macOS menu bar app to use your Claude Code & ChatGPT subscriptions with AI coding tools - no API keys needed
|
|
||||||
|
|
||||||
### [Subtitle Translator](https://github.com/VjayC/SRT-Subtitle-Translator-Validator)
|
|
||||||
|
|
||||||
Browser-based tool to translate SRT subtitles using your Gemini subscription via CLIProxyAPI with automatic validation/error correction - no API keys needed
|
|
||||||
|
|
||||||
### [CCS (Claude Code Switch)](https://github.com/kaitranntt/ccs)
|
|
||||||
|
|
||||||
CLI wrapper for instant switching between multiple Claude accounts and alternative models (Gemini, Codex, Antigravity) via CLIProxyAPI OAuth - no API keys needed
|
|
||||||
|
|
||||||
### [Quotio](https://github.com/nguyenphutrong/quotio)
|
|
||||||
|
|
||||||
Native macOS menu bar app that unifies Claude, Gemini, OpenAI, Qwen, and Antigravity subscriptions with real-time quota tracking and smart auto-failover for AI coding tools like Claude Code, OpenCode, and Droid - no API keys needed.
|
|
||||||
|
|
||||||
### [CodMate](https://github.com/loocor/CodMate)
|
|
||||||
|
|
||||||
Native macOS SwiftUI app for managing CLI AI sessions (Codex, Claude Code, Gemini CLI) with unified provider management, Git review, project organization, global search, and terminal integration. Integrates CLIProxyAPI to provide OAuth authentication for Codex, Claude, Gemini, Antigravity, and Qwen Code, with built-in and third-party provider rerouting through a single proxy endpoint - no API keys needed for OAuth providers.
|
|
||||||
|
|
||||||
### [ProxyPilot](https://github.com/Finesssee/ProxyPilot)
|
|
||||||
|
|
||||||
Windows-native CLIProxyAPI fork with TUI, system tray, and multi-provider OAuth for AI coding tools - no API keys needed.
|
|
||||||
|
|
||||||
### [Claude Proxy VSCode](https://github.com/uzhao/claude-proxy-vscode)
|
|
||||||
|
|
||||||
VSCode extension for quick switching between Claude Code models, featuring integrated CLIProxyAPI as its backend with automatic background lifecycle management.
|
|
||||||
|
|
||||||
### [ZeroLimit](https://github.com/0xtbug/zero-limit)
|
|
||||||
|
|
||||||
Windows desktop app built with Tauri + React for monitoring AI coding assistant quotas via CLIProxyAPI. Track usage across Gemini, Claude, OpenAI Codex, and Antigravity accounts with real-time dashboard, system tray integration, and one-click proxy control - no API keys needed.
|
|
||||||
|
|
||||||
### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
|
|
||||||
|
|
||||||
A lightweight web admin panel for CLIProxyAPI with health checks, resource monitoring, real-time logs, auto-update, request statistics and pricing display. Supports one-click installation and systemd service.
|
|
||||||
|
|
||||||
### [CLIProxyAPI Tray](https://github.com/kitephp/CLIProxyAPI_Tray)
|
|
||||||
|
|
||||||
A Windows tray application implemented using PowerShell scripts, without relying on any third-party libraries. The main features include: automatic creation of shortcuts, silent running, password management, channel switching (Main / Plus), and automatic downloading and updating.
|
|
||||||
|
|
||||||
### [霖君](https://github.com/wangdabaoqq/LinJun)
|
|
||||||
|
|
||||||
霖君 is a cross-platform desktop application for managing AI programming assistants, supporting macOS, Windows, and Linux systems. Unified management of Claude Code, Gemini CLI, OpenAI Codex, Qwen Code, and other AI coding tools, with local proxy for multi-account quota tracking and one-click configuration.
|
|
||||||
|
|
||||||
### [CLIProxyAPI Dashboard](https://github.com/itsmylife44/cliproxyapi-dashboard)
|
|
||||||
|
|
||||||
A modern web-based management dashboard for CLIProxyAPI built with Next.js, React, and PostgreSQL. Features real-time log streaming, structured configuration editing, API key management, OAuth provider integration for Claude/Gemini/Codex, usage analytics, container management, and config sync with OpenCode via companion plugin - no manual YAML editing needed.
|
|
||||||
|
|
||||||
### [All API Hub](https://github.com/qixing-jk/all-api-hub)
|
|
||||||
|
|
||||||
Browser extension for one-stop management of New API-compatible relay site accounts, featuring balance and usage dashboards, auto check-in, one-click key export to common apps, in-page API availability testing, and channel/model sync and redirection. It integrates with CLIProxyAPI through the Management API for one-click provider import and config sync.
|
|
||||||
|
|
||||||
### [Shadow AI](https://github.com/HEUDavid/shadow-ai)
|
|
||||||
|
|
||||||
Shadow AI is an AI assistant tool designed specifically for restricted environments. It provides a stealthy operation
|
|
||||||
mode without windows or traces, and enables cross-device AI Q&A interaction and control via the local area network (
|
|
||||||
LAN). Essentially, it is an automated collaboration layer of "screen/audio capture + AI inference + low-friction delivery",
|
|
||||||
helping users to immersively use AI assistants across applications on controlled devices or in restricted environments.
|
|
||||||
|
|
||||||
### [ProxyPal](https://github.com/buddingnewinsights/proxypal)
|
|
||||||
|
|
||||||
Cross-platform desktop app (macOS, Windows, Linux) wrapping CLIProxyAPI with a native GUI. Connects Claude, ChatGPT, Gemini, GitHub Copilot, Qwen, iFlow, and custom OpenAI-compatible endpoints with usage analytics, request monitoring, and auto-configuration for popular coding tools - no API keys needed.
|
|
||||||
|
|
||||||
> [!NOTE]
|
|
||||||
> If you developed a project based on CLIProxyAPI, please open a PR to add it to this list.
|
|
||||||
|
|
||||||
## More choices
|
|
||||||
|
|
||||||
These projects are ports of CLIProxyAPI or inspired by it:
|
|
||||||
|
|
||||||
### [9Router](https://github.com/decolua/9router)
|
|
||||||
|
|
||||||
A Next.js implementation inspired by CLIProxyAPI, easy to install and use, built from scratch with format translation (OpenAI/Claude/Gemini/Ollama), combo system with auto-fallback, multi-account management with exponential backoff, a Next.js web dashboard, and support for CLI tools (Cursor, Claude Code, Cline, RooCode) - no API keys needed.
|
|
||||||
|
|
||||||
### [OmniRoute](https://github.com/diegosouzapw/OmniRoute)
|
|
||||||
|
|
||||||
Never stop coding. Smart routing to FREE & low-cost AI models with automatic fallback.
|
|
||||||
|
|
||||||
OmniRoute is an AI gateway for multi-provider LLMs: an OpenAI-compatible endpoint with smart routing, load balancing, retries, and fallbacks. Add policies, rate limits, caching, and observability for reliable, cost-aware inference.
|
|
||||||
|
|
||||||
> [!NOTE]
|
|
||||||
> If you have developed a port of CLIProxyAPI or a project inspired by it, please open a PR to add it to this list.
|
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
||||||
|
|||||||
92
README_CN.md
92
README_CN.md
@@ -1,6 +1,6 @@
|
|||||||
# CLIProxyAPI Plus
|
# CLIProxyAPI Plus
|
||||||
|
|
||||||
[English](README.md) | 中文 | [日本語](README_JA.md)
|
[English](README.md) | 中文
|
||||||
|
|
||||||
这是 [CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI) 的 Plus 版本,在原有基础上增加了第三方供应商的支持。
|
这是 [CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI) 的 Plus 版本,在原有基础上增加了第三方供应商的支持。
|
||||||
|
|
||||||
@@ -12,96 +12,6 @@
|
|||||||
|
|
||||||
如果需要提交任何非第三方供应商支持的 Pull Request,请提交到[主线](https://github.com/router-for-me/CLIProxyAPI)版本。
|
如果需要提交任何非第三方供应商支持的 Pull Request,请提交到[主线](https://github.com/router-for-me/CLIProxyAPI)版本。
|
||||||
|
|
||||||
1. Fork 仓库
|
|
||||||
2. 创建您的功能分支(`git checkout -b feature/amazing-feature`)
|
|
||||||
3. 提交您的更改(`git commit -m 'Add some amazing feature'`)
|
|
||||||
4. 推送到分支(`git push origin feature/amazing-feature`)
|
|
||||||
5. 打开 Pull Request
|
|
||||||
|
|
||||||
## 谁与我们在一起?
|
|
||||||
|
|
||||||
这些项目基于 CLIProxyAPI:
|
|
||||||
|
|
||||||
### [vibeproxy](https://github.com/automazeio/vibeproxy)
|
|
||||||
|
|
||||||
一个原生 macOS 菜单栏应用,让您可以使用 Claude Code & ChatGPT 订阅服务和 AI 编程工具,无需 API 密钥。
|
|
||||||
|
|
||||||
### [Subtitle Translator](https://github.com/VjayC/SRT-Subtitle-Translator-Validator)
|
|
||||||
|
|
||||||
一款基于浏览器的 SRT 字幕翻译工具,可通过 CLI 代理 API 使用您的 Gemini 订阅。内置自动验证与错误修正功能,无需 API 密钥。
|
|
||||||
|
|
||||||
### [CCS (Claude Code Switch)](https://github.com/kaitranntt/ccs)
|
|
||||||
|
|
||||||
CLI 封装器,用于通过 CLIProxyAPI OAuth 即时切换多个 Claude 账户和替代模型(Gemini, Codex, Antigravity),无需 API 密钥。
|
|
||||||
|
|
||||||
### [Quotio](https://github.com/nguyenphutrong/quotio)
|
|
||||||
|
|
||||||
原生 macOS 菜单栏应用,统一管理 Claude、Gemini、OpenAI、Qwen 和 Antigravity 订阅,提供实时配额追踪和智能自动故障转移,支持 Claude Code、OpenCode 和 Droid 等 AI 编程工具,无需 API 密钥。
|
|
||||||
|
|
||||||
### [CodMate](https://github.com/loocor/CodMate)
|
|
||||||
|
|
||||||
原生 macOS SwiftUI 应用,用于管理 CLI AI 会话(Claude Code、Codex、Gemini CLI),提供统一的提供商管理、Git 审查、项目组织、全局搜索和终端集成。集成 CLIProxyAPI 为 Codex、Claude、Gemini、Antigravity 和 Qwen Code 提供统一的 OAuth 认证,支持内置和第三方提供商通过单一代理端点重路由 - OAuth 提供商无需 API 密钥。
|
|
||||||
|
|
||||||
### [ProxyPilot](https://github.com/Finesssee/ProxyPilot)
|
|
||||||
|
|
||||||
原生 Windows CLIProxyAPI 分支,集成 TUI、系统托盘及多服务商 OAuth 认证,专为 AI 编程工具打造,无需 API 密钥。
|
|
||||||
|
|
||||||
### [Claude Proxy VSCode](https://github.com/uzhao/claude-proxy-vscode)
|
|
||||||
|
|
||||||
一款 VSCode 扩展,提供了在 VSCode 中快速切换 Claude Code 模型的功能,内置 CLIProxyAPI 作为其后端,支持后台自动启动和关闭。
|
|
||||||
|
|
||||||
### [ZeroLimit](https://github.com/0xtbug/zero-limit)
|
|
||||||
|
|
||||||
Windows 桌面应用,基于 Tauri + React 构建,用于通过 CLIProxyAPI 监控 AI 编程助手配额。支持跨 Gemini、Claude、OpenAI Codex 和 Antigravity 账户的使用量追踪,提供实时仪表盘、系统托盘集成和一键代理控制,无需 API 密钥。
|
|
||||||
|
|
||||||
### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
|
|
||||||
|
|
||||||
面向 CLIProxyAPI 的 Web 管理面板,提供健康检查、资源监控、日志查看、自动更新、请求统计与定价展示,支持一键安装与 systemd 服务。
|
|
||||||
|
|
||||||
### [CLIProxyAPI Tray](https://github.com/kitephp/CLIProxyAPI_Tray)
|
|
||||||
|
|
||||||
Windows 托盘应用,基于 PowerShell 脚本实现,不依赖任何第三方库。主要功能包括:自动创建快捷方式、静默运行、密码管理、通道切换(Main / Plus)以及自动下载与更新。
|
|
||||||
|
|
||||||
### [霖君](https://github.com/wangdabaoqq/LinJun)
|
|
||||||
|
|
||||||
霖君是一款用于管理AI编程助手的跨平台桌面应用,支持macOS、Windows、Linux系统。统一管理Claude Code、Gemini CLI、OpenAI Codex、Qwen Code等AI编程工具,本地代理实现多账户配额跟踪和一键配置。
|
|
||||||
|
|
||||||
### [CLIProxyAPI Dashboard](https://github.com/itsmylife44/cliproxyapi-dashboard)
|
|
||||||
|
|
||||||
一个面向 CLIProxyAPI 的现代化 Web 管理仪表盘,基于 Next.js、React 和 PostgreSQL 构建。支持实时日志流、结构化配置编辑、API Key 管理、Claude/Gemini/Codex 的 OAuth 提供方集成、使用量分析、容器管理,并可通过配套插件与 OpenCode 同步配置,无需手动编辑 YAML。
|
|
||||||
|
|
||||||
### [All API Hub](https://github.com/qixing-jk/all-api-hub)
|
|
||||||
|
|
||||||
用于一站式管理 New API 兼容中转站账号的浏览器扩展,提供余额与用量看板、自动签到、密钥一键导出到常用应用、网页内 API 可用性测试,以及渠道与模型同步和重定向。支持通过 CLIProxyAPI Management API 一键导入 Provider 与同步配置。
|
|
||||||
|
|
||||||
### [Shadow AI](https://github.com/HEUDavid/shadow-ai)
|
|
||||||
|
|
||||||
Shadow AI 是一款专为受限环境设计的 AI 辅助工具。提供无窗口、无痕迹的隐蔽运行方式,并通过局域网实现跨设备的 AI 问答交互与控制。本质上是一个「屏幕/音频采集 + AI 推理 + 低摩擦投送」的自动化协作层,帮助用户在受控设备/受限环境下沉浸式跨应用地使用 AI 助手。
|
|
||||||
|
|
||||||
### [ProxyPal](https://github.com/buddingnewinsights/proxypal)
|
|
||||||
|
|
||||||
跨平台桌面应用(macOS、Windows、Linux),以原生 GUI 封装 CLIProxyAPI。支持连接 Claude、ChatGPT、Gemini、GitHub Copilot、Qwen、iFlow 及自定义 OpenAI 兼容端点,具备使用分析、请求监控和热门编程工具自动配置功能,无需 API 密钥。
|
|
||||||
|
|
||||||
> [!NOTE]
|
|
||||||
> 如果你开发了基于 CLIProxyAPI 的项目,请提交一个 PR(拉取请求)将其添加到此列表中。
|
|
||||||
|
|
||||||
## 更多选择
|
|
||||||
|
|
||||||
以下项目是 CLIProxyAPI 的移植版或受其启发:
|
|
||||||
|
|
||||||
### [9Router](https://github.com/decolua/9router)
|
|
||||||
|
|
||||||
基于 Next.js 的实现,灵感来自 CLIProxyAPI,易于安装使用;自研格式转换(OpenAI/Claude/Gemini/Ollama)、组合系统与自动回退、多账户管理(指数退避)、Next.js Web 控制台,并支持 Cursor、Claude Code、Cline、RooCode 等 CLI 工具,无需 API 密钥。
|
|
||||||
|
|
||||||
### [OmniRoute](https://github.com/diegosouzapw/OmniRoute)
|
|
||||||
|
|
||||||
代码不止,创新不停。智能路由至免费及低成本 AI 模型,并支持自动故障转移。
|
|
||||||
|
|
||||||
OmniRoute 是一个面向多供应商大语言模型的 AI 网关:它提供兼容 OpenAI 的端点,具备智能路由、负载均衡、重试及回退机制。通过添加策略、速率限制、缓存和可观测性,确保推理过程既可靠又具备成本意识。
|
|
||||||
|
|
||||||
> [!NOTE]
|
|
||||||
> 如果你开发了 CLIProxyAPI 的移植或衍生项目,请提交 PR 将其添加到此列表中。
|
|
||||||
|
|
||||||
## 许可证
|
## 许可证
|
||||||
|
|
||||||
此项目根据 MIT 许可证授权 - 有关详细信息,请参阅 [LICENSE](LICENSE) 文件。
|
此项目根据 MIT 许可证授权 - 有关详细信息,请参阅 [LICENSE](LICENSE) 文件。
|
||||||
|
|||||||
199
README_JA.md
199
README_JA.md
@@ -1,199 +0,0 @@
|
|||||||
# CLI Proxy API
|
|
||||||
|
|
||||||
[English](README.md) | [中文](README_CN.md) | 日本語
|
|
||||||
|
|
||||||
CLI向けのOpenAI/Gemini/Claude/Codex互換APIインターフェースを提供するプロキシサーバーです。
|
|
||||||
|
|
||||||
OAuth経由でOpenAI Codex(GPTモデル)およびClaude Codeもサポートしています。
|
|
||||||
|
|
||||||
ローカルまたはマルチアカウントのCLIアクセスを、OpenAI(Responses含む)/Gemini/Claude互換のクライアントやSDKで利用できます。
|
|
||||||
|
|
||||||
## スポンサー
|
|
||||||
|
|
||||||
[Z.ai](https://z.ai/subscribe?ic=8JVLJQFSKB)
|
|
||||||
|
|
||||||
本プロジェクトはZ.aiにスポンサーされており、GLM CODING PLANの提供を受けています。
|
|
||||||
|
|
||||||
GLM CODING PLANはAIコーディング向けに設計されたサブスクリプションサービスで、月額わずか$10から利用可能です。フラッグシップのGLM-4.7およびGLM-5(GLM-5はProユーザーのみ利用可能)モデルを10以上の人気AIコーディングツール(Claude Code、Cline、Roo Codeなど)で利用でき、開発者にトップクラスの高速かつ安定したコーディング体験を提供します。
|
|
||||||
|
|
||||||
GLM CODING PLANを10%割引で取得:https://z.ai/subscribe?ic=8JVLJQFSKB
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
<table>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td width="180"><a href="https://www.packyapi.com/register?aff=cliproxyapi"><img src="./assets/packycode.png" alt="PackyCode" width="150"></a></td>
|
|
||||||
<td>PackyCodeのスポンサーシップに感謝します!PackyCodeは信頼性が高く効率的なAPIリレーサービスプロバイダーで、Claude Code、Codex、Geminiなどのリレーサービスを提供しています。PackyCodeは当ソフトウェアのユーザーに特別割引を提供しています:<a href="https://www.packyapi.com/register?aff=cliproxyapi">こちらのリンク</a>から登録し、チャージ時にプロモーションコード「cliproxyapi」を入力すると10%割引になります。</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td width="180"><a href="https://www.aicodemirror.com/register?invitecode=TJNAIF"><img src="./assets/aicodemirror.png" alt="AICodeMirror" width="150"></a></td>
|
|
||||||
<td>AICodeMirrorのスポンサーシップに感謝します!AICodeMirrorはClaude Code / Codex / Gemini CLI向けの公式高安定性リレーサービスを提供しており、エンタープライズグレードの同時接続、迅速な請求書発行、24時間365日の専任技術サポートを備えています。Claude Code / Codex / Geminiの公式チャネルが元の価格の38% / 2% / 9%で利用でき、チャージ時にはさらに割引があります!CLIProxyAPIユーザー向けの特別特典:<a href="https://www.aicodemirror.com/register?invitecode=TJNAIF">こちらのリンク</a>から登録すると、初回チャージが20%割引になり、エンタープライズのお客様は最大25%割引を受けられます!</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td width="180"><a href="https://shop.bmoplus.com/?utm_source=github"><img src="./assets/bmoplus.png" alt="BmoPlus" width="150"></a></td>
|
|
||||||
<td>本プロジェクトにご支援いただいた BmoPlus に感謝いたします!BmoPlusは、AIサブスクリプションのヘビーユーザー向けに特化した信頼性の高いAIアカウントサービスプロバイダーであり、安定した ChatGPT Plus / ChatGPT Pro (完全保証) / Claude Pro / Super Grok / Gemini Pro の公式代行チャージおよび即納アカウントを提供しています。こちらの<a href="https://shop.bmoplus.com/?utm_source=github">BmoPlus AIアカウント専門店/代行チャージ</a>経由でご登録・ご注文いただいたユーザー様は、GPTを <b>公式サイト価格の約1割(90% OFF)</b> という驚異的な価格でご利用いただけます!</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td width="180"><a href="https://www.lingtrue.com/register"><img src="./assets/lingtrue.png" alt="LingtrueAPI" width="150"></a></td>
|
|
||||||
<td>LingtrueAPIのスポンサーシップに感謝します!LingtrueAPIはグローバルな大規模モデルAPIリレーサービスプラットフォームで、Claude Code、Codex、GeminiなどのトップモデルAPI呼び出しサービスを提供し、ユーザーが低コストかつ高い安定性で世界中のAI能力に接続できるよう支援しています。LingtrueAPIは本ソフトウェアのユーザーに特別割引を提供しています:<a href="https://www.lingtrue.com/register">こちらのリンク</a>から登録し、初回チャージ時にプロモーションコード「LingtrueAPI」を入力すると10%割引になります。</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
## 概要
|
|
||||||
|
|
||||||
- CLIモデル向けのOpenAI/Gemini/Claude互換APIエンドポイント
|
|
||||||
- OAuthログインによるOpenAI Codexサポート(GPTモデル)
|
|
||||||
- OAuthログインによるClaude Codeサポート
|
|
||||||
- OAuthログインによるQwen Codeサポート
|
|
||||||
- OAuthログインによるiFlowサポート
|
|
||||||
- プロバイダールーティングによるAmp CLIおよびIDE拡張機能のサポート
|
|
||||||
- ストリーミングおよび非ストリーミングレスポンス
|
|
||||||
- 関数呼び出し/ツールのサポート
|
|
||||||
- マルチモーダル入力サポート(テキストと画像)
|
|
||||||
- ラウンドロビン負荷分散による複数アカウント対応(Gemini、OpenAI、Claude、QwenおよびiFlow)
|
|
||||||
- シンプルなCLI認証フロー(Gemini、OpenAI、Claude、QwenおよびiFlow)
|
|
||||||
- Generative Language APIキーのサポート
|
|
||||||
- AI Studioビルドのマルチアカウント負荷分散
|
|
||||||
- Gemini CLIのマルチアカウント負荷分散
|
|
||||||
- Claude Codeのマルチアカウント負荷分散
|
|
||||||
- Qwen Codeのマルチアカウント負荷分散
|
|
||||||
- iFlowのマルチアカウント負荷分散
|
|
||||||
- OpenAI Codexのマルチアカウント負荷分散
|
|
||||||
- 設定によるOpenAI互換アップストリームプロバイダー(例:OpenRouter)
|
|
||||||
- プロキシ埋め込み用の再利用可能なGo SDK(`docs/sdk-usage.md`を参照)
|
|
||||||
|
|
||||||
## はじめに
|
|
||||||
|
|
||||||
CLIProxyAPIガイド:[https://help.router-for.me/](https://help.router-for.me/)
|
|
||||||
|
|
||||||
## 管理API
|
|
||||||
|
|
||||||
[MANAGEMENT_API.md](https://help.router-for.me/management/api)を参照
|
|
||||||
|
|
||||||
## Amp CLIサポート
|
|
||||||
|
|
||||||
CLIProxyAPIは[Amp CLI](https://ampcode.com)およびAmp IDE拡張機能の統合サポートを含んでおり、Google/ChatGPT/ClaudeのOAuthサブスクリプションをAmpのコーディングツールで使用できます:
|
|
||||||
|
|
||||||
- Ampの APIパターン用のプロバイダールートエイリアス(`/api/provider/{provider}/v1...`)
|
|
||||||
- OAuth認証およびアカウント機能用の管理プロキシ
|
|
||||||
- 自動ルーティングによるスマートモデルフォールバック
|
|
||||||
- 利用できないモデルを代替モデルにルーティングする**モデルマッピング**(例:`claude-opus-4.5` → `claude-sonnet-4`)
|
|
||||||
- localhostのみの管理エンドポイントによるセキュリティファーストの設計
|
|
||||||
|
|
||||||
特定のバックエンド系統のリクエスト/レスポンス形状が必要な場合は、統合された `/v1/...` エンドポイントよりも provider-specific のパスを優先してください。
|
|
||||||
|
|
||||||
- messages 系のバックエンドには `/api/provider/{provider}/v1/messages`
|
|
||||||
- モデル単位の generate 系エンドポイントには `/api/provider/{provider}/v1beta/models/...`
|
|
||||||
- chat-completions 系のバックエンドには `/api/provider/{provider}/v1/chat/completions`
|
|
||||||
|
|
||||||
これらのパスはプロトコル面の選択には役立ちますが、同じクライアント向けモデル名が複数バックエンドで再利用されている場合、それだけで推論実行系が一意に固定されるわけではありません。実際の推論ルーティングは、引き続きリクエスト内の model/alias 解決に従います。厳密にバックエンドを固定したい場合は、一意な alias や prefix を使うか、クライアント向けモデル名の重複自体を避けてください。
|
|
||||||
|
|
||||||
**→ [Amp CLI統合ガイドの完全版](https://help.router-for.me/agent-client/amp-cli.html)**
|
|
||||||
|
|
||||||
## SDKドキュメント
|
|
||||||
|
|
||||||
- 使い方:[docs/sdk-usage.md](docs/sdk-usage.md)
|
|
||||||
- 上級(エグゼキューターとトランスレーター):[docs/sdk-advanced.md](docs/sdk-advanced.md)
|
|
||||||
- アクセス:[docs/sdk-access.md](docs/sdk-access.md)
|
|
||||||
- ウォッチャー:[docs/sdk-watcher.md](docs/sdk-watcher.md)
|
|
||||||
- カスタムプロバイダーの例:`examples/custom-provider`
|
|
||||||
|
|
||||||
## コントリビューション
|
|
||||||
|
|
||||||
コントリビューションを歓迎します!お気軽にPull Requestを送ってください。
|
|
||||||
|
|
||||||
1. リポジトリをフォーク
|
|
||||||
2. フィーチャーブランチを作成(`git checkout -b feature/amazing-feature`)
|
|
||||||
3. 変更をコミット(`git commit -m 'Add some amazing feature'`)
|
|
||||||
4. ブランチにプッシュ(`git push origin feature/amazing-feature`)
|
|
||||||
5. Pull Requestを作成
|
|
||||||
|
|
||||||
## 関連プロジェクト
|
|
||||||
|
|
||||||
CLIProxyAPIをベースにした以下のプロジェクトがあります:
|
|
||||||
|
|
||||||
### [vibeproxy](https://github.com/automazeio/vibeproxy)
|
|
||||||
|
|
||||||
macOSネイティブのメニューバーアプリで、Claude CodeとChatGPTのサブスクリプションをAIコーディングツールで使用可能 - APIキー不要
|
|
||||||
|
|
||||||
### [Subtitle Translator](https://github.com/VjayC/SRT-Subtitle-Translator-Validator)
|
|
||||||
|
|
||||||
CLIProxyAPI経由でGeminiサブスクリプションを使用してSRT字幕を翻訳するブラウザベースのツール。自動検証/エラー修正機能付き - APIキー不要
|
|
||||||
|
|
||||||
### [CCS (Claude Code Switch)](https://github.com/kaitranntt/ccs)
|
|
||||||
|
|
||||||
CLIProxyAPI OAuthを使用して複数のClaudeアカウントや代替モデル(Gemini、Codex、Antigravity)を即座に切り替えるCLIラッパー - APIキー不要
|
|
||||||
|
|
||||||
### [Quotio](https://github.com/nguyenphutrong/quotio)
|
|
||||||
|
|
||||||
Claude、Gemini、OpenAI、Qwen、Antigravityのサブスクリプションを統合し、リアルタイムのクォータ追跡とスマート自動フェイルオーバーを備えたmacOSネイティブのメニューバーアプリ。Claude Code、OpenCode、Droidなどのコーディングツール向け - APIキー不要
|
|
||||||
|
|
||||||
### [CodMate](https://github.com/loocor/CodMate)
|
|
||||||
|
|
||||||
CLI AIセッション(Codex、Claude Code、Gemini CLI)を管理するmacOS SwiftUIネイティブアプリ。統合プロバイダー管理、Gitレビュー、プロジェクト整理、グローバル検索、ターミナル統合機能を搭載。CLIProxyAPIと統合し、Codex、Claude、Gemini、Antigravity、Qwen CodeのOAuth認証を提供。単一のプロキシエンドポイントを通じた組み込みおよびサードパーティプロバイダーの再ルーティングに対応 - OAuthプロバイダーではAPIキー不要
|
|
||||||
|
|
||||||
### [ProxyPilot](https://github.com/Finesssee/ProxyPilot)
|
|
||||||
|
|
||||||
TUI、システムトレイ、マルチプロバイダーOAuthを備えたWindows向けCLIProxyAPIフォーク - AIコーディングツール用、APIキー不要
|
|
||||||
|
|
||||||
### [Claude Proxy VSCode](https://github.com/uzhao/claude-proxy-vscode)
|
|
||||||
|
|
||||||
Claude Codeモデルを素早く切り替えるVSCode拡張機能。バックエンドとしてCLIProxyAPIを統合し、バックグラウンドでの自動ライフサイクル管理を搭載
|
|
||||||
|
|
||||||
### [ZeroLimit](https://github.com/0xtbug/zero-limit)
|
|
||||||
|
|
||||||
CLIProxyAPIを使用してAIコーディングアシスタントのクォータを監視するTauri + React製のWindowsデスクトップアプリ。Gemini、Claude、OpenAI Codex、Antigravityアカウントの使用量をリアルタイムダッシュボード、システムトレイ統合、ワンクリックプロキシコントロールで追跡 - APIキー不要
|
|
||||||
|
|
||||||
### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
|
|
||||||
|
|
||||||
CLIProxyAPI向けの軽量Web管理パネル。ヘルスチェック、リソース監視、リアルタイムログ、自動更新、リクエスト統計、料金表示機能を搭載。ワンクリックインストールとsystemdサービスに対応
|
|
||||||
|
|
||||||
### [CLIProxyAPI Tray](https://github.com/kitephp/CLIProxyAPI_Tray)
|
|
||||||
|
|
||||||
PowerShellスクリプトで実装されたWindowsトレイアプリケーション。サードパーティライブラリに依存せず、ショートカットの自動作成、サイレント実行、パスワード管理、チャネル切り替え(Main / Plus)、自動ダウンロードおよび自動更新に対応
|
|
||||||
|
|
||||||
### [霖君](https://github.com/wangdabaoqq/LinJun)
|
|
||||||
|
|
||||||
霖君はAIプログラミングアシスタントを管理するクロスプラットフォームデスクトップアプリケーションで、macOS、Windows、Linuxシステムに対応。Claude Code、Gemini CLI、OpenAI Codex、Qwen Codeなどのコーディングツールを統合管理し、ローカルプロキシによるマルチアカウントクォータ追跡とワンクリック設定が可能
|
|
||||||
|
|
||||||
### [CLIProxyAPI Dashboard](https://github.com/itsmylife44/cliproxyapi-dashboard)
|
|
||||||
|
|
||||||
Next.js、React、PostgreSQLで構築されたCLIProxyAPI用のモダンなWebベース管理ダッシュボード。リアルタイムログストリーミング、構造化された設定編集、APIキー管理、Claude/Gemini/Codex向けOAuthプロバイダー統合、使用量分析、コンテナ管理、コンパニオンプラグインによるOpenCodeとの設定同期機能を搭載 - 手動でのYAML編集は不要
|
|
||||||
|
|
||||||
### [All API Hub](https://github.com/qixing-jk/all-api-hub)
|
|
||||||
|
|
||||||
New API互換リレーサイトアカウントをワンストップで管理するブラウザ拡張機能。残高と使用量のダッシュボード、自動チェックイン、一般的なアプリへのワンクリックキーエクスポート、ページ内API可用性テスト、チャネル/モデルの同期とリダイレクト機能を搭載。Management APIを通じてCLIProxyAPIと統合し、ワンクリックでプロバイダーのインポートと設定同期が可能
|
|
||||||
|
|
||||||
### [Shadow AI](https://github.com/HEUDavid/shadow-ai)
|
|
||||||
|
|
||||||
Shadow AIは制限された環境向けに特別に設計されたAIアシスタントツールです。ウィンドウや痕跡のないステルス動作モードを提供し、LAN(ローカルエリアネットワーク)を介したクロスデバイスAI質疑応答のインタラクションと制御を可能にします。本質的には「画面/音声キャプチャ + AI推論 + 低摩擦デリバリー」の自動化コラボレーションレイヤーであり、制御されたデバイスや制限された環境でアプリケーション横断的にAIアシスタントを没入的に使用できるようユーザーを支援します。
|
|
||||||
|
|
||||||
### [ProxyPal](https://github.com/buddingnewinsights/proxypal)
|
|
||||||
|
|
||||||
CLIProxyAPIをネイティブGUIでラップしたクロスプラットフォームデスクトップアプリ(macOS、Windows、Linux)。Claude、ChatGPT、Gemini、GitHub Copilot、Qwen、iFlow、カスタムOpenAI互換エンドポイントに対応し、使用状況分析、リクエスト監視、人気コーディングツールの自動設定機能を搭載 - APIキー不要
|
|
||||||
|
|
||||||
> [!NOTE]
|
|
||||||
> CLIProxyAPIをベースにプロジェクトを開発した場合は、PRを送ってこのリストに追加してください。
|
|
||||||
|
|
||||||
## その他の選択肢
|
|
||||||
|
|
||||||
以下のプロジェクトはCLIProxyAPIの移植版またはそれに触発されたものです:
|
|
||||||
|
|
||||||
### [9Router](https://github.com/decolua/9router)
|
|
||||||
|
|
||||||
CLIProxyAPIに触発されたNext.js実装。インストールと使用が簡単で、フォーマット変換(OpenAI/Claude/Gemini/Ollama)、自動フォールバック付きコンボシステム、指数バックオフ付きマルチアカウント管理、Next.js Webダッシュボード、CLIツール(Cursor、Claude Code、Cline、RooCode)のサポートをゼロから構築 - APIキー不要
|
|
||||||
|
|
||||||
### [OmniRoute](https://github.com/diegosouzapw/OmniRoute)
|
|
||||||
|
|
||||||
コーディングを止めない。無料および低コストのAIモデルへのスマートルーティングと自動フォールバック。
|
|
||||||
|
|
||||||
OmniRouteはマルチプロバイダーLLM向けのAIゲートウェイです:スマートルーティング、負荷分散、リトライ、フォールバックを備えたOpenAI互換エンドポイント。ポリシー、レート制限、キャッシュ、可観測性を追加して、信頼性が高くコストを意識した推論を実現します。
|
|
||||||
|
|
||||||
> [!NOTE]
|
|
||||||
> CLIProxyAPIの移植版またはそれに触発されたプロジェクトを開発した場合は、PRを送ってこのリストに追加してください。
|
|
||||||
|
|
||||||
## ライセンス
|
|
||||||
|
|
||||||
本プロジェクトはMITライセンスの下でライセンスされています - 詳細は[LICENSE](LICENSE)ファイルを参照してください。
|
|
||||||
@@ -26,6 +26,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
||||||
sdkauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
|
sdkauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
|
||||||
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
|
"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
|
||||||
@@ -188,7 +189,7 @@ func fetchModels(ctx context.Context, auth *coreauth.Auth) []modelEntry {
|
|||||||
httpReq.Close = true
|
httpReq.Close = true
|
||||||
httpReq.Header.Set("Content-Type", "application/json")
|
httpReq.Header.Set("Content-Type", "application/json")
|
||||||
httpReq.Header.Set("Authorization", "Bearer "+accessToken)
|
httpReq.Header.Set("Authorization", "Bearer "+accessToken)
|
||||||
httpReq.Header.Set("User-Agent", "antigravity/1.19.6 darwin/arm64")
|
httpReq.Header.Set("User-Agent", misc.AntigravityUserAgent())
|
||||||
|
|
||||||
httpClient := &http.Client{Timeout: 30 * time.Second}
|
httpClient := &http.Client{Timeout: 30 * time.Second}
|
||||||
if transport, _, errProxy := proxyutil.BuildHTTPTransport(auth.ProxyURL); errProxy == nil && transport != nil {
|
if transport, _, errProxy := proxyutil.BuildHTTPTransport(auth.ProxyURL); errProxy == nil && transport != nil {
|
||||||
|
|||||||
@@ -99,6 +99,7 @@ func main() {
|
|||||||
var codeBuddyLogin bool
|
var codeBuddyLogin bool
|
||||||
var projectID string
|
var projectID string
|
||||||
var vertexImport string
|
var vertexImport string
|
||||||
|
var vertexImportPrefix string
|
||||||
var configPath string
|
var configPath string
|
||||||
var password string
|
var password string
|
||||||
var tuiMode bool
|
var tuiMode bool
|
||||||
@@ -139,6 +140,7 @@ func main() {
|
|||||||
flag.StringVar(&projectID, "project_id", "", "Project ID (Gemini only, not required)")
|
flag.StringVar(&projectID, "project_id", "", "Project ID (Gemini only, not required)")
|
||||||
flag.StringVar(&configPath, "config", DefaultConfigPath, "Configure File Path")
|
flag.StringVar(&configPath, "config", DefaultConfigPath, "Configure File Path")
|
||||||
flag.StringVar(&vertexImport, "vertex-import", "", "Import Vertex service account key JSON file")
|
flag.StringVar(&vertexImport, "vertex-import", "", "Import Vertex service account key JSON file")
|
||||||
|
flag.StringVar(&vertexImportPrefix, "vertex-import-prefix", "", "Prefix for Vertex model namespacing (use with -vertex-import)")
|
||||||
flag.StringVar(&password, "password", "", "")
|
flag.StringVar(&password, "password", "", "")
|
||||||
flag.BoolVar(&tuiMode, "tui", false, "Start with terminal management UI")
|
flag.BoolVar(&tuiMode, "tui", false, "Start with terminal management UI")
|
||||||
flag.BoolVar(&standalone, "standalone", false, "In TUI mode, start an embedded local server")
|
flag.BoolVar(&standalone, "standalone", false, "In TUI mode, start an embedded local server")
|
||||||
@@ -510,7 +512,7 @@ func main() {
|
|||||||
|
|
||||||
if vertexImport != "" {
|
if vertexImport != "" {
|
||||||
// Handle Vertex service account import
|
// Handle Vertex service account import
|
||||||
cmd.DoVertexImport(cfg, vertexImport)
|
cmd.DoVertexImport(cfg, vertexImport, vertexImportPrefix)
|
||||||
} else if login {
|
} else if login {
|
||||||
// Handle Google/Gemini login
|
// Handle Google/Gemini login
|
||||||
cmd.DoLogin(cfg, projectID, options)
|
cmd.DoLogin(cfg, projectID, options)
|
||||||
@@ -596,6 +598,7 @@ func main() {
|
|||||||
if standalone {
|
if standalone {
|
||||||
// Standalone mode: start an embedded local server and connect TUI client to it.
|
// Standalone mode: start an embedded local server and connect TUI client to it.
|
||||||
managementasset.StartAutoUpdater(context.Background(), configFilePath)
|
managementasset.StartAutoUpdater(context.Background(), configFilePath)
|
||||||
|
misc.StartAntigravityVersionUpdater(context.Background())
|
||||||
if !localModel {
|
if !localModel {
|
||||||
registry.StartModelsUpdater(context.Background())
|
registry.StartModelsUpdater(context.Background())
|
||||||
}
|
}
|
||||||
@@ -671,6 +674,7 @@ func main() {
|
|||||||
} else {
|
} else {
|
||||||
// Start the main proxy service
|
// Start the main proxy service
|
||||||
managementasset.StartAutoUpdater(context.Background(), configFilePath)
|
managementasset.StartAutoUpdater(context.Background(), configFilePath)
|
||||||
|
misc.StartAntigravityVersionUpdater(context.Background())
|
||||||
if !localModel {
|
if !localModel {
|
||||||
registry.StartModelsUpdater(context.Background())
|
registry.StartModelsUpdater(context.Background())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -105,6 +105,10 @@ routing:
|
|||||||
# When true, enable authentication for the WebSocket API (/v1/ws).
|
# When true, enable authentication for the WebSocket API (/v1/ws).
|
||||||
ws-auth: false
|
ws-auth: false
|
||||||
|
|
||||||
|
# When true, enable Gemini CLI internal endpoints (/v1internal:*).
|
||||||
|
# Default is false for safety.
|
||||||
|
enable-gemini-cli-endpoint: false
|
||||||
|
|
||||||
# When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
|
# When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
|
||||||
nonstream-keepalive-interval: 0
|
nonstream-keepalive-interval: 0
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import (
|
|||||||
|
|
||||||
"github.com/fxamacker/cbor/v2"
|
"github.com/fxamacker/cbor/v2"
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli"
|
||||||
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
|
"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
|
||||||
@@ -700,6 +701,11 @@ func (h *Handler) apiCallTransport(auth *coreauth.Auth) http.RoundTripper {
|
|||||||
if proxyStr := strings.TrimSpace(auth.ProxyURL); proxyStr != "" {
|
if proxyStr := strings.TrimSpace(auth.ProxyURL); proxyStr != "" {
|
||||||
proxyCandidates = append(proxyCandidates, proxyStr)
|
proxyCandidates = append(proxyCandidates, proxyStr)
|
||||||
}
|
}
|
||||||
|
if h != nil && h.cfg != nil {
|
||||||
|
if proxyStr := strings.TrimSpace(proxyURLFromAPIKeyConfig(h.cfg, auth)); proxyStr != "" {
|
||||||
|
proxyCandidates = append(proxyCandidates, proxyStr)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if h != nil && h.cfg != nil {
|
if h != nil && h.cfg != nil {
|
||||||
if proxyStr := strings.TrimSpace(h.cfg.ProxyURL); proxyStr != "" {
|
if proxyStr := strings.TrimSpace(h.cfg.ProxyURL); proxyStr != "" {
|
||||||
@@ -722,6 +728,123 @@ func (h *Handler) apiCallTransport(auth *coreauth.Auth) http.RoundTripper {
|
|||||||
return clone
|
return clone
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// apiKeyConfigEntry is the type constraint used by resolveAPIKeyConfig: any
// configuration entry that can report its API key and its base URL.
type apiKeyConfigEntry interface {
	// GetAPIKey returns the API key stored in the entry.
	GetAPIKey() string
	// GetBaseURL returns the base URL stored in the entry.
	GetBaseURL() string
}
|
||||||
|
|
||||||
|
func resolveAPIKeyConfig[T apiKeyConfigEntry](entries []T, auth *coreauth.Auth) *T {
|
||||||
|
if auth == nil || len(entries) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
attrKey, attrBase := "", ""
|
||||||
|
if auth.Attributes != nil {
|
||||||
|
attrKey = strings.TrimSpace(auth.Attributes["api_key"])
|
||||||
|
attrBase = strings.TrimSpace(auth.Attributes["base_url"])
|
||||||
|
}
|
||||||
|
for i := range entries {
|
||||||
|
entry := &entries[i]
|
||||||
|
cfgKey := strings.TrimSpace((*entry).GetAPIKey())
|
||||||
|
cfgBase := strings.TrimSpace((*entry).GetBaseURL())
|
||||||
|
if attrKey != "" && attrBase != "" {
|
||||||
|
if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) {
|
||||||
|
return entry
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if attrKey != "" && strings.EqualFold(cfgKey, attrKey) {
|
||||||
|
if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) {
|
||||||
|
return entry
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) {
|
||||||
|
return entry
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if attrKey != "" {
|
||||||
|
for i := range entries {
|
||||||
|
entry := &entries[i]
|
||||||
|
if strings.EqualFold(strings.TrimSpace((*entry).GetAPIKey()), attrKey) {
|
||||||
|
return entry
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func proxyURLFromAPIKeyConfig(cfg *config.Config, auth *coreauth.Auth) string {
|
||||||
|
if cfg == nil || auth == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
authKind, authAccount := auth.AccountInfo()
|
||||||
|
if !strings.EqualFold(strings.TrimSpace(authKind), "api_key") {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
attrs := auth.Attributes
|
||||||
|
compatName := ""
|
||||||
|
providerKey := ""
|
||||||
|
if len(attrs) > 0 {
|
||||||
|
compatName = strings.TrimSpace(attrs["compat_name"])
|
||||||
|
providerKey = strings.TrimSpace(attrs["provider_key"])
|
||||||
|
}
|
||||||
|
if compatName != "" || strings.EqualFold(strings.TrimSpace(auth.Provider), "openai-compatibility") {
|
||||||
|
return resolveOpenAICompatAPIKeyProxyURL(cfg, auth, strings.TrimSpace(authAccount), providerKey, compatName)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch strings.ToLower(strings.TrimSpace(auth.Provider)) {
|
||||||
|
case "gemini":
|
||||||
|
if entry := resolveAPIKeyConfig(cfg.GeminiKey, auth); entry != nil {
|
||||||
|
return strings.TrimSpace(entry.ProxyURL)
|
||||||
|
}
|
||||||
|
case "claude":
|
||||||
|
if entry := resolveAPIKeyConfig(cfg.ClaudeKey, auth); entry != nil {
|
||||||
|
return strings.TrimSpace(entry.ProxyURL)
|
||||||
|
}
|
||||||
|
case "codex":
|
||||||
|
if entry := resolveAPIKeyConfig(cfg.CodexKey, auth); entry != nil {
|
||||||
|
return strings.TrimSpace(entry.ProxyURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func resolveOpenAICompatAPIKeyProxyURL(cfg *config.Config, auth *coreauth.Auth, apiKey, providerKey, compatName string) string {
|
||||||
|
if cfg == nil || auth == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
apiKey = strings.TrimSpace(apiKey)
|
||||||
|
if apiKey == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
candidates := make([]string, 0, 3)
|
||||||
|
if v := strings.TrimSpace(compatName); v != "" {
|
||||||
|
candidates = append(candidates, v)
|
||||||
|
}
|
||||||
|
if v := strings.TrimSpace(providerKey); v != "" {
|
||||||
|
candidates = append(candidates, v)
|
||||||
|
}
|
||||||
|
if v := strings.TrimSpace(auth.Provider); v != "" {
|
||||||
|
candidates = append(candidates, v)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range cfg.OpenAICompatibility {
|
||||||
|
compat := &cfg.OpenAICompatibility[i]
|
||||||
|
for _, candidate := range candidates {
|
||||||
|
if candidate != "" && strings.EqualFold(strings.TrimSpace(candidate), compat.Name) {
|
||||||
|
for j := range compat.APIKeyEntries {
|
||||||
|
entry := &compat.APIKeyEntries[j]
|
||||||
|
if strings.EqualFold(strings.TrimSpace(entry.APIKey), apiKey) {
|
||||||
|
return strings.TrimSpace(entry.ProxyURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
func buildProxyTransport(proxyStr string) *http.Transport {
|
func buildProxyTransport(proxyStr string) *http.Transport {
|
||||||
transport, _, errBuild := proxyutil.BuildHTTPTransport(proxyStr)
|
transport, _, errBuild := proxyutil.BuildHTTPTransport(proxyStr)
|
||||||
if errBuild != nil {
|
if errBuild != nil {
|
||||||
|
|||||||
@@ -58,6 +58,105 @@ func TestAPICallTransportInvalidAuthFallsBackToGlobalProxy(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAPICallTransportAPIKeyAuthFallsBackToConfigProxyURL(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
h := &Handler{
|
||||||
|
cfg: &config.Config{
|
||||||
|
SDKConfig: sdkconfig.SDKConfig{ProxyURL: "http://global-proxy.example.com:8080"},
|
||||||
|
GeminiKey: []config.GeminiKey{{
|
||||||
|
APIKey: "gemini-key",
|
||||||
|
ProxyURL: "http://gemini-proxy.example.com:8080",
|
||||||
|
}},
|
||||||
|
ClaudeKey: []config.ClaudeKey{{
|
||||||
|
APIKey: "claude-key",
|
||||||
|
ProxyURL: "http://claude-proxy.example.com:8080",
|
||||||
|
}},
|
||||||
|
CodexKey: []config.CodexKey{{
|
||||||
|
APIKey: "codex-key",
|
||||||
|
ProxyURL: "http://codex-proxy.example.com:8080",
|
||||||
|
}},
|
||||||
|
OpenAICompatibility: []config.OpenAICompatibility{{
|
||||||
|
Name: "bohe",
|
||||||
|
BaseURL: "https://bohe.example.com",
|
||||||
|
APIKeyEntries: []config.OpenAICompatibilityAPIKey{{
|
||||||
|
APIKey: "compat-key",
|
||||||
|
ProxyURL: "http://compat-proxy.example.com:8080",
|
||||||
|
}},
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
auth *coreauth.Auth
|
||||||
|
wantProxy string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "gemini",
|
||||||
|
auth: &coreauth.Auth{
|
||||||
|
Provider: "gemini",
|
||||||
|
Attributes: map[string]string{"api_key": "gemini-key"},
|
||||||
|
},
|
||||||
|
wantProxy: "http://gemini-proxy.example.com:8080",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "claude",
|
||||||
|
auth: &coreauth.Auth{
|
||||||
|
Provider: "claude",
|
||||||
|
Attributes: map[string]string{"api_key": "claude-key"},
|
||||||
|
},
|
||||||
|
wantProxy: "http://claude-proxy.example.com:8080",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "codex",
|
||||||
|
auth: &coreauth.Auth{
|
||||||
|
Provider: "codex",
|
||||||
|
Attributes: map[string]string{"api_key": "codex-key"},
|
||||||
|
},
|
||||||
|
wantProxy: "http://codex-proxy.example.com:8080",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "openai-compatibility",
|
||||||
|
auth: &coreauth.Auth{
|
||||||
|
Provider: "bohe",
|
||||||
|
Attributes: map[string]string{
|
||||||
|
"api_key": "compat-key",
|
||||||
|
"compat_name": "bohe",
|
||||||
|
"provider_key": "bohe",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantProxy: "http://compat-proxy.example.com:8080",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range cases {
|
||||||
|
tc := tc
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
transport := h.apiCallTransport(tc.auth)
|
||||||
|
httpTransport, ok := transport.(*http.Transport)
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("transport type = %T, want *http.Transport", transport)
|
||||||
|
}
|
||||||
|
|
||||||
|
req, errRequest := http.NewRequest(http.MethodGet, "https://example.com", nil)
|
||||||
|
if errRequest != nil {
|
||||||
|
t.Fatalf("http.NewRequest returned error: %v", errRequest)
|
||||||
|
}
|
||||||
|
|
||||||
|
proxyURL, errProxy := httpTransport.Proxy(req)
|
||||||
|
if errProxy != nil {
|
||||||
|
t.Fatalf("httpTransport.Proxy returned error: %v", errProxy)
|
||||||
|
}
|
||||||
|
if proxyURL == nil || proxyURL.String() != tc.wantProxy {
|
||||||
|
t.Fatalf("proxy URL = %v, want %s", proxyURL, tc.wantProxy)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestAuthByIndexDistinguishesSharedAPIKeysAcrossProviders(t *testing.T) {
|
func TestAuthByIndexDistinguishesSharedAPIKeysAcrossProviders(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
|||||||
@@ -253,6 +253,7 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
|
|||||||
log.Debugf("amp model mapping: request %s -> %s", normalizedModel, resolvedModel)
|
log.Debugf("amp model mapping: request %s -> %s", normalizedModel, resolvedModel)
|
||||||
logAmpRouting(RouteTypeModelMapping, modelName, resolvedModel, providerName, requestPath)
|
logAmpRouting(RouteTypeModelMapping, modelName, resolvedModel, providerName, requestPath)
|
||||||
rewriter := NewResponseRewriter(c.Writer, modelName)
|
rewriter := NewResponseRewriter(c.Writer, modelName)
|
||||||
|
rewriter.suppressThinking = true
|
||||||
c.Writer = rewriter
|
c.Writer = rewriter
|
||||||
// Filter Anthropic-Beta header only for local handling paths
|
// Filter Anthropic-Beta header only for local handling paths
|
||||||
filterAntropicBetaHeader(c)
|
filterAntropicBetaHeader(c)
|
||||||
@@ -267,6 +268,7 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
|
|||||||
// proxies (e.g. NewAPI) may return a different model name and lack
|
// proxies (e.g. NewAPI) may return a different model name and lack
|
||||||
// Amp-required fields like thinking.signature.
|
// Amp-required fields like thinking.signature.
|
||||||
rewriter := NewResponseRewriter(c.Writer, modelName)
|
rewriter := NewResponseRewriter(c.Writer, modelName)
|
||||||
|
rewriter.suppressThinking = providerName != "claude"
|
||||||
c.Writer = rewriter
|
c.Writer = rewriter
|
||||||
// Filter Anthropic-Beta header only for local handling paths
|
// Filter Anthropic-Beta header only for local handling paths
|
||||||
filterAntropicBetaHeader(c)
|
filterAntropicBetaHeader(c)
|
||||||
|
|||||||
@@ -17,19 +17,18 @@ import (
|
|||||||
// and to keep Amp-compatible response shapes.
|
// and to keep Amp-compatible response shapes.
|
||||||
type ResponseRewriter struct {
|
type ResponseRewriter struct {
|
||||||
gin.ResponseWriter
|
gin.ResponseWriter
|
||||||
body *bytes.Buffer
|
body *bytes.Buffer
|
||||||
originalModel string
|
originalModel string
|
||||||
isStreaming bool
|
isStreaming bool
|
||||||
suppressedContentBlock map[int]struct{}
|
suppressThinking bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewResponseRewriter creates a new response rewriter for model name substitution.
|
// NewResponseRewriter creates a new response rewriter for model name substitution.
|
||||||
func NewResponseRewriter(w gin.ResponseWriter, originalModel string) *ResponseRewriter {
|
func NewResponseRewriter(w gin.ResponseWriter, originalModel string) *ResponseRewriter {
|
||||||
return &ResponseRewriter{
|
return &ResponseRewriter{
|
||||||
ResponseWriter: w,
|
ResponseWriter: w,
|
||||||
body: &bytes.Buffer{},
|
body: &bytes.Buffer{},
|
||||||
originalModel: originalModel,
|
originalModel: originalModel,
|
||||||
suppressedContentBlock: make(map[int]struct{}),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -99,7 +98,8 @@ func (rw *ResponseRewriter) Write(data []byte) (int, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if rw.isStreaming {
|
if rw.isStreaming {
|
||||||
n, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
|
rewritten := rw.rewriteStreamChunk(data)
|
||||||
|
n, err := rw.ResponseWriter.Write(rewritten)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
|
if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
|
||||||
flusher.Flush()
|
flusher.Flush()
|
||||||
@@ -162,19 +162,10 @@ func ensureAmpSignature(data []byte) []byte {
|
|||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rw *ResponseRewriter) markSuppressedContentBlock(index int) {
|
|
||||||
if rw.suppressedContentBlock == nil {
|
|
||||||
rw.suppressedContentBlock = make(map[int]struct{})
|
|
||||||
}
|
|
||||||
rw.suppressedContentBlock[index] = struct{}{}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (rw *ResponseRewriter) isSuppressedContentBlock(index int) bool {
|
|
||||||
_, ok := rw.suppressedContentBlock[index]
|
|
||||||
return ok
|
|
||||||
}
|
|
||||||
|
|
||||||
func (rw *ResponseRewriter) suppressAmpThinking(data []byte) []byte {
|
func (rw *ResponseRewriter) suppressAmpThinking(data []byte) []byte {
|
||||||
|
if !rw.suppressThinking {
|
||||||
|
return data
|
||||||
|
}
|
||||||
if gjson.GetBytes(data, `content.#(type=="tool_use")`).Exists() {
|
if gjson.GetBytes(data, `content.#(type=="tool_use")`).Exists() {
|
||||||
filtered := gjson.GetBytes(data, `content.#(type!="thinking")#`)
|
filtered := gjson.GetBytes(data, `content.#(type!="thinking")#`)
|
||||||
if filtered.Exists() {
|
if filtered.Exists() {
|
||||||
@@ -185,33 +176,11 @@ func (rw *ResponseRewriter) suppressAmpThinking(data []byte) []byte {
|
|||||||
data, err = sjson.SetBytes(data, "content", filtered.Value())
|
data, err = sjson.SetBytes(data, "content", filtered.Value())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("Amp ResponseRewriter: failed to suppress thinking blocks: %v", err)
|
log.Warnf("Amp ResponseRewriter: failed to suppress thinking blocks: %v", err)
|
||||||
} else {
|
|
||||||
log.Debugf("Amp ResponseRewriter: Suppressed %d thinking blocks due to tool usage", originalCount-filteredCount)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
eventType := gjson.GetBytes(data, "type").String()
|
|
||||||
indexResult := gjson.GetBytes(data, "index")
|
|
||||||
if eventType == "content_block_start" && gjson.GetBytes(data, "content_block.type").String() == "thinking" && indexResult.Exists() {
|
|
||||||
rw.markSuppressedContentBlock(int(indexResult.Int()))
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if gjson.GetBytes(data, "delta.type").String() == "thinking_delta" {
|
|
||||||
if indexResult.Exists() {
|
|
||||||
rw.markSuppressedContentBlock(int(indexResult.Int()))
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if eventType == "content_block_stop" && indexResult.Exists() {
|
|
||||||
index := int(indexResult.Int())
|
|
||||||
if rw.isSuppressedContentBlock(index) {
|
|
||||||
delete(rw.suppressedContentBlock, index)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -262,6 +231,10 @@ func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte {
|
|||||||
jsonData := bytes.TrimPrefix(bytes.TrimSpace(lines[dataIdx]), []byte("data: "))
|
jsonData := bytes.TrimPrefix(bytes.TrimSpace(lines[dataIdx]), []byte("data: "))
|
||||||
if len(jsonData) > 0 && jsonData[0] == '{' {
|
if len(jsonData) > 0 && jsonData[0] == '{' {
|
||||||
rewritten := rw.rewriteStreamEvent(jsonData)
|
rewritten := rw.rewriteStreamEvent(jsonData)
|
||||||
|
if rewritten == nil {
|
||||||
|
i = dataIdx + 1
|
||||||
|
continue
|
||||||
|
}
|
||||||
// Emit event line
|
// Emit event line
|
||||||
out = append(out, line)
|
out = append(out, line)
|
||||||
// Emit blank lines between event and data
|
// Emit blank lines between event and data
|
||||||
@@ -287,7 +260,9 @@ func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte {
|
|||||||
jsonData := bytes.TrimPrefix(trimmed, []byte("data: "))
|
jsonData := bytes.TrimPrefix(trimmed, []byte("data: "))
|
||||||
if len(jsonData) > 0 && jsonData[0] == '{' {
|
if len(jsonData) > 0 && jsonData[0] == '{' {
|
||||||
rewritten := rw.rewriteStreamEvent(jsonData)
|
rewritten := rw.rewriteStreamEvent(jsonData)
|
||||||
out = append(out, append([]byte("data: "), rewritten...))
|
if rewritten != nil {
|
||||||
|
out = append(out, append([]byte("data: "), rewritten...))
|
||||||
|
}
|
||||||
i++
|
i++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ func TestRewriteStreamChunk_MessageModel(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestRewriteStreamChunk_PreservesThinkingWithSignatureInjection(t *testing.T) {
|
func TestRewriteStreamChunk_PreservesThinkingWithSignatureInjection(t *testing.T) {
|
||||||
rw := &ResponseRewriter{suppressedContentBlock: make(map[int]struct{})}
|
rw := &ResponseRewriter{}
|
||||||
|
|
||||||
chunk := []byte("event: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"abc\"}}\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":1,\"content_block\":{\"type\":\"tool_use\",\"name\":\"bash\",\"input\":{}}}\n\n")
|
chunk := []byte("event: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"abc\"}}\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":1,\"content_block\":{\"type\":\"tool_use\",\"name\":\"bash\",\"input\":{}}}\n\n")
|
||||||
result := rw.rewriteStreamChunk(chunk)
|
result := rw.rewriteStreamChunk(chunk)
|
||||||
|
|||||||
@@ -323,6 +323,10 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
|
|||||||
// setupRoutes configures the API routes for the server.
|
// setupRoutes configures the API routes for the server.
|
||||||
// It defines the endpoints and associates them with their respective handlers.
|
// It defines the endpoints and associates them with their respective handlers.
|
||||||
func (s *Server) setupRoutes() {
|
func (s *Server) setupRoutes() {
|
||||||
|
s.engine.GET("/healthz", func(c *gin.Context) {
|
||||||
|
c.JSON(http.StatusOK, gin.H{"status": "ok"})
|
||||||
|
})
|
||||||
|
|
||||||
s.engine.GET("/management.html", s.serveManagementControlPanel)
|
s.engine.GET("/management.html", s.serveManagementControlPanel)
|
||||||
openaiHandlers := openai.NewOpenAIAPIHandler(s.handlers)
|
openaiHandlers := openai.NewOpenAIAPIHandler(s.handlers)
|
||||||
geminiHandlers := gemini.NewGeminiAPIHandler(s.handlers)
|
geminiHandlers := gemini.NewGeminiAPIHandler(s.handlers)
|
||||||
@@ -569,6 +573,8 @@ func (s *Server) registerManagementRoutes() {
|
|||||||
mgmt.PUT("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
|
mgmt.PUT("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
|
||||||
mgmt.PATCH("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
|
mgmt.PATCH("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
|
||||||
|
|
||||||
|
mgmt.GET("/copilot-quota", s.mgmt.GetCopilotQuota)
|
||||||
|
|
||||||
mgmt.GET("/api-keys", s.mgmt.GetAPIKeys)
|
mgmt.GET("/api-keys", s.mgmt.GetAPIKeys)
|
||||||
mgmt.PUT("/api-keys", s.mgmt.PutAPIKeys)
|
mgmt.PUT("/api-keys", s.mgmt.PutAPIKeys)
|
||||||
mgmt.PATCH("/api-keys", s.mgmt.PatchAPIKeys)
|
mgmt.PATCH("/api-keys", s.mgmt.PatchAPIKeys)
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package api
|
package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"os"
|
"os"
|
||||||
@@ -46,6 +47,28 @@ func newTestServer(t *testing.T) *Server {
|
|||||||
return NewServer(cfg, authManager, accessManager, configPath)
|
return NewServer(cfg, authManager, accessManager, configPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestHealthz(t *testing.T) {
|
||||||
|
server := newTestServer(t)
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
server.engine.ServeHTTP(rr, req)
|
||||||
|
|
||||||
|
if rr.Code != http.StatusOK {
|
||||||
|
t.Fatalf("unexpected status code: got %d want %d; body=%s", rr.Code, http.StatusOK, rr.Body.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
var resp struct {
|
||||||
|
Status string `json:"status"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
|
||||||
|
t.Fatalf("failed to parse response JSON: %v; body=%s", err, rr.Body.String())
|
||||||
|
}
|
||||||
|
if resp.Status != "ok" {
|
||||||
|
t.Fatalf("unexpected response status: got %q want %q", resp.Status, "ok")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestAmpProviderModelRoutes(t *testing.T) {
|
func TestAmpProviderModelRoutes(t *testing.T) {
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
name string
|
name string
|
||||||
|
|||||||
@@ -235,6 +235,74 @@ type CopilotModelEntry struct {
|
|||||||
Capabilities map[string]any `json:"capabilities,omitempty"`
|
Capabilities map[string]any `json:"capabilities,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CopilotModelLimits carries the token limits that the Copilot /models API
// reports under capabilities.limits. The limits differ by account type
// (individual vs business) and are the authoritative source for enforcing
// prompt size.
type CopilotModelLimits struct {
	// MaxContextWindowTokens is the size of the whole context window, i.e.
	// prompt and output combined.
	MaxContextWindowTokens int
	// MaxPromptTokens is the hard cap on input/prompt tokens; going over it
	// makes the Copilot API answer with a 400 error.
	MaxPromptTokens int
	// MaxOutputTokens is the cap on output/completion tokens.
	MaxOutputTokens int
}
|
||||||
|
|
||||||
|
// Limits extracts the token limits from the model's capabilities map.
|
||||||
|
// Returns nil if no limits are available or the structure is unexpected.
|
||||||
|
//
|
||||||
|
// Expected Copilot API shape:
|
||||||
|
//
|
||||||
|
// "capabilities": {
|
||||||
|
// "limits": {
|
||||||
|
// "max_context_window_tokens": 200000,
|
||||||
|
// "max_prompt_tokens": 168000,
|
||||||
|
// "max_output_tokens": 32000
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
func (e *CopilotModelEntry) Limits() *CopilotModelLimits {
|
||||||
|
if e.Capabilities == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
limitsRaw, ok := e.Capabilities["limits"]
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
limitsMap, ok := limitsRaw.(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
result := &CopilotModelLimits{
|
||||||
|
MaxContextWindowTokens: anyToInt(limitsMap["max_context_window_tokens"]),
|
||||||
|
MaxPromptTokens: anyToInt(limitsMap["max_prompt_tokens"]),
|
||||||
|
MaxOutputTokens: anyToInt(limitsMap["max_output_tokens"]),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only return if at least one field is populated.
|
||||||
|
if result.MaxContextWindowTokens == 0 && result.MaxPromptTokens == 0 && result.MaxOutputTokens == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// anyToInt converts a decoded numeric value to int, returning 0 for anything
// that is not a number or that cannot be represented as an int.
//
// Go's encoding/json decodes numbers into float64 when the target is
// any/interface{}, so float64 is the common case; the other numeric kinds are
// accepted so that hand-built maps behave the same way. Fractional values are
// truncated toward zero. NaN, ±Inf and out-of-range values yield 0 because the
// Go spec leaves the result of such float-to-int conversions
// implementation-dependent.
func anyToInt(v any) int {
	const (
		maxInt = int(^uint(0) >> 1)
		minInt = -maxInt - 1
	)

	switch n := v.(type) {
	case float64:
		// -float64(minInt) is exactly maxInt+1, so the half-open range check
		// admits every representable int; NaN fails both comparisons.
		if n >= float64(minInt) && n < -float64(minInt) {
			return int(n)
		}
		return 0
	case float32:
		return anyToInt(float64(n))
	case int:
		return n
	case int8:
		return int(n)
	case int16:
		return int(n)
	case int32:
		return int(n)
	case int64:
		return int(n)
	case uint8:
		return int(n)
	case uint16:
		return int(n)
	case uint32:
		return anyToInt(uint64(n))
	case uint:
		return anyToInt(uint64(n))
	case uint64:
		if n > uint64(maxInt) {
			return 0
		}
		return int(n)
	default:
		return 0
	}
}
|
||||||
|
|
||||||
// CopilotModelsResponse represents the response from the Copilot /models endpoint.
|
// CopilotModelsResponse represents the response from the Copilot /models endpoint.
|
||||||
type CopilotModelsResponse struct {
|
type CopilotModelsResponse struct {
|
||||||
Data []CopilotModelEntry `json:"data"`
|
Data []CopilotModelEntry `json:"data"`
|
||||||
|
|||||||
@@ -30,6 +30,10 @@ type VertexCredentialStorage struct {
|
|||||||
|
|
||||||
// Type is the provider identifier stored alongside credentials. Always "vertex".
|
// Type is the provider identifier stored alongside credentials. Always "vertex".
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
|
|
||||||
|
// Prefix optionally namespaces models for this credential (e.g., "teamA").
|
||||||
|
// This results in model names like "teamA/gemini-2.0-flash".
|
||||||
|
Prefix string `json:"prefix,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// SaveTokenToFile writes the credential payload to the given file path in JSON format.
|
// SaveTokenToFile writes the credential payload to the given file path in JSON format.
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ import (
|
|||||||
// DoVertexImport imports a Google Cloud service account key JSON and persists
|
// DoVertexImport imports a Google Cloud service account key JSON and persists
|
||||||
// it as a "vertex" provider credential. The file content is embedded in the auth
|
// it as a "vertex" provider credential. The file content is embedded in the auth
|
||||||
// file to allow portable deployment across stores.
|
// file to allow portable deployment across stores.
|
||||||
func DoVertexImport(cfg *config.Config, keyPath string) {
|
func DoVertexImport(cfg *config.Config, keyPath string, prefix string) {
|
||||||
if cfg == nil {
|
if cfg == nil {
|
||||||
cfg = &config.Config{}
|
cfg = &config.Config{}
|
||||||
}
|
}
|
||||||
@@ -62,13 +62,28 @@ func DoVertexImport(cfg *config.Config, keyPath string) {
|
|||||||
// Default location if not provided by user. Can be edited in the saved file later.
|
// Default location if not provided by user. Can be edited in the saved file later.
|
||||||
location := "us-central1"
|
location := "us-central1"
|
||||||
|
|
||||||
fileName := fmt.Sprintf("vertex-%s.json", sanitizeFilePart(projectID))
|
// Normalize and validate prefix: must be a single segment (no "/" allowed).
|
||||||
|
prefix = strings.TrimSpace(prefix)
|
||||||
|
prefix = strings.Trim(prefix, "/")
|
||||||
|
if prefix != "" && strings.Contains(prefix, "/") {
|
||||||
|
log.Errorf("vertex-import: prefix must be a single segment (no '/' allowed): %q", prefix)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Include prefix in filename so importing the same project with different
|
||||||
|
// prefixes creates separate credential files instead of overwriting.
|
||||||
|
baseName := sanitizeFilePart(projectID)
|
||||||
|
if prefix != "" {
|
||||||
|
baseName = sanitizeFilePart(prefix) + "-" + baseName
|
||||||
|
}
|
||||||
|
fileName := fmt.Sprintf("vertex-%s.json", baseName)
|
||||||
// Build auth record
|
// Build auth record
|
||||||
storage := &vertex.VertexCredentialStorage{
|
storage := &vertex.VertexCredentialStorage{
|
||||||
ServiceAccount: sa,
|
ServiceAccount: sa,
|
||||||
ProjectID: projectID,
|
ProjectID: projectID,
|
||||||
Email: email,
|
Email: email,
|
||||||
Location: location,
|
Location: location,
|
||||||
|
Prefix: prefix,
|
||||||
}
|
}
|
||||||
metadata := map[string]any{
|
metadata := map[string]any{
|
||||||
"service_account": sa,
|
"service_account": sa,
|
||||||
@@ -76,6 +91,7 @@ func DoVertexImport(cfg *config.Config, keyPath string) {
|
|||||||
"email": email,
|
"email": email,
|
||||||
"location": location,
|
"location": location,
|
||||||
"type": "vertex",
|
"type": "vertex",
|
||||||
|
"prefix": prefix,
|
||||||
"label": labelForVertex(projectID, email),
|
"label": labelForVertex(projectID, email),
|
||||||
}
|
}
|
||||||
record := &coreauth.Auth{
|
record := &coreauth.Auth{
|
||||||
|
|||||||
@@ -9,6 +9,10 @@ type SDKConfig struct {
|
|||||||
// ProxyURL is the URL of an optional proxy server to use for outbound requests.
|
// ProxyURL is the URL of an optional proxy server to use for outbound requests.
|
||||||
ProxyURL string `yaml:"proxy-url" json:"proxy-url"`
|
ProxyURL string `yaml:"proxy-url" json:"proxy-url"`
|
||||||
|
|
||||||
|
// EnableGeminiCLIEndpoint controls whether Gemini CLI internal endpoints (/v1internal:*) are enabled.
|
||||||
|
// Default is false for safety; when false, /v1internal:* requests are rejected.
|
||||||
|
EnableGeminiCLIEndpoint bool `yaml:"enable-gemini-cli-endpoint" json:"enable-gemini-cli-endpoint"`
|
||||||
|
|
||||||
// ForceModelPrefix requires explicit model prefixes (e.g., "teamA/gemini-3-pro-preview")
|
// ForceModelPrefix requires explicit model prefixes (e.g., "teamA/gemini-3-pro-preview")
|
||||||
// to target prefixed credentials. When false, unprefixed model requests may use prefixed
|
// to target prefixed credentials. When false, unprefixed model requests may use prefixed
|
||||||
// credentials as well.
|
// credentials as well.
|
||||||
|
|||||||
151
internal/misc/antigravity_version.go
Normal file
151
internal/misc/antigravity_version.go
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
// Package misc provides miscellaneous utility functions for the CLI Proxy API server.
|
||||||
|
package misc
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// antigravityReleasesURL is the releases endpoint of the antigravity
	// auto-updater service.
	antigravityReleasesURL = "https://antigravity-auto-updater-974169037036.us-central1.run.app/releases"
	// antigravityFallbackVersion is the version used when no fetched value
	// is available.
	antigravityFallbackVersion = "1.21.9"
	// antigravityVersionCacheTTL is how long a cached version stays fresh.
	antigravityVersionCacheTTL = 6 * time.Hour
	// antigravityFetchTimeout is a timeout constant for version lookups.
	// NOTE(review): its use is outside this view — confirm against the fetch code.
	antigravityFetchTimeout = 10 * time.Second
)

// antigravityRelease is the JSON shape of a single release record.
type antigravityRelease struct {
	Version     string `json:"version"`
	ExecutionID string `json:"execution_id"`
}

var (
	// antigravityVersionMu guards cachedAntigravityVersion and
	// antigravityVersionExpiry.
	antigravityVersionMu sync.RWMutex
	// cachedAntigravityVersion starts out as the fallback version.
	cachedAntigravityVersion = antigravityFallbackVersion
	// antigravityVersionExpiry is the instant after which the cached version
	// is treated as stale; the zero value means "already stale".
	antigravityVersionExpiry time.Time
	// antigravityUpdaterOnce ensures the background updater is started once.
	antigravityUpdaterOnce sync.Once
)
|
||||||
|
|
||||||
|
// StartAntigravityVersionUpdater starts a background goroutine that periodically refreshes the cached antigravity version.
|
||||||
|
// This is intentionally decoupled from request execution to avoid blocking executors on version lookups.
|
||||||
|
func StartAntigravityVersionUpdater(ctx context.Context) {
|
||||||
|
antigravityUpdaterOnce.Do(func() {
|
||||||
|
go runAntigravityVersionUpdater(ctx)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func runAntigravityVersionUpdater(ctx context.Context) {
|
||||||
|
if ctx == nil {
|
||||||
|
ctx = context.Background()
|
||||||
|
}
|
||||||
|
|
||||||
|
ticker := time.NewTicker(antigravityVersionCacheTTL / 2)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
log.Infof("periodic antigravity version refresh started (interval=%s)", antigravityVersionCacheTTL/2)
|
||||||
|
|
||||||
|
refreshAntigravityVersion(ctx)
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
refreshAntigravityVersion(ctx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func refreshAntigravityVersion(ctx context.Context) {
|
||||||
|
version, errFetch := fetchAntigravityLatestVersion(ctx)
|
||||||
|
|
||||||
|
antigravityVersionMu.Lock()
|
||||||
|
defer antigravityVersionMu.Unlock()
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
|
||||||
|
if errFetch == nil {
|
||||||
|
cachedAntigravityVersion = version
|
||||||
|
antigravityVersionExpiry = now.Add(antigravityVersionCacheTTL)
|
||||||
|
log.WithField("version", version).Info("fetched latest antigravity version")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if cachedAntigravityVersion == "" || now.After(antigravityVersionExpiry) {
|
||||||
|
cachedAntigravityVersion = antigravityFallbackVersion
|
||||||
|
antigravityVersionExpiry = now.Add(antigravityVersionCacheTTL)
|
||||||
|
log.WithError(errFetch).Warn("failed to refresh antigravity version, using fallback version")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
log.WithError(errFetch).Debug("failed to refresh antigravity version, keeping cached value")
|
||||||
|
}
|
||||||
|
|
||||||
|
// AntigravityLatestVersion returns the cached antigravity version refreshed by StartAntigravityVersionUpdater.
|
||||||
|
// It falls back to antigravityFallbackVersion if the cache is empty or stale.
|
||||||
|
func AntigravityLatestVersion() string {
|
||||||
|
antigravityVersionMu.RLock()
|
||||||
|
if cachedAntigravityVersion != "" && time.Now().Before(antigravityVersionExpiry) {
|
||||||
|
v := cachedAntigravityVersion
|
||||||
|
antigravityVersionMu.RUnlock()
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
antigravityVersionMu.RUnlock()
|
||||||
|
|
||||||
|
return antigravityFallbackVersion
|
||||||
|
}
|
||||||
|
|
||||||
|
// AntigravityUserAgent returns the User-Agent string for antigravity requests
|
||||||
|
// using the latest version fetched from the releases API.
|
||||||
|
func AntigravityUserAgent() string {
|
||||||
|
return fmt.Sprintf("antigravity/%s darwin/arm64", AntigravityLatestVersion())
|
||||||
|
}
|
||||||
|
|
||||||
|
func fetchAntigravityLatestVersion(ctx context.Context) (string, error) {
|
||||||
|
if ctx == nil {
|
||||||
|
ctx = context.Background()
|
||||||
|
}
|
||||||
|
|
||||||
|
client := &http.Client{Timeout: antigravityFetchTimeout}
|
||||||
|
|
||||||
|
httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodGet, antigravityReleasesURL, nil)
|
||||||
|
if errReq != nil {
|
||||||
|
return "", fmt.Errorf("build antigravity releases request: %w", errReq)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, errDo := client.Do(httpReq)
|
||||||
|
if errDo != nil {
|
||||||
|
return "", fmt.Errorf("fetch antigravity releases: %w", errDo)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if errClose := resp.Body.Close(); errClose != nil {
|
||||||
|
log.WithError(errClose).Warn("antigravity releases response body close error")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return "", fmt.Errorf("antigravity releases API returned status %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
var releases []antigravityRelease
|
||||||
|
if errDecode := json.NewDecoder(resp.Body).Decode(&releases); errDecode != nil {
|
||||||
|
return "", fmt.Errorf("decode antigravity releases response: %w", errDecode)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(releases) == 0 {
|
||||||
|
return "", errors.New("antigravity releases API returned empty list")
|
||||||
|
}
|
||||||
|
|
||||||
|
version := releases[0].Version
|
||||||
|
if version == "" {
|
||||||
|
return "", errors.New("antigravity releases API returned empty version")
|
||||||
|
}
|
||||||
|
|
||||||
|
return version, nil
|
||||||
|
}
|
||||||
@@ -93,6 +93,30 @@ func GetAntigravityModels() []*ModelInfo {
|
|||||||
func GetCodeBuddyModels() []*ModelInfo {
|
func GetCodeBuddyModels() []*ModelInfo {
|
||||||
now := int64(1748044800) // 2025-05-24
|
now := int64(1748044800) // 2025-05-24
|
||||||
return []*ModelInfo{
|
return []*ModelInfo{
|
||||||
|
{
|
||||||
|
ID: "auto",
|
||||||
|
Object: "model",
|
||||||
|
Created: now,
|
||||||
|
OwnedBy: "tencent",
|
||||||
|
Type: "codebuddy",
|
||||||
|
DisplayName: "Auto",
|
||||||
|
Description: "Automatic model selection via CodeBuddy",
|
||||||
|
ContextLength: 128000,
|
||||||
|
MaxCompletionTokens: 32768,
|
||||||
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "glm-5.0-turbo",
|
||||||
|
Object: "model",
|
||||||
|
Created: now,
|
||||||
|
OwnedBy: "tencent",
|
||||||
|
Type: "codebuddy",
|
||||||
|
DisplayName: "GLM-5.0 Turbo",
|
||||||
|
Description: "GLM-5.0 Turbo via CodeBuddy",
|
||||||
|
ContextLength: 128000,
|
||||||
|
MaxCompletionTokens: 32768,
|
||||||
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
ID: "glm-5.0",
|
ID: "glm-5.0",
|
||||||
Object: "model",
|
Object: "model",
|
||||||
@@ -118,13 +142,13 @@ func GetCodeBuddyModels() []*ModelInfo {
|
|||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "minimax-m2.5",
|
ID: "minimax-m2.7",
|
||||||
Object: "model",
|
Object: "model",
|
||||||
Created: now,
|
Created: now,
|
||||||
OwnedBy: "tencent",
|
OwnedBy: "tencent",
|
||||||
Type: "codebuddy",
|
Type: "codebuddy",
|
||||||
DisplayName: "MiniMax M2.5",
|
DisplayName: "MiniMax M2.7",
|
||||||
Description: "MiniMax M2.5 via CodeBuddy",
|
Description: "MiniMax M2.7 via CodeBuddy",
|
||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 32768,
|
MaxCompletionTokens: 32768,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
@@ -141,6 +165,19 @@ func GetCodeBuddyModels() []*ModelInfo {
|
|||||||
MaxCompletionTokens: 32768,
|
MaxCompletionTokens: 32768,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
ID: "kimi-k2-thinking",
|
||||||
|
Object: "model",
|
||||||
|
Created: now,
|
||||||
|
OwnedBy: "tencent",
|
||||||
|
Type: "codebuddy",
|
||||||
|
DisplayName: "Kimi K2 Thinking",
|
||||||
|
Description: "Kimi K2 Thinking via CodeBuddy",
|
||||||
|
ContextLength: 128000,
|
||||||
|
MaxCompletionTokens: 32768,
|
||||||
|
Thinking: &ThinkingSupport{ZeroAllowed: true},
|
||||||
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
ID: "deepseek-v3-2-volc",
|
ID: "deepseek-v3-2-volc",
|
||||||
Object: "model",
|
Object: "model",
|
||||||
@@ -148,24 +185,11 @@ func GetCodeBuddyModels() []*ModelInfo {
|
|||||||
OwnedBy: "tencent",
|
OwnedBy: "tencent",
|
||||||
Type: "codebuddy",
|
Type: "codebuddy",
|
||||||
DisplayName: "DeepSeek V3.2 (Volc)",
|
DisplayName: "DeepSeek V3.2 (Volc)",
|
||||||
Description: "DeepSeek V3.2 via CodeBuddy (Volcano Engine)",
|
Description: "DeepSeek V3.2 via CodeBuddy",
|
||||||
ContextLength: 128000,
|
ContextLength: 128000,
|
||||||
MaxCompletionTokens: 32768,
|
MaxCompletionTokens: 32768,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
ID: "hunyuan-2.0-thinking",
|
|
||||||
Object: "model",
|
|
||||||
Created: now,
|
|
||||||
OwnedBy: "tencent",
|
|
||||||
Type: "codebuddy",
|
|
||||||
DisplayName: "Hunyuan 2.0 Thinking",
|
|
||||||
Description: "Tencent Hunyuan 2.0 Thinking via CodeBuddy",
|
|
||||||
ContextLength: 128000,
|
|
||||||
MaxCompletionTokens: 32768,
|
|
||||||
Thinking: &ThinkingSupport{ZeroAllowed: true},
|
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -525,6 +549,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
|
|||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-opus-4.6",
|
ID: "claude-opus-4.6",
|
||||||
@@ -537,6 +562,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
|
|||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-sonnet-4",
|
ID: "claude-sonnet-4",
|
||||||
@@ -549,6 +575,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
|
|||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-sonnet-4.5",
|
ID: "claude-sonnet-4.5",
|
||||||
@@ -561,6 +588,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
|
|||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "claude-sonnet-4.6",
|
ID: "claude-sonnet-4.6",
|
||||||
@@ -573,6 +601,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
|
|||||||
ContextLength: 200000,
|
ContextLength: 200000,
|
||||||
MaxCompletionTokens: 64000,
|
MaxCompletionTokens: 64000,
|
||||||
SupportedEndpoints: []string{"/chat/completions"},
|
SupportedEndpoints: []string{"/chat/completions"},
|
||||||
|
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: "gemini-2.5-pro",
|
ID: "gemini-2.5-pro",
|
||||||
|
|||||||
@@ -280,6 +280,7 @@
|
|||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"low",
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -554,6 +555,7 @@
|
|||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"low",
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -610,6 +612,8 @@
|
|||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"minimal",
|
"minimal",
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -838,6 +842,7 @@
|
|||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"low",
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -896,6 +901,8 @@
|
|||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"minimal",
|
"minimal",
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -1070,6 +1077,8 @@
|
|||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"minimal",
|
"minimal",
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -1371,6 +1380,75 @@
|
|||||||
"xhigh"
|
"xhigh"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.3-codex",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1770307200,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"type": "openai",
|
||||||
|
"display_name": "GPT 5.3 Codex",
|
||||||
|
"version": "gpt-5.3",
|
||||||
|
"description": "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.",
|
||||||
|
"context_length": 400000,
|
||||||
|
"max_completion_tokens": 128000,
|
||||||
|
"supported_parameters": [
|
||||||
|
"tools"
|
||||||
|
],
|
||||||
|
"thinking": {
|
||||||
|
"levels": [
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"xhigh"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.4",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1772668800,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"type": "openai",
|
||||||
|
"display_name": "GPT 5.4",
|
||||||
|
"version": "gpt-5.4",
|
||||||
|
"description": "Stable version of GPT 5.4",
|
||||||
|
"context_length": 1050000,
|
||||||
|
"max_completion_tokens": 128000,
|
||||||
|
"supported_parameters": [
|
||||||
|
"tools"
|
||||||
|
],
|
||||||
|
"thinking": {
|
||||||
|
"levels": [
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"xhigh"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.4-mini",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1773705600,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"type": "openai",
|
||||||
|
"display_name": "GPT 5.4 Mini",
|
||||||
|
"version": "gpt-5.4-mini",
|
||||||
|
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
|
||||||
|
"context_length": 400000,
|
||||||
|
"max_completion_tokens": 128000,
|
||||||
|
"supported_parameters": [
|
||||||
|
"tools"
|
||||||
|
],
|
||||||
|
"thinking": {
|
||||||
|
"levels": [
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"xhigh"
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"codex-team": [
|
"codex-team": [
|
||||||
@@ -1623,6 +1701,29 @@
|
|||||||
"xhigh"
|
"xhigh"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.4-mini",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1773705600,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"type": "openai",
|
||||||
|
"display_name": "GPT 5.4 Mini",
|
||||||
|
"version": "gpt-5.4-mini",
|
||||||
|
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
|
||||||
|
"context_length": 400000,
|
||||||
|
"max_completion_tokens": 128000,
|
||||||
|
"supported_parameters": [
|
||||||
|
"tools"
|
||||||
|
],
|
||||||
|
"thinking": {
|
||||||
|
"levels": [
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"xhigh"
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"codex-plus": [
|
"codex-plus": [
|
||||||
@@ -1898,6 +1999,29 @@
|
|||||||
"xhigh"
|
"xhigh"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.4-mini",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1773705600,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"type": "openai",
|
||||||
|
"display_name": "GPT 5.4 Mini",
|
||||||
|
"version": "gpt-5.4-mini",
|
||||||
|
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
|
||||||
|
"context_length": 400000,
|
||||||
|
"max_completion_tokens": 128000,
|
||||||
|
"supported_parameters": [
|
||||||
|
"tools"
|
||||||
|
],
|
||||||
|
"thinking": {
|
||||||
|
"levels": [
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"xhigh"
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"codex-pro": [
|
"codex-pro": [
|
||||||
@@ -2173,55 +2297,40 @@
|
|||||||
"xhigh"
|
"xhigh"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "gpt-5.4-mini",
|
||||||
|
"object": "model",
|
||||||
|
"created": 1773705600,
|
||||||
|
"owned_by": "openai",
|
||||||
|
"type": "openai",
|
||||||
|
"display_name": "GPT 5.4 Mini",
|
||||||
|
"version": "gpt-5.4-mini",
|
||||||
|
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
|
||||||
|
"context_length": 400000,
|
||||||
|
"max_completion_tokens": 128000,
|
||||||
|
"supported_parameters": [
|
||||||
|
"tools"
|
||||||
|
],
|
||||||
|
"thinking": {
|
||||||
|
"levels": [
|
||||||
|
"low",
|
||||||
|
"medium",
|
||||||
|
"high",
|
||||||
|
"xhigh"
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"qwen": [
|
"qwen": [
|
||||||
{
|
|
||||||
"id": "qwen3-coder-plus",
|
|
||||||
"object": "model",
|
|
||||||
"created": 1753228800,
|
|
||||||
"owned_by": "qwen",
|
|
||||||
"type": "qwen",
|
|
||||||
"display_name": "Qwen3 Coder Plus",
|
|
||||||
"version": "3.0",
|
|
||||||
"description": "Advanced code generation and understanding model",
|
|
||||||
"context_length": 32768,
|
|
||||||
"max_completion_tokens": 8192,
|
|
||||||
"supported_parameters": [
|
|
||||||
"temperature",
|
|
||||||
"top_p",
|
|
||||||
"max_tokens",
|
|
||||||
"stream",
|
|
||||||
"stop"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "qwen3-coder-flash",
|
|
||||||
"object": "model",
|
|
||||||
"created": 1753228800,
|
|
||||||
"owned_by": "qwen",
|
|
||||||
"type": "qwen",
|
|
||||||
"display_name": "Qwen3 Coder Flash",
|
|
||||||
"version": "3.0",
|
|
||||||
"description": "Fast code generation model",
|
|
||||||
"context_length": 8192,
|
|
||||||
"max_completion_tokens": 2048,
|
|
||||||
"supported_parameters": [
|
|
||||||
"temperature",
|
|
||||||
"top_p",
|
|
||||||
"max_tokens",
|
|
||||||
"stream",
|
|
||||||
"stop"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": "coder-model",
|
"id": "coder-model",
|
||||||
"object": "model",
|
"object": "model",
|
||||||
"created": 1771171200,
|
"created": 1771171200,
|
||||||
"owned_by": "qwen",
|
"owned_by": "qwen",
|
||||||
"type": "qwen",
|
"type": "qwen",
|
||||||
"display_name": "Qwen 3.5 Plus",
|
"display_name": "Qwen 3.6 Plus",
|
||||||
"version": "3.5",
|
"version": "3.6",
|
||||||
"description": "efficient hybrid model with leading coding performance",
|
"description": "efficient hybrid model with leading coding performance",
|
||||||
"context_length": 1048576,
|
"context_length": 1048576,
|
||||||
"max_completion_tokens": 65536,
|
"max_completion_tokens": 65536,
|
||||||
@@ -2232,25 +2341,6 @@
|
|||||||
"stream",
|
"stream",
|
||||||
"stop"
|
"stop"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "vision-model",
|
|
||||||
"object": "model",
|
|
||||||
"created": 1758672000,
|
|
||||||
"owned_by": "qwen",
|
|
||||||
"type": "qwen",
|
|
||||||
"display_name": "Qwen3 Vision Model",
|
|
||||||
"version": "3.0",
|
|
||||||
"description": "Vision model model",
|
|
||||||
"context_length": 32768,
|
|
||||||
"max_completion_tokens": 2048,
|
|
||||||
"supported_parameters": [
|
|
||||||
"temperature",
|
|
||||||
"top_p",
|
|
||||||
"max_tokens",
|
|
||||||
"stream",
|
|
||||||
"stop"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"iflow": [
|
"iflow": [
|
||||||
@@ -2639,11 +2729,12 @@
|
|||||||
"context_length": 1048576,
|
"context_length": 1048576,
|
||||||
"max_completion_tokens": 65535,
|
"max_completion_tokens": 65535,
|
||||||
"thinking": {
|
"thinking": {
|
||||||
"min": 128,
|
"min": 1,
|
||||||
"max": 32768,
|
"max": 65535,
|
||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"low",
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -2659,11 +2750,12 @@
|
|||||||
"context_length": 1048576,
|
"context_length": 1048576,
|
||||||
"max_completion_tokens": 65535,
|
"max_completion_tokens": 65535,
|
||||||
"thinking": {
|
"thinking": {
|
||||||
"min": 128,
|
"min": 1,
|
||||||
"max": 32768,
|
"max": 65535,
|
||||||
"dynamic_allowed": true,
|
"dynamic_allowed": true,
|
||||||
"levels": [
|
"levels": [
|
||||||
"low",
|
"low",
|
||||||
|
"medium",
|
||||||
"high"
|
"high"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ import (
|
|||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
|
||||||
@@ -45,7 +46,7 @@ const (
|
|||||||
antigravityGeneratePath = "/v1internal:generateContent"
|
antigravityGeneratePath = "/v1internal:generateContent"
|
||||||
antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
|
antigravityClientID = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
|
||||||
antigravityClientSecret = "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"
|
antigravityClientSecret = "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"
|
||||||
defaultAntigravityAgent = "antigravity/1.19.6 darwin/arm64"
|
defaultAntigravityAgent = "antigravity/1.21.9 darwin/arm64" // fallback only; overridden at runtime by misc.AntigravityUserAgent()
|
||||||
antigravityAuthType = "antigravity"
|
antigravityAuthType = "antigravity"
|
||||||
refreshSkew = 3000 * time.Second
|
refreshSkew = 3000 * time.Second
|
||||||
antigravityCreditsRetryTTL = 5 * time.Hour
|
antigravityCreditsRetryTTL = 5 * time.Hour
|
||||||
@@ -1739,7 +1740,7 @@ func resolveUserAgent(auth *cliproxyauth.Auth) string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return defaultAntigravityAgent
|
return misc.AntigravityUserAgent()
|
||||||
}
|
}
|
||||||
|
|
||||||
func antigravityRetryAttempts(auth *cliproxyauth.Auth, cfg *config.Config) int {
|
func antigravityRetryAttempts(auth *cliproxyauth.Auth, cfg *config.Config) int {
|
||||||
|
|||||||
@@ -137,6 +137,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
|||||||
|
|
||||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||||
body = disableThinkingIfToolChoiceForced(body)
|
body = disableThinkingIfToolChoiceForced(body)
|
||||||
|
body = normalizeClaudeTemperatureForThinking(body)
|
||||||
|
|
||||||
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
|
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
|
||||||
if countCacheControls(body) == 0 {
|
if countCacheControls(body) == 0 {
|
||||||
@@ -307,6 +308,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
|||||||
|
|
||||||
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
|
||||||
body = disableThinkingIfToolChoiceForced(body)
|
body = disableThinkingIfToolChoiceForced(body)
|
||||||
|
body = normalizeClaudeTemperatureForThinking(body)
|
||||||
|
|
||||||
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
|
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
|
||||||
if countCacheControls(body) == 0 {
|
if countCacheControls(body) == 0 {
|
||||||
@@ -651,6 +653,25 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte {
|
|||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// normalizeClaudeTemperatureForThinking keeps Anthropic message requests valid when
|
||||||
|
// thinking is enabled. Anthropic rejects temperatures other than 1 when
|
||||||
|
// thinking.type is enabled/adaptive/auto.
|
||||||
|
func normalizeClaudeTemperatureForThinking(body []byte) []byte {
|
||||||
|
if !gjson.GetBytes(body, "temperature").Exists() {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
thinkingType := strings.ToLower(strings.TrimSpace(gjson.GetBytes(body, "thinking.type").String()))
|
||||||
|
switch thinkingType {
|
||||||
|
case "enabled", "adaptive", "auto":
|
||||||
|
if temp := gjson.GetBytes(body, "temperature"); temp.Exists() && temp.Type == gjson.Number && temp.Float() == 1 {
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
body, _ = sjson.SetBytes(body, "temperature", 1)
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
type compositeReadCloser struct {
|
type compositeReadCloser struct {
|
||||||
io.Reader
|
io.Reader
|
||||||
closers []func() error
|
closers []func() error
|
||||||
@@ -827,6 +848,14 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
|
|||||||
hasClaude1MHeader = true
|
hasClaude1MHeader = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Also check auth attributes — GitLab Duo sets gitlab_duo_force_context_1m
|
||||||
|
// when routing through the Anthropic gateway, but the gin headers won't have
|
||||||
|
// X-CPA-CLAUDE-1M because the request is internally constructed.
|
||||||
|
if !hasClaude1MHeader && auth != nil && auth.Attributes != nil {
|
||||||
|
if auth.Attributes["gitlab_duo_force_context_1m"] == "true" {
|
||||||
|
hasClaude1MHeader = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Merge extra betas from request body and request flags.
|
// Merge extra betas from request body and request flags.
|
||||||
if len(extraBetas) > 0 || hasClaude1MHeader {
|
if len(extraBetas) > 0 || hasClaude1MHeader {
|
||||||
|
|||||||
@@ -1833,3 +1833,43 @@ func TestApplyCloaking_PreservesConfiguredStrictModeAndSensitiveWordsWhenModeOmi
|
|||||||
t.Fatalf("expected configured sensitive word obfuscation to apply, got %q", got)
|
t.Fatalf("expected configured sensitive word obfuscation to apply, got %q", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNormalizeClaudeTemperatureForThinking_AdaptiveCoercesToOne(t *testing.T) {
|
||||||
|
payload := []byte(`{"temperature":0,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"}}`)
|
||||||
|
out := normalizeClaudeTemperatureForThinking(payload)
|
||||||
|
|
||||||
|
if got := gjson.GetBytes(out, "temperature").Float(); got != 1 {
|
||||||
|
t.Fatalf("temperature = %v, want 1", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeClaudeTemperatureForThinking_EnabledCoercesToOne(t *testing.T) {
|
||||||
|
payload := []byte(`{"temperature":0.2,"thinking":{"type":"enabled","budget_tokens":2048}}`)
|
||||||
|
out := normalizeClaudeTemperatureForThinking(payload)
|
||||||
|
|
||||||
|
if got := gjson.GetBytes(out, "temperature").Float(); got != 1 {
|
||||||
|
t.Fatalf("temperature = %v, want 1", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeClaudeTemperatureForThinking_NoThinkingLeavesTemperatureAlone(t *testing.T) {
|
||||||
|
payload := []byte(`{"temperature":0,"messages":[{"role":"user","content":"hi"}]}`)
|
||||||
|
out := normalizeClaudeTemperatureForThinking(payload)
|
||||||
|
|
||||||
|
if got := gjson.GetBytes(out, "temperature").Float(); got != 0 {
|
||||||
|
t.Fatalf("temperature = %v, want 0", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeClaudeTemperatureForThinking_AfterForcedToolChoiceKeepsOriginalTemperature(t *testing.T) {
|
||||||
|
payload := []byte(`{"temperature":0,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"},"tool_choice":{"type":"any"}}`)
|
||||||
|
out := disableThinkingIfToolChoiceForced(payload)
|
||||||
|
out = normalizeClaudeTemperatureForThinking(out)
|
||||||
|
|
||||||
|
if gjson.GetBytes(out, "thinking").Exists() {
|
||||||
|
t.Fatalf("thinking should be removed when tool_choice forces tool use")
|
||||||
|
}
|
||||||
|
if got := gjson.GetBytes(out, "temperature").Float(); got != 0 {
|
||||||
|
t.Fatalf("temperature = %v, want 0", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -17,6 +18,7 @@ import (
|
|||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||||
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
|
||||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
@@ -40,7 +42,7 @@ const (
|
|||||||
copilotEditorVersion = "vscode/1.107.0"
|
copilotEditorVersion = "vscode/1.107.0"
|
||||||
copilotPluginVersion = "copilot-chat/0.35.0"
|
copilotPluginVersion = "copilot-chat/0.35.0"
|
||||||
copilotIntegrationID = "vscode-chat"
|
copilotIntegrationID = "vscode-chat"
|
||||||
copilotOpenAIIntent = "conversation-panel"
|
copilotOpenAIIntent = "conversation-edits"
|
||||||
copilotGitHubAPIVer = "2025-04-01"
|
copilotGitHubAPIVer = "2025-04-01"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -126,6 +128,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
|
|||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
|
||||||
body = e.normalizeModel(req.Model, body)
|
body = e.normalizeModel(req.Model, body)
|
||||||
body = flattenAssistantContent(body)
|
body = flattenAssistantContent(body)
|
||||||
|
body = stripUnsupportedBetas(body)
|
||||||
|
|
||||||
// Detect vision content before input normalization removes messages
|
// Detect vision content before input normalization removes messages
|
||||||
hasVision := detectVisionContent(body)
|
hasVision := detectVisionContent(body)
|
||||||
@@ -142,6 +145,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
|
|||||||
if useResponses {
|
if useResponses {
|
||||||
body = normalizeGitHubCopilotResponsesInput(body)
|
body = normalizeGitHubCopilotResponsesInput(body)
|
||||||
body = normalizeGitHubCopilotResponsesTools(body)
|
body = normalizeGitHubCopilotResponsesTools(body)
|
||||||
|
body = applyGitHubCopilotResponsesDefaults(body)
|
||||||
} else {
|
} else {
|
||||||
body = normalizeGitHubCopilotChatTools(body)
|
body = normalizeGitHubCopilotChatTools(body)
|
||||||
}
|
}
|
||||||
@@ -225,9 +229,10 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
|
|||||||
if useResponses && from.String() == "claude" {
|
if useResponses && from.String() == "claude" {
|
||||||
converted = translateGitHubCopilotResponsesNonStreamToClaude(data)
|
converted = translateGitHubCopilotResponsesNonStreamToClaude(data)
|
||||||
} else {
|
} else {
|
||||||
|
data = normalizeGitHubCopilotReasoningField(data)
|
||||||
converted = sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m)
|
converted = sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m)
|
||||||
}
|
}
|
||||||
resp = cliproxyexecutor.Response{Payload: converted}
|
resp = cliproxyexecutor.Response{Payload: converted, Headers: httpResp.Header.Clone()}
|
||||||
reporter.ensurePublished(ctx)
|
reporter.ensurePublished(ctx)
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
@@ -256,6 +261,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
|||||||
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
|
||||||
body = e.normalizeModel(req.Model, body)
|
body = e.normalizeModel(req.Model, body)
|
||||||
body = flattenAssistantContent(body)
|
body = flattenAssistantContent(body)
|
||||||
|
body = stripUnsupportedBetas(body)
|
||||||
|
|
||||||
// Detect vision content before input normalization removes messages
|
// Detect vision content before input normalization removes messages
|
||||||
hasVision := detectVisionContent(body)
|
hasVision := detectVisionContent(body)
|
||||||
@@ -272,6 +278,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
|||||||
if useResponses {
|
if useResponses {
|
||||||
body = normalizeGitHubCopilotResponsesInput(body)
|
body = normalizeGitHubCopilotResponsesInput(body)
|
||||||
body = normalizeGitHubCopilotResponsesTools(body)
|
body = normalizeGitHubCopilotResponsesTools(body)
|
||||||
|
body = applyGitHubCopilotResponsesDefaults(body)
|
||||||
} else {
|
} else {
|
||||||
body = normalizeGitHubCopilotChatTools(body)
|
body = normalizeGitHubCopilotChatTools(body)
|
||||||
}
|
}
|
||||||
@@ -378,7 +385,20 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
|||||||
if useResponses && from.String() == "claude" {
|
if useResponses && from.String() == "claude" {
|
||||||
chunks = translateGitHubCopilotResponsesStreamToClaude(bytes.Clone(line), ¶m)
|
chunks = translateGitHubCopilotResponsesStreamToClaude(bytes.Clone(line), ¶m)
|
||||||
} else {
|
} else {
|
||||||
chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), ¶m)
|
// Strip SSE "data: " prefix before reasoning field normalization,
|
||||||
|
// since normalizeGitHubCopilotReasoningField expects pure JSON.
|
||||||
|
// Re-wrap with the prefix afterward for the translator.
|
||||||
|
normalizedLine := bytes.Clone(line)
|
||||||
|
if bytes.HasPrefix(line, dataTag) {
|
||||||
|
sseData := bytes.TrimSpace(line[len(dataTag):])
|
||||||
|
if !bytes.Equal(sseData, []byte("[DONE]")) && gjson.ValidBytes(sseData) {
|
||||||
|
normalized := normalizeGitHubCopilotReasoningField(bytes.Clone(sseData))
|
||||||
|
if !bytes.Equal(normalized, sseData) {
|
||||||
|
normalizedLine = append(append([]byte(nil), dataTag...), normalized...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, normalizedLine, ¶m)
|
||||||
}
|
}
|
||||||
for i := range chunks {
|
for i := range chunks {
|
||||||
out <- cliproxyexecutor.StreamChunk{Payload: bytes.Clone(chunks[i])}
|
out <- cliproxyexecutor.StreamChunk{Payload: bytes.Clone(chunks[i])}
|
||||||
@@ -400,9 +420,28 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// CountTokens is not supported for GitHub Copilot.
|
// CountTokens estimates token count locally using tiktoken, since the GitHub
|
||||||
func (e *GitHubCopilotExecutor) CountTokens(_ context.Context, _ *cliproxyauth.Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
// Copilot API does not expose a dedicated token counting endpoint.
|
||||||
return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported for github-copilot"}
|
func (e *GitHubCopilotExecutor) CountTokens(ctx context.Context, _ *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
|
||||||
|
baseModel := thinking.ParseSuffix(req.Model).ModelName
|
||||||
|
|
||||||
|
from := opts.SourceFormat
|
||||||
|
to := sdktranslator.FromString("openai")
|
||||||
|
translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
|
||||||
|
|
||||||
|
enc, err := helps.TokenizerForModel(baseModel)
|
||||||
|
if err != nil {
|
||||||
|
return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: tokenizer init failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
count, err := helps.CountOpenAIChatTokens(enc, translated)
|
||||||
|
if err != nil {
|
||||||
|
return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: token counting failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
usageJSON := helps.BuildOpenAIUsageJSON(count)
|
||||||
|
translatedUsage := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON)
|
||||||
|
return cliproxyexecutor.Response{Payload: translatedUsage}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Refresh validates the GitHub token is still working.
|
// Refresh validates the GitHub token is still working.
|
||||||
@@ -491,46 +530,127 @@ func (e *GitHubCopilotExecutor) applyHeaders(r *http.Request, apiToken string, b
|
|||||||
r.Header.Set("X-Request-Id", uuid.NewString())
|
r.Header.Set("X-Request-Id", uuid.NewString())
|
||||||
|
|
||||||
initiator := "user"
|
initiator := "user"
|
||||||
if role := detectLastConversationRole(body); role == "assistant" || role == "tool" {
|
if isAgentInitiated(body) {
|
||||||
initiator = "agent"
|
initiator = "agent"
|
||||||
}
|
}
|
||||||
r.Header.Set("X-Initiator", initiator)
|
r.Header.Set("X-Initiator", initiator)
|
||||||
}
|
}
|
||||||
|
|
||||||
func detectLastConversationRole(body []byte) string {
|
// isAgentInitiated determines whether the current request is agent-initiated
|
||||||
|
// (tool callbacks, continuations) rather than user-initiated (new user prompt).
|
||||||
|
//
|
||||||
|
// GitHub Copilot uses the X-Initiator header for billing:
|
||||||
|
// - "user" → consumes premium request quota
|
||||||
|
// - "agent" → free (tool loops, continuations)
|
||||||
|
//
|
||||||
|
// The challenge: Claude Code sends tool results as role:"user" messages with
|
||||||
|
// content type "tool_result". After translation to OpenAI format, the tool_result
|
||||||
|
// part becomes a separate role:"tool" message, but if the original Claude message
|
||||||
|
// also contained text content (e.g. skill invocations, attachment descriptions),
|
||||||
|
// a role:"user" message is emitted AFTER the tool message, making the last message
|
||||||
|
// appear user-initiated when it's actually part of an agent tool loop.
|
||||||
|
//
|
||||||
|
// VSCode Copilot Chat solves this with explicit flags (iterationNumber,
|
||||||
|
// isContinuation, subAgentInvocationId). Since CPA doesn't have these flags,
|
||||||
|
// we infer agent status by checking whether the conversation contains prior
|
||||||
|
// assistant/tool messages — if it does, the current request is a continuation.
|
||||||
|
//
|
||||||
|
// References:
|
||||||
|
// - opencode#8030, opencode#15824: same root cause and fix approach
|
||||||
|
// - vscode-copilot-chat: toolCallingLoop.ts (iterationNumber === 0)
|
||||||
|
// - pi-ai: github-copilot-headers.ts (last message role check)
|
||||||
|
func isAgentInitiated(body []byte) bool {
|
||||||
if len(body) == 0 {
|
if len(body) == 0 {
|
||||||
return ""
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Chat Completions API: check messages array
|
||||||
if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
|
if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
|
||||||
arr := messages.Array()
|
arr := messages.Array()
|
||||||
|
if len(arr) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
lastRole := ""
|
||||||
for i := len(arr) - 1; i >= 0; i-- {
|
for i := len(arr) - 1; i >= 0; i-- {
|
||||||
if role := arr[i].Get("role").String(); role != "" {
|
if r := arr[i].Get("role").String(); r != "" {
|
||||||
return role
|
lastRole = r
|
||||||
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If last message is assistant or tool, clearly agent-initiated.
|
||||||
|
if lastRole == "assistant" || lastRole == "tool" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// If last message is "user", check whether it contains tool results
|
||||||
|
// (indicating a tool-loop continuation) or if the preceding message
|
||||||
|
// is an assistant tool_use. This is more precise than checking for
|
||||||
|
// any prior assistant message, which would false-positive on genuine
|
||||||
|
// multi-turn follow-ups.
|
||||||
|
if lastRole == "user" {
|
||||||
|
// Check if the last user message contains tool_result content
|
||||||
|
lastContent := arr[len(arr)-1].Get("content")
|
||||||
|
if lastContent.Exists() && lastContent.IsArray() {
|
||||||
|
for _, part := range lastContent.Array() {
|
||||||
|
if part.Get("type").String() == "tool_result" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Check if the second-to-last message is an assistant with tool_use
|
||||||
|
if len(arr) >= 2 {
|
||||||
|
prev := arr[len(arr)-2]
|
||||||
|
if prev.Get("role").String() == "assistant" {
|
||||||
|
prevContent := prev.Get("content")
|
||||||
|
if prevContent.Exists() && prevContent.IsArray() {
|
||||||
|
for _, part := range prevContent.Array() {
|
||||||
|
if part.Get("type").String() == "tool_use" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Responses API: check input array
|
||||||
if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() {
|
if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() {
|
||||||
arr := inputs.Array()
|
arr := inputs.Array()
|
||||||
for i := len(arr) - 1; i >= 0; i-- {
|
if len(arr) == 0 {
|
||||||
item := arr[i]
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// Most Responses input items carry a top-level role.
|
// Check last item
|
||||||
if role := item.Get("role").String(); role != "" {
|
last := arr[len(arr)-1]
|
||||||
return role
|
if role := last.Get("role").String(); role == "assistant" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
switch last.Get("type").String() {
|
||||||
|
case "function_call", "function_call_arguments", "computer_call":
|
||||||
|
return true
|
||||||
|
case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// If last item is user-role, check for prior non-user items
|
||||||
|
for _, item := range arr {
|
||||||
|
if role := item.Get("role").String(); role == "assistant" {
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
switch item.Get("type").String() {
|
switch item.Get("type").String() {
|
||||||
case "function_call", "function_call_arguments", "computer_call":
|
case "function_call", "function_call_output", "function_call_response",
|
||||||
return "assistant"
|
"function_call_arguments", "computer_call", "computer_call_output":
|
||||||
case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
|
return true
|
||||||
return "tool"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ""
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// detectVisionContent checks if the request body contains vision/image content.
|
// detectVisionContent checks if the request body contains vision/image content.
|
||||||
@@ -572,6 +692,85 @@ func (e *GitHubCopilotExecutor) normalizeModel(model string, body []byte) []byte
|
|||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// copilotUnsupportedBetas lists beta headers that are Anthropic-specific and
|
||||||
|
// must not be forwarded to GitHub Copilot. The context-1m beta enables 1M
|
||||||
|
// context on Anthropic's API, but Copilot's Claude models are limited to
|
||||||
|
// ~128K-200K. Passing it through would not enable 1M on Copilot, but stripping
|
||||||
|
// it from the translated body avoids confusing downstream translators.
|
||||||
|
var copilotUnsupportedBetas = []string{
|
||||||
|
"context-1m-2025-08-07",
|
||||||
|
}
|
||||||
|
|
||||||
|
// stripUnsupportedBetas removes Anthropic-specific beta entries from the
|
||||||
|
// translated request body. In OpenAI format the betas may appear under
|
||||||
|
// "metadata.betas" or a top-level "betas" array; in Claude format they sit at
|
||||||
|
// "betas". This function checks all known locations.
|
||||||
|
func stripUnsupportedBetas(body []byte) []byte {
|
||||||
|
betaPaths := []string{"betas", "metadata.betas"}
|
||||||
|
for _, path := range betaPaths {
|
||||||
|
arr := gjson.GetBytes(body, path)
|
||||||
|
if !arr.Exists() || !arr.IsArray() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var filtered []string
|
||||||
|
changed := false
|
||||||
|
for _, item := range arr.Array() {
|
||||||
|
beta := item.String()
|
||||||
|
if isCopilotUnsupportedBeta(beta) {
|
||||||
|
changed = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
filtered = append(filtered, beta)
|
||||||
|
}
|
||||||
|
if !changed {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if len(filtered) == 0 {
|
||||||
|
body, _ = sjson.DeleteBytes(body, path)
|
||||||
|
} else {
|
||||||
|
body, _ = sjson.SetBytes(body, path, filtered)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
func isCopilotUnsupportedBeta(beta string) bool {
|
||||||
|
return slices.Contains(copilotUnsupportedBetas, beta)
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeGitHubCopilotReasoningField maps Copilot's non-standard
|
||||||
|
// 'reasoning_text' field to the standard OpenAI 'reasoning_content' field
|
||||||
|
// that the SDK translator expects. This handles both streaming deltas
|
||||||
|
// (choices[].delta.reasoning_text) and non-streaming messages
|
||||||
|
// (choices[].message.reasoning_text). The field is only renamed when
|
||||||
|
// 'reasoning_content' is absent or null, preserving standard responses.
|
||||||
|
// All choices are processed to support n>1 requests.
|
||||||
|
func normalizeGitHubCopilotReasoningField(data []byte) []byte {
|
||||||
|
choices := gjson.GetBytes(data, "choices")
|
||||||
|
if !choices.Exists() || !choices.IsArray() {
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
for i := range choices.Array() {
|
||||||
|
// Non-streaming: choices[i].message.reasoning_text
|
||||||
|
msgRT := fmt.Sprintf("choices.%d.message.reasoning_text", i)
|
||||||
|
msgRC := fmt.Sprintf("choices.%d.message.reasoning_content", i)
|
||||||
|
if rt := gjson.GetBytes(data, msgRT); rt.Exists() && rt.String() != "" {
|
||||||
|
if rc := gjson.GetBytes(data, msgRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
|
||||||
|
data, _ = sjson.SetBytes(data, msgRC, rt.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Streaming: choices[i].delta.reasoning_text
|
||||||
|
deltaRT := fmt.Sprintf("choices.%d.delta.reasoning_text", i)
|
||||||
|
deltaRC := fmt.Sprintf("choices.%d.delta.reasoning_content", i)
|
||||||
|
if rt := gjson.GetBytes(data, deltaRT); rt.Exists() && rt.String() != "" {
|
||||||
|
if rc := gjson.GetBytes(data, deltaRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
|
||||||
|
data, _ = sjson.SetBytes(data, deltaRC, rt.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format, model string) bool {
|
func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format, model string) bool {
|
||||||
if sourceFormat.String() == "openai-response" {
|
if sourceFormat.String() == "openai-response" {
|
||||||
return true
|
return true
|
||||||
@@ -596,12 +795,7 @@ func lookupGitHubCopilotStaticModelInfo(model string) *registry.ModelInfo {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func containsEndpoint(endpoints []string, endpoint string) bool {
|
func containsEndpoint(endpoints []string, endpoint string) bool {
|
||||||
for _, item := range endpoints {
|
return slices.Contains(endpoints, endpoint)
|
||||||
if item == endpoint {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// flattenAssistantContent converts assistant message content from array format
|
// flattenAssistantContent converts assistant message content from array format
|
||||||
@@ -856,6 +1050,32 @@ func stripGitHubCopilotResponsesUnsupportedFields(body []byte) []byte {
|
|||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// applyGitHubCopilotResponsesDefaults sets required fields for the Responses API
|
||||||
|
// that both vscode-copilot-chat and pi-ai always include.
|
||||||
|
//
|
||||||
|
// References:
|
||||||
|
// - vscode-copilot-chat: src/platform/endpoint/node/responsesApi.ts
|
||||||
|
// - pi-ai (badlogic/pi-mono): packages/ai/src/providers/openai-responses.ts
|
||||||
|
func applyGitHubCopilotResponsesDefaults(body []byte) []byte {
|
||||||
|
// store: false — prevents request/response storage
|
||||||
|
if !gjson.GetBytes(body, "store").Exists() {
|
||||||
|
body, _ = sjson.SetBytes(body, "store", false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// include: ["reasoning.encrypted_content"] — enables reasoning content
|
||||||
|
// reuse across turns, avoiding redundant computation
|
||||||
|
if !gjson.GetBytes(body, "include").Exists() {
|
||||||
|
body, _ = sjson.SetRawBytes(body, "include", []byte(`["reasoning.encrypted_content"]`))
|
||||||
|
}
|
||||||
|
|
||||||
|
// If reasoning.effort is set but reasoning.summary is not, default to "auto"
|
||||||
|
if gjson.GetBytes(body, "reasoning.effort").Exists() && !gjson.GetBytes(body, "reasoning.summary").Exists() {
|
||||||
|
body, _ = sjson.SetBytes(body, "reasoning.summary", "auto")
|
||||||
|
}
|
||||||
|
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
func normalizeGitHubCopilotResponsesTools(body []byte) []byte {
|
func normalizeGitHubCopilotResponsesTools(body []byte) []byte {
|
||||||
tools := gjson.GetBytes(body, "tools")
|
tools := gjson.GetBytes(body, "tools")
|
||||||
if tools.Exists() {
|
if tools.Exists() {
|
||||||
@@ -1406,6 +1626,21 @@ func FetchGitHubCopilotModels(ctx context.Context, auth *cliproxyauth.Auth, cfg
|
|||||||
m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens
|
m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Override with real limits from the Copilot API when available.
|
||||||
|
// The API returns per-account limits (individual vs business) under
|
||||||
|
// capabilities.limits, which are more accurate than our static
|
||||||
|
// fallback values. We use max_prompt_tokens as ContextLength because
|
||||||
|
// that's the hard limit the Copilot API enforces on prompt size —
|
||||||
|
// exceeding it triggers "prompt token count exceeds the limit" errors.
|
||||||
|
if limits := entry.Limits(); limits != nil {
|
||||||
|
if limits.MaxPromptTokens > 0 {
|
||||||
|
m.ContextLength = limits.MaxPromptTokens
|
||||||
|
}
|
||||||
|
if limits.MaxOutputTokens > 0 {
|
||||||
|
m.MaxCompletionTokens = limits.MaxOutputTokens
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
models = append(models, m)
|
models = append(models, m)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,14 @@
|
|||||||
package executor
|
package executor
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
copilotauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||||
|
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||||
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
)
|
)
|
||||||
@@ -72,7 +75,7 @@ func TestUseGitHubCopilotResponsesEndpoint_CodexModel(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing.T) {
|
func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing.T) {
|
||||||
t.Parallel()
|
// Not parallel: shares global model registry with DynamicRegistryWinsOverStatic.
|
||||||
if !useGitHubCopilotResponsesEndpoint(sdktranslator.FromString("openai"), "gpt-5.4") {
|
if !useGitHubCopilotResponsesEndpoint(sdktranslator.FromString("openai"), "gpt-5.4") {
|
||||||
t.Fatal("expected responses-only registry model to use /responses")
|
t.Fatal("expected responses-only registry model to use /responses")
|
||||||
}
|
}
|
||||||
@@ -82,7 +85,7 @@ func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestUseGitHubCopilotResponsesEndpoint_DynamicRegistryWinsOverStatic(t *testing.T) {
|
func TestUseGitHubCopilotResponsesEndpoint_DynamicRegistryWinsOverStatic(t *testing.T) {
|
||||||
t.Parallel()
|
// Not parallel: mutates global model registry, conflicts with RegistryResponsesOnlyModel.
|
||||||
|
|
||||||
reg := registry.GetGlobalRegistry()
|
reg := registry.GetGlobalRegistry()
|
||||||
clientID := "github-copilot-test-client"
|
clientID := "github-copilot-test-client"
|
||||||
@@ -251,14 +254,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_TextMapping(t *testing
|
|||||||
t.Parallel()
|
t.Parallel()
|
||||||
resp := []byte(`{"id":"resp_1","model":"gpt-5-codex","output":[{"type":"message","content":[{"type":"output_text","text":"hello"}]}],"usage":{"input_tokens":3,"output_tokens":5}}`)
|
resp := []byte(`{"id":"resp_1","model":"gpt-5-codex","output":[{"type":"message","content":[{"type":"output_text","text":"hello"}]}],"usage":{"input_tokens":3,"output_tokens":5}}`)
|
||||||
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
|
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
|
||||||
if gjson.Get(out, "type").String() != "message" {
|
if gjson.GetBytes(out, "type").String() != "message" {
|
||||||
t.Fatalf("type = %q, want message", gjson.Get(out, "type").String())
|
t.Fatalf("type = %q, want message", gjson.GetBytes(out, "type").String())
|
||||||
}
|
}
|
||||||
if gjson.Get(out, "content.0.type").String() != "text" {
|
if gjson.GetBytes(out, "content.0.type").String() != "text" {
|
||||||
t.Fatalf("content.0.type = %q, want text", gjson.Get(out, "content.0.type").String())
|
t.Fatalf("content.0.type = %q, want text", gjson.GetBytes(out, "content.0.type").String())
|
||||||
}
|
}
|
||||||
if gjson.Get(out, "content.0.text").String() != "hello" {
|
if gjson.GetBytes(out, "content.0.text").String() != "hello" {
|
||||||
t.Fatalf("content.0.text = %q, want hello", gjson.Get(out, "content.0.text").String())
|
t.Fatalf("content.0.text = %q, want hello", gjson.GetBytes(out, "content.0.text").String())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -266,14 +269,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_ToolUseMapping(t *test
|
|||||||
t.Parallel()
|
t.Parallel()
|
||||||
resp := []byte(`{"id":"resp_2","model":"gpt-5-codex","output":[{"type":"function_call","id":"fc_1","call_id":"call_1","name":"sum","arguments":"{\"a\":1}"}],"usage":{"input_tokens":1,"output_tokens":2}}`)
|
resp := []byte(`{"id":"resp_2","model":"gpt-5-codex","output":[{"type":"function_call","id":"fc_1","call_id":"call_1","name":"sum","arguments":"{\"a\":1}"}],"usage":{"input_tokens":1,"output_tokens":2}}`)
|
||||||
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
|
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
|
||||||
if gjson.Get(out, "content.0.type").String() != "tool_use" {
|
if gjson.GetBytes(out, "content.0.type").String() != "tool_use" {
|
||||||
t.Fatalf("content.0.type = %q, want tool_use", gjson.Get(out, "content.0.type").String())
|
t.Fatalf("content.0.type = %q, want tool_use", gjson.GetBytes(out, "content.0.type").String())
|
||||||
}
|
}
|
||||||
if gjson.Get(out, "content.0.name").String() != "sum" {
|
if gjson.GetBytes(out, "content.0.name").String() != "sum" {
|
||||||
t.Fatalf("content.0.name = %q, want sum", gjson.Get(out, "content.0.name").String())
|
t.Fatalf("content.0.name = %q, want sum", gjson.GetBytes(out, "content.0.name").String())
|
||||||
}
|
}
|
||||||
if gjson.Get(out, "stop_reason").String() != "tool_use" {
|
if gjson.GetBytes(out, "stop_reason").String() != "tool_use" {
|
||||||
t.Fatalf("stop_reason = %q, want tool_use", gjson.Get(out, "stop_reason").String())
|
t.Fatalf("stop_reason = %q, want tool_use", gjson.GetBytes(out, "stop_reason").String())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -282,18 +285,24 @@ func TestTranslateGitHubCopilotResponsesStreamToClaude_TextLifecycle(t *testing.
|
|||||||
var param any
|
var param any
|
||||||
|
|
||||||
created := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.created","response":{"id":"resp_1","model":"gpt-5-codex"}}`), ¶m)
|
created := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.created","response":{"id":"resp_1","model":"gpt-5-codex"}}`), ¶m)
|
||||||
if len(created) == 0 || !strings.Contains(created[0], "message_start") {
|
if len(created) == 0 || !strings.Contains(string(created[0]), "message_start") {
|
||||||
t.Fatalf("created events = %#v, want message_start", created)
|
t.Fatalf("created events = %#v, want message_start", created)
|
||||||
}
|
}
|
||||||
|
|
||||||
delta := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.output_text.delta","delta":"he"}`), ¶m)
|
delta := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.output_text.delta","delta":"he"}`), ¶m)
|
||||||
joinedDelta := strings.Join(delta, "")
|
var joinedDelta string
|
||||||
|
for _, d := range delta {
|
||||||
|
joinedDelta += string(d)
|
||||||
|
}
|
||||||
if !strings.Contains(joinedDelta, "content_block_start") || !strings.Contains(joinedDelta, "text_delta") {
|
if !strings.Contains(joinedDelta, "content_block_start") || !strings.Contains(joinedDelta, "text_delta") {
|
||||||
t.Fatalf("delta events = %#v, want content_block_start + text_delta", delta)
|
t.Fatalf("delta events = %#v, want content_block_start + text_delta", delta)
|
||||||
}
|
}
|
||||||
|
|
||||||
completed := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.completed","response":{"usage":{"input_tokens":7,"output_tokens":9}}}`), ¶m)
|
completed := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.completed","response":{"usage":{"input_tokens":7,"output_tokens":9}}}`), ¶m)
|
||||||
joinedCompleted := strings.Join(completed, "")
|
var joinedCompleted string
|
||||||
|
for _, c := range completed {
|
||||||
|
joinedCompleted += string(c)
|
||||||
|
}
|
||||||
if !strings.Contains(joinedCompleted, "message_delta") || !strings.Contains(joinedCompleted, "message_stop") {
|
if !strings.Contains(joinedCompleted, "message_delta") || !strings.Contains(joinedCompleted, "message_stop") {
|
||||||
t.Fatalf("completed events = %#v, want message_delta + message_stop", completed)
|
t.Fatalf("completed events = %#v, want message_delta + message_stop", completed)
|
||||||
}
|
}
|
||||||
@@ -312,15 +321,17 @@ func TestApplyHeaders_XInitiator_UserOnly(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestApplyHeaders_XInitiator_UserWhenLastRoleIsUser(t *testing.T) {
|
func TestApplyHeaders_XInitiator_AgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
e := &GitHubCopilotExecutor{}
|
e := &GitHubCopilotExecutor{}
|
||||||
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
||||||
// Last role governs the initiator decision.
|
// When the last role is "user" and the message contains tool_result content,
|
||||||
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":"tool result here"}]}`)
|
// the request is a continuation (e.g. Claude tool result translated to a
|
||||||
|
// synthetic user message). Should be "agent".
|
||||||
|
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu1","content":"file contents..."}]}]}`)
|
||||||
e.applyHeaders(req, "token", body)
|
e.applyHeaders(req, "token", body)
|
||||||
if got := req.Header.Get("X-Initiator"); got != "user" {
|
if got := req.Header.Get("X-Initiator"); got != "agent" {
|
||||||
t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
|
t.Fatalf("X-Initiator = %q, want agent (last user contains tool_result)", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -328,10 +339,11 @@ func TestApplyHeaders_XInitiator_AgentWithToolRole(t *testing.T) {
|
|||||||
t.Parallel()
|
t.Parallel()
|
||||||
e := &GitHubCopilotExecutor{}
|
e := &GitHubCopilotExecutor{}
|
||||||
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
||||||
|
// When the last message has role "tool", it's clearly agent-initiated.
|
||||||
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"tool","content":"result"}]}`)
|
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"tool","content":"result"}]}`)
|
||||||
e.applyHeaders(req, "token", body)
|
e.applyHeaders(req, "token", body)
|
||||||
if got := req.Header.Get("X-Initiator"); got != "agent" {
|
if got := req.Header.Get("X-Initiator"); got != "agent" {
|
||||||
t.Fatalf("X-Initiator = %q, want agent (tool role exists)", got)
|
t.Fatalf("X-Initiator = %q, want agent (last role is tool)", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -346,14 +358,15 @@ func TestApplyHeaders_XInitiator_InputArrayLastAssistantMessage(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestApplyHeaders_XInitiator_InputArrayLastUserMessage(t *testing.T) {
|
func TestApplyHeaders_XInitiator_InputArrayAgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
e := &GitHubCopilotExecutor{}
|
e := &GitHubCopilotExecutor{}
|
||||||
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
||||||
|
// Responses API: last item is user-role but history contains assistant → agent.
|
||||||
body := []byte(`{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`)
|
body := []byte(`{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`)
|
||||||
e.applyHeaders(req, "token", body)
|
e.applyHeaders(req, "token", body)
|
||||||
if got := req.Header.Get("X-Initiator"); got != "user" {
|
if got := req.Header.Get("X-Initiator"); got != "agent" {
|
||||||
t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
|
t.Fatalf("X-Initiator = %q, want agent (history has assistant)", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -368,6 +381,33 @@ func TestApplyHeaders_XInitiator_InputArrayLastFunctionCallOutput(t *testing.T)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestApplyHeaders_XInitiator_UserInMultiTurnNoTools(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
e := &GitHubCopilotExecutor{}
|
||||||
|
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
||||||
|
// Genuine multi-turn: user → assistant (plain text) → user follow-up.
|
||||||
|
// No tool messages → should be "user" (not a false-positive).
|
||||||
|
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"Hi there!"},{"role":"user","content":"what is 2+2?"}]}`)
|
||||||
|
e.applyHeaders(req, "token", body)
|
||||||
|
if got := req.Header.Get("X-Initiator"); got != "user" {
|
||||||
|
t.Fatalf("X-Initiator = %q, want user (genuine multi-turn, no tools)", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyHeaders_XInitiator_UserFollowUpAfterToolHistory(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
e := &GitHubCopilotExecutor{}
|
||||||
|
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
||||||
|
// User follow-up after a completed tool-use conversation.
|
||||||
|
// The last message is a genuine user question — should be "user", not "agent".
|
||||||
|
// This aligns with opencode's behavior: only active tool loops are agent-initiated.
|
||||||
|
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":[{"type":"tool_use","id":"tu1","name":"Read","input":{}}]},{"role":"tool","tool_call_id":"tu1","content":"file data"},{"role":"assistant","content":"I read the file."},{"role":"user","content":"What did we do so far?"}]}`)
|
||||||
|
e.applyHeaders(req, "token", body)
|
||||||
|
if got := req.Header.Get("X-Initiator"); got != "user" {
|
||||||
|
t.Fatalf("X-Initiator = %q, want user (genuine follow-up after tool history)", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// --- Tests for x-github-api-version header (Problem M) ---
|
// --- Tests for x-github-api-version header (Problem M) ---
|
||||||
|
|
||||||
func TestApplyHeaders_GitHubAPIVersion(t *testing.T) {
|
func TestApplyHeaders_GitHubAPIVersion(t *testing.T) {
|
||||||
@@ -414,3 +454,364 @@ func TestDetectVisionContent_NoMessages(t *testing.T) {
|
|||||||
t.Fatal("expected no vision content when messages field is absent")
|
t.Fatal("expected no vision content when messages field is absent")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --- Tests for applyGitHubCopilotResponsesDefaults ---
|
||||||
|
|
||||||
|
func TestApplyGitHubCopilotResponsesDefaults_SetsAllDefaults(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
body := []byte(`{"input":"hello","reasoning":{"effort":"medium"}}`)
|
||||||
|
got := applyGitHubCopilotResponsesDefaults(body)
|
||||||
|
|
||||||
|
if gjson.GetBytes(got, "store").Bool() != false {
|
||||||
|
t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
|
||||||
|
}
|
||||||
|
inc := gjson.GetBytes(got, "include")
|
||||||
|
if !inc.IsArray() || inc.Array()[0].String() != "reasoning.encrypted_content" {
|
||||||
|
t.Fatalf("include = %s, want [\"reasoning.encrypted_content\"]", inc.Raw)
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(got, "reasoning.summary").String() != "auto" {
|
||||||
|
t.Fatalf("reasoning.summary = %q, want auto", gjson.GetBytes(got, "reasoning.summary").String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyGitHubCopilotResponsesDefaults_DoesNotOverrideExisting(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
body := []byte(`{"input":"hello","store":true,"include":["other"],"reasoning":{"effort":"high","summary":"concise"}}`)
|
||||||
|
got := applyGitHubCopilotResponsesDefaults(body)
|
||||||
|
|
||||||
|
if gjson.GetBytes(got, "store").Bool() != true {
|
||||||
|
t.Fatalf("store should not be overridden, got %s", gjson.GetBytes(got, "store").Raw)
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(got, "include").Array()[0].String() != "other" {
|
||||||
|
t.Fatalf("include should not be overridden, got %s", gjson.GetBytes(got, "include").Raw)
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(got, "reasoning.summary").String() != "concise" {
|
||||||
|
t.Fatalf("reasoning.summary should not be overridden, got %q", gjson.GetBytes(got, "reasoning.summary").String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyGitHubCopilotResponsesDefaults_NoReasoningEffort(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
body := []byte(`{"input":"hello"}`)
|
||||||
|
got := applyGitHubCopilotResponsesDefaults(body)
|
||||||
|
|
||||||
|
if gjson.GetBytes(got, "store").Bool() != false {
|
||||||
|
t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
|
||||||
|
}
|
||||||
|
// reasoning.summary should NOT be set when reasoning.effort is absent
|
||||||
|
if gjson.GetBytes(got, "reasoning.summary").Exists() {
|
||||||
|
t.Fatalf("reasoning.summary should not be set when reasoning.effort is absent, got %q", gjson.GetBytes(got, "reasoning.summary").String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Tests for normalizeGitHubCopilotReasoningField ---
|
||||||
|
|
||||||
|
func TestNormalizeReasoningField_NonStreaming(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
data := []byte(`{"choices":[{"message":{"content":"hello","reasoning_text":"I think..."}}]}`)
|
||||||
|
got := normalizeGitHubCopilotReasoningField(data)
|
||||||
|
rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
|
||||||
|
if rc != "I think..." {
|
||||||
|
t.Fatalf("reasoning_content = %q, want %q", rc, "I think...")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeReasoningField_Streaming(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
data := []byte(`{"choices":[{"delta":{"reasoning_text":"thinking delta"}}]}`)
|
||||||
|
got := normalizeGitHubCopilotReasoningField(data)
|
||||||
|
rc := gjson.GetBytes(got, "choices.0.delta.reasoning_content").String()
|
||||||
|
if rc != "thinking delta" {
|
||||||
|
t.Fatalf("reasoning_content = %q, want %q", rc, "thinking delta")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeReasoningField_PreservesExistingReasoningContent(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
data := []byte(`{"choices":[{"message":{"reasoning_text":"old","reasoning_content":"existing"}}]}`)
|
||||||
|
got := normalizeGitHubCopilotReasoningField(data)
|
||||||
|
rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
|
||||||
|
if rc != "existing" {
|
||||||
|
t.Fatalf("reasoning_content = %q, want %q (should not overwrite)", rc, "existing")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeReasoningField_MultiChoice(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
data := []byte(`{"choices":[{"message":{"reasoning_text":"thought-0"}},{"message":{"reasoning_text":"thought-1"}}]}`)
|
||||||
|
got := normalizeGitHubCopilotReasoningField(data)
|
||||||
|
rc0 := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
|
||||||
|
rc1 := gjson.GetBytes(got, "choices.1.message.reasoning_content").String()
|
||||||
|
if rc0 != "thought-0" {
|
||||||
|
t.Fatalf("choices[0].reasoning_content = %q, want %q", rc0, "thought-0")
|
||||||
|
}
|
||||||
|
if rc1 != "thought-1" {
|
||||||
|
t.Fatalf("choices[1].reasoning_content = %q, want %q", rc1, "thought-1")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeReasoningField_NoChoices(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
data := []byte(`{"id":"chatcmpl-123"}`)
|
||||||
|
got := normalizeGitHubCopilotReasoningField(data)
|
||||||
|
if string(got) != string(data) {
|
||||||
|
t.Fatalf("expected no change, got %s", string(got))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestApplyHeaders_OpenAIIntentValue(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
e := &GitHubCopilotExecutor{}
|
||||||
|
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
|
||||||
|
e.applyHeaders(req, "token", nil)
|
||||||
|
if got := req.Header.Get("Openai-Intent"); got != "conversation-edits" {
|
||||||
|
t.Fatalf("Openai-Intent = %q, want conversation-edits", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Tests for CountTokens (local tiktoken estimation) ---
|
||||||
|
|
||||||
|
func TestCountTokens_ReturnsPositiveCount(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
e := &GitHubCopilotExecutor{}
|
||||||
|
body := []byte(`{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, world!"}]}`)
|
||||||
|
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
|
||||||
|
Model: "gpt-4o",
|
||||||
|
Payload: body,
|
||||||
|
}, cliproxyexecutor.Options{
|
||||||
|
SourceFormat: sdktranslator.FromString("openai"),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CountTokens() error: %v", err)
|
||||||
|
}
|
||||||
|
if len(resp.Payload) == 0 {
|
||||||
|
t.Fatal("CountTokens() returned empty payload")
|
||||||
|
}
|
||||||
|
// The response should contain a positive token count.
|
||||||
|
tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
|
||||||
|
if tokens <= 0 {
|
||||||
|
t.Fatalf("expected positive token count, got %d", tokens)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCountTokens_ClaudeSourceFormatTranslates(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
e := &GitHubCopilotExecutor{}
|
||||||
|
body := []byte(`{"model":"claude-sonnet-4","messages":[{"role":"user","content":"Tell me a joke"}],"max_tokens":1024}`)
|
||||||
|
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
|
||||||
|
Model: "claude-sonnet-4",
|
||||||
|
Payload: body,
|
||||||
|
}, cliproxyexecutor.Options{
|
||||||
|
SourceFormat: sdktranslator.FromString("claude"),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CountTokens() error: %v", err)
|
||||||
|
}
|
||||||
|
// Claude source format → should get input_tokens in response
|
||||||
|
inputTokens := gjson.GetBytes(resp.Payload, "input_tokens").Int()
|
||||||
|
if inputTokens <= 0 {
|
||||||
|
// Fallback: check usage.prompt_tokens (depends on translator registration)
|
||||||
|
promptTokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
|
||||||
|
if promptTokens <= 0 {
|
||||||
|
t.Fatalf("expected positive token count, got payload: %s", resp.Payload)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCountTokens_EmptyPayload(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
e := &GitHubCopilotExecutor{}
|
||||||
|
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
|
||||||
|
Model: "gpt-4o",
|
||||||
|
Payload: []byte(`{"model":"gpt-4o","messages":[]}`),
|
||||||
|
}, cliproxyexecutor.Options{
|
||||||
|
SourceFormat: sdktranslator.FromString("openai"),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("CountTokens() error: %v", err)
|
||||||
|
}
|
||||||
|
tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
|
||||||
|
// Empty messages should return 0 tokens.
|
||||||
|
if tokens != 0 {
|
||||||
|
t.Fatalf("expected 0 tokens for empty messages, got %d", tokens)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStripUnsupportedBetas_RemovesContext1M(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
body := []byte(`{"model":"claude-opus-4.6","betas":["interleaved-thinking-2025-05-14","context-1m-2025-08-07","claude-code-20250219"],"messages":[]}`)
|
||||||
|
result := stripUnsupportedBetas(body)
|
||||||
|
|
||||||
|
betas := gjson.GetBytes(result, "betas")
|
||||||
|
if !betas.Exists() {
|
||||||
|
t.Fatal("betas field should still exist after stripping")
|
||||||
|
}
|
||||||
|
for _, item := range betas.Array() {
|
||||||
|
if item.String() == "context-1m-2025-08-07" {
|
||||||
|
t.Fatal("context-1m-2025-08-07 should have been stripped")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Other betas should be preserved
|
||||||
|
found := false
|
||||||
|
for _, item := range betas.Array() {
|
||||||
|
if item.String() == "interleaved-thinking-2025-05-14" {
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Fatal("other betas should be preserved")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStripUnsupportedBetas_NoBetasField(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
body := []byte(`{"model":"gpt-4o","messages":[]}`)
|
||||||
|
result := stripUnsupportedBetas(body)
|
||||||
|
|
||||||
|
// Should be unchanged
|
||||||
|
if string(result) != string(body) {
|
||||||
|
t.Fatalf("body should be unchanged when no betas field exists, got %s", string(result))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStripUnsupportedBetas_MetadataBetas(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
body := []byte(`{"model":"claude-opus-4.6","metadata":{"betas":["context-1m-2025-08-07","other-beta"]},"messages":[]}`)
|
||||||
|
result := stripUnsupportedBetas(body)
|
||||||
|
|
||||||
|
betas := gjson.GetBytes(result, "metadata.betas")
|
||||||
|
if !betas.Exists() {
|
||||||
|
t.Fatal("metadata.betas field should still exist after stripping")
|
||||||
|
}
|
||||||
|
for _, item := range betas.Array() {
|
||||||
|
if item.String() == "context-1m-2025-08-07" {
|
||||||
|
t.Fatal("context-1m-2025-08-07 should have been stripped from metadata.betas")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if betas.Array()[0].String() != "other-beta" {
|
||||||
|
t.Fatal("other betas in metadata.betas should be preserved")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStripUnsupportedBetas_AllBetasStripped(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
body := []byte(`{"model":"claude-opus-4.6","betas":["context-1m-2025-08-07"],"messages":[]}`)
|
||||||
|
result := stripUnsupportedBetas(body)
|
||||||
|
|
||||||
|
betas := gjson.GetBytes(result, "betas")
|
||||||
|
if betas.Exists() {
|
||||||
|
t.Fatal("betas field should be deleted when all betas are stripped")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCopilotModelEntry_Limits(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
capabilities map[string]any
|
||||||
|
wantNil bool
|
||||||
|
wantPrompt int
|
||||||
|
wantOutput int
|
||||||
|
wantContext int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "nil capabilities",
|
||||||
|
capabilities: nil,
|
||||||
|
wantNil: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no limits key",
|
||||||
|
capabilities: map[string]any{"family": "claude-opus-4.6"},
|
||||||
|
wantNil: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "limits is not a map",
|
||||||
|
capabilities: map[string]any{"limits": "invalid"},
|
||||||
|
wantNil: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "all zero values",
|
||||||
|
capabilities: map[string]any{
|
||||||
|
"limits": map[string]any{
|
||||||
|
"max_context_window_tokens": float64(0),
|
||||||
|
"max_prompt_tokens": float64(0),
|
||||||
|
"max_output_tokens": float64(0),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantNil: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "individual account limits (128K prompt)",
|
||||||
|
capabilities: map[string]any{
|
||||||
|
"limits": map[string]any{
|
||||||
|
"max_context_window_tokens": float64(144000),
|
||||||
|
"max_prompt_tokens": float64(128000),
|
||||||
|
"max_output_tokens": float64(64000),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantNil: false,
|
||||||
|
wantPrompt: 128000,
|
||||||
|
wantOutput: 64000,
|
||||||
|
wantContext: 144000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "business account limits (168K prompt)",
|
||||||
|
capabilities: map[string]any{
|
||||||
|
"limits": map[string]any{
|
||||||
|
"max_context_window_tokens": float64(200000),
|
||||||
|
"max_prompt_tokens": float64(168000),
|
||||||
|
"max_output_tokens": float64(32000),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantNil: false,
|
||||||
|
wantPrompt: 168000,
|
||||||
|
wantOutput: 32000,
|
||||||
|
wantContext: 200000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "partial limits (only prompt)",
|
||||||
|
capabilities: map[string]any{
|
||||||
|
"limits": map[string]any{
|
||||||
|
"max_prompt_tokens": float64(128000),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wantNil: false,
|
||||||
|
wantPrompt: 128000,
|
||||||
|
wantOutput: 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
entry := copilotauth.CopilotModelEntry{
|
||||||
|
ID: "claude-opus-4.6",
|
||||||
|
Capabilities: tt.capabilities,
|
||||||
|
}
|
||||||
|
limits := entry.Limits()
|
||||||
|
if tt.wantNil {
|
||||||
|
if limits != nil {
|
||||||
|
t.Fatalf("expected nil limits, got %+v", limits)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if limits == nil {
|
||||||
|
t.Fatal("expected non-nil limits, got nil")
|
||||||
|
}
|
||||||
|
if limits.MaxPromptTokens != tt.wantPrompt {
|
||||||
|
t.Errorf("MaxPromptTokens = %d, want %d", limits.MaxPromptTokens, tt.wantPrompt)
|
||||||
|
}
|
||||||
|
if limits.MaxOutputTokens != tt.wantOutput {
|
||||||
|
t.Errorf("MaxOutputTokens = %d, want %d", limits.MaxOutputTokens, tt.wantOutput)
|
||||||
|
}
|
||||||
|
if tt.wantContext > 0 && limits.MaxContextWindowTokens != tt.wantContext {
|
||||||
|
t.Errorf("MaxContextWindowTokens = %d, want %d", limits.MaxContextWindowTokens, tt.wantContext)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -30,6 +30,8 @@ const (
|
|||||||
qwenRateLimitWindow = time.Minute // sliding window duration
|
qwenRateLimitWindow = time.Minute // sliding window duration
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
|
||||||
|
|
||||||
// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
|
// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
|
||||||
var qwenBeijingLoc = func() *time.Location {
|
var qwenBeijingLoc = func() *time.Location {
|
||||||
loc, err := time.LoadLocation("Asia/Shanghai")
|
loc, err := time.LoadLocation("Asia/Shanghai")
|
||||||
@@ -170,6 +172,105 @@ func timeUntilNextDay() time.Duration {
|
|||||||
return tomorrow.Sub(now)
|
return tomorrow.Sub(now)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
|
||||||
|
// It always injects the default system prompt and merges any user-provided system messages
|
||||||
|
// into the injected system message content to satisfy Qwen's strict message ordering rules.
|
||||||
|
func ensureQwenSystemMessage(payload []byte) ([]byte, error) {
|
||||||
|
isInjectedSystemPart := func(part gjson.Result) bool {
|
||||||
|
if !part.Exists() || !part.IsObject() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !strings.EqualFold(part.Get("type").String(), "text") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if !strings.EqualFold(part.Get("cache_control.type").String(), "ephemeral") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
text := part.Get("text").String()
|
||||||
|
return text == "" || text == "You are Qwen Code."
|
||||||
|
}
|
||||||
|
|
||||||
|
defaultParts := gjson.ParseBytes(qwenDefaultSystemMessage).Get("content")
|
||||||
|
var systemParts []any
|
||||||
|
if defaultParts.Exists() && defaultParts.IsArray() {
|
||||||
|
for _, part := range defaultParts.Array() {
|
||||||
|
systemParts = append(systemParts, part.Value())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(systemParts) == 0 {
|
||||||
|
systemParts = append(systemParts, map[string]any{
|
||||||
|
"type": "text",
|
||||||
|
"text": "You are Qwen Code.",
|
||||||
|
"cache_control": map[string]any{
|
||||||
|
"type": "ephemeral",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
appendSystemContent := func(content gjson.Result) {
|
||||||
|
makeTextPart := func(text string) map[string]any {
|
||||||
|
return map[string]any{
|
||||||
|
"type": "text",
|
||||||
|
"text": text,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !content.Exists() || content.Type == gjson.Null {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if content.IsArray() {
|
||||||
|
for _, part := range content.Array() {
|
||||||
|
if part.Type == gjson.String {
|
||||||
|
systemParts = append(systemParts, makeTextPart(part.String()))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if isInjectedSystemPart(part) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
systemParts = append(systemParts, part.Value())
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if content.Type == gjson.String {
|
||||||
|
systemParts = append(systemParts, makeTextPart(content.String()))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if content.IsObject() {
|
||||||
|
if isInjectedSystemPart(content) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
systemParts = append(systemParts, content.Value())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
systemParts = append(systemParts, makeTextPart(content.String()))
|
||||||
|
}
|
||||||
|
|
||||||
|
messages := gjson.GetBytes(payload, "messages")
|
||||||
|
var nonSystemMessages []any
|
||||||
|
if messages.Exists() && messages.IsArray() {
|
||||||
|
for _, msg := range messages.Array() {
|
||||||
|
if strings.EqualFold(msg.Get("role").String(), "system") {
|
||||||
|
appendSystemContent(msg.Get("content"))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
nonSystemMessages = append(nonSystemMessages, msg.Value())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
newMessages := make([]any, 0, 1+len(nonSystemMessages))
|
||||||
|
newMessages = append(newMessages, map[string]any{
|
||||||
|
"role": "system",
|
||||||
|
"content": systemParts,
|
||||||
|
})
|
||||||
|
newMessages = append(newMessages, nonSystemMessages...)
|
||||||
|
|
||||||
|
updated, errSet := sjson.SetBytes(payload, "messages", newMessages)
|
||||||
|
if errSet != nil {
|
||||||
|
return nil, fmt.Errorf("qwen executor: set system message failed: %w", errSet)
|
||||||
|
}
|
||||||
|
return updated, nil
|
||||||
|
}
|
||||||
|
|
||||||
// QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
|
// QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
|
||||||
// If access token is unavailable, it falls back to legacy via ClientAdapter.
|
// If access token is unavailable, it falls back to legacy via ClientAdapter.
|
||||||
type QwenExecutor struct {
|
type QwenExecutor struct {
|
||||||
@@ -251,6 +352,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
|||||||
|
|
||||||
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
|
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
|
||||||
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
|
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
|
||||||
|
body, err = ensureQwenSystemMessage(body)
|
||||||
|
if err != nil {
|
||||||
|
return resp, err
|
||||||
|
}
|
||||||
|
|
||||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||||
@@ -357,15 +462,19 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
toolsResult := gjson.GetBytes(body, "tools")
|
// toolsResult := gjson.GetBytes(body, "tools")
|
||||||
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
|
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
|
||||||
// This will have no real consequences. It's just to scare Qwen3.
|
// This will have no real consequences. It's just to scare Qwen3.
|
||||||
if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
|
// if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
|
||||||
body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
|
// body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
|
||||||
}
|
// }
|
||||||
body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
|
body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
|
||||||
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
|
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
|
||||||
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
|
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
|
||||||
|
body, err = ensureQwenSystemMessage(body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
|
||||||
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||||
|
"github.com/tidwall/gjson"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestQwenExecutorParseSuffix(t *testing.T) {
|
func TestQwenExecutorParseSuffix(t *testing.T) {
|
||||||
@@ -28,3 +29,123 @@ func TestQwenExecutorParseSuffix(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestEnsureQwenSystemMessage_MergeStringSystem(t *testing.T) {
|
||||||
|
payload := []byte(`{
|
||||||
|
"model": "qwen3.6-plus",
|
||||||
|
"stream": true,
|
||||||
|
"messages": [
|
||||||
|
{ "role": "system", "content": "ABCDEFG" },
|
||||||
|
{ "role": "user", "content": [ { "type": "text", "text": "你好" } ] }
|
||||||
|
]
|
||||||
|
}`)
|
||||||
|
|
||||||
|
out, err := ensureQwenSystemMessage(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ensureQwenSystemMessage() error = %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
msgs := gjson.GetBytes(out, "messages").Array()
|
||||||
|
if len(msgs) != 2 {
|
||||||
|
t.Fatalf("messages length = %d, want 2", len(msgs))
|
||||||
|
}
|
||||||
|
if msgs[0].Get("role").String() != "system" {
|
||||||
|
t.Fatalf("messages[0].role = %q, want %q", msgs[0].Get("role").String(), "system")
|
||||||
|
}
|
||||||
|
parts := msgs[0].Get("content").Array()
|
||||||
|
if len(parts) != 2 {
|
||||||
|
t.Fatalf("messages[0].content length = %d, want 2", len(parts))
|
||||||
|
}
|
||||||
|
if parts[0].Get("text").String() != "You are Qwen Code." || parts[0].Get("cache_control.type").String() != "ephemeral" {
|
||||||
|
t.Fatalf("messages[0].content[0] = %s, want injected system part", parts[0].Raw)
|
||||||
|
}
|
||||||
|
if parts[1].Get("type").String() != "text" || parts[1].Get("text").String() != "ABCDEFG" {
|
||||||
|
t.Fatalf("messages[0].content[1] = %s, want text part with ABCDEFG", parts[1].Raw)
|
||||||
|
}
|
||||||
|
if msgs[1].Get("role").String() != "user" {
|
||||||
|
t.Fatalf("messages[1].role = %q, want %q", msgs[1].Get("role").String(), "user")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEnsureQwenSystemMessage_MergeObjectSystem(t *testing.T) {
|
||||||
|
payload := []byte(`{
|
||||||
|
"messages": [
|
||||||
|
{ "role": "system", "content": { "type": "text", "text": "ABCDEFG" } },
|
||||||
|
{ "role": "user", "content": [ { "type": "text", "text": "你好" } ] }
|
||||||
|
]
|
||||||
|
}`)
|
||||||
|
|
||||||
|
out, err := ensureQwenSystemMessage(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ensureQwenSystemMessage() error = %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
msgs := gjson.GetBytes(out, "messages").Array()
|
||||||
|
if len(msgs) != 2 {
|
||||||
|
t.Fatalf("messages length = %d, want 2", len(msgs))
|
||||||
|
}
|
||||||
|
parts := msgs[0].Get("content").Array()
|
||||||
|
if len(parts) != 2 {
|
||||||
|
t.Fatalf("messages[0].content length = %d, want 2", len(parts))
|
||||||
|
}
|
||||||
|
if parts[1].Get("text").String() != "ABCDEFG" {
|
||||||
|
t.Fatalf("messages[0].content[1].text = %q, want %q", parts[1].Get("text").String(), "ABCDEFG")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEnsureQwenSystemMessage_PrependsWhenMissing(t *testing.T) {
|
||||||
|
payload := []byte(`{
|
||||||
|
"messages": [
|
||||||
|
{ "role": "user", "content": [ { "type": "text", "text": "你好" } ] }
|
||||||
|
]
|
||||||
|
}`)
|
||||||
|
|
||||||
|
out, err := ensureQwenSystemMessage(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ensureQwenSystemMessage() error = %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
msgs := gjson.GetBytes(out, "messages").Array()
|
||||||
|
if len(msgs) != 2 {
|
||||||
|
t.Fatalf("messages length = %d, want 2", len(msgs))
|
||||||
|
}
|
||||||
|
if msgs[0].Get("role").String() != "system" {
|
||||||
|
t.Fatalf("messages[0].role = %q, want %q", msgs[0].Get("role").String(), "system")
|
||||||
|
}
|
||||||
|
if !msgs[0].Get("content").IsArray() || len(msgs[0].Get("content").Array()) == 0 {
|
||||||
|
t.Fatalf("messages[0].content = %s, want non-empty array", msgs[0].Get("content").Raw)
|
||||||
|
}
|
||||||
|
if msgs[1].Get("role").String() != "user" {
|
||||||
|
t.Fatalf("messages[1].role = %q, want %q", msgs[1].Get("role").String(), "user")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEnsureQwenSystemMessage_MergesMultipleSystemMessages(t *testing.T) {
|
||||||
|
payload := []byte(`{
|
||||||
|
"messages": [
|
||||||
|
{ "role": "system", "content": "A" },
|
||||||
|
{ "role": "user", "content": [ { "type": "text", "text": "hi" } ] },
|
||||||
|
{ "role": "system", "content": "B" }
|
||||||
|
]
|
||||||
|
}`)
|
||||||
|
|
||||||
|
out, err := ensureQwenSystemMessage(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ensureQwenSystemMessage() error = %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
msgs := gjson.GetBytes(out, "messages").Array()
|
||||||
|
if len(msgs) != 2 {
|
||||||
|
t.Fatalf("messages length = %d, want 2", len(msgs))
|
||||||
|
}
|
||||||
|
parts := msgs[0].Get("content").Array()
|
||||||
|
if len(parts) != 3 {
|
||||||
|
t.Fatalf("messages[0].content length = %d, want 3", len(parts))
|
||||||
|
}
|
||||||
|
if parts[1].Get("text").String() != "A" {
|
||||||
|
t.Fatalf("messages[0].content[1].text = %q, want %q", parts[1].Get("text").String(), "A")
|
||||||
|
}
|
||||||
|
if parts[2].Get("text").String() != "B" {
|
||||||
|
t.Fatalf("messages[0].content[2].text = %q, want %q", parts[2].Get("text").String(), "B")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
package claude
|
package claude
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||||
@@ -31,8 +31,6 @@ const geminiClaudeThoughtSignature = "skip_thought_signature_validator"
|
|||||||
// - []byte: The transformed request in Gemini CLI format.
|
// - []byte: The transformed request in Gemini CLI format.
|
||||||
func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
|
func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
|
||||||
rawJSON := inputRawJSON
|
rawJSON := inputRawJSON
|
||||||
rawJSON = bytes.Replace(rawJSON, []byte(`"url":{"type":"string","format":"uri",`), []byte(`"url":{"type":"string",`), -1)
|
|
||||||
|
|
||||||
// Build output Gemini CLI request JSON
|
// Build output Gemini CLI request JSON
|
||||||
out := []byte(`{"contents":[]}`)
|
out := []byte(`{"contents":[]}`)
|
||||||
out, _ = sjson.SetBytes(out, "model", modelName)
|
out, _ = sjson.SetBytes(out, "model", modelName)
|
||||||
@@ -146,13 +144,37 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// strip trailing model turn with unanswered function calls —
|
||||||
|
// Gemini returns empty responses when the last turn is a model
|
||||||
|
// functionCall with no corresponding user functionResponse.
|
||||||
|
contents := gjson.GetBytes(out, "contents")
|
||||||
|
if contents.Exists() && contents.IsArray() {
|
||||||
|
arr := contents.Array()
|
||||||
|
if len(arr) > 0 {
|
||||||
|
last := arr[len(arr)-1]
|
||||||
|
if last.Get("role").String() == "model" {
|
||||||
|
hasFC := false
|
||||||
|
last.Get("parts").ForEach(func(_, part gjson.Result) bool {
|
||||||
|
if part.Get("functionCall").Exists() {
|
||||||
|
hasFC = true
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
if hasFC {
|
||||||
|
out, _ = sjson.DeleteBytes(out, fmt.Sprintf("contents.%d", len(arr)-1))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// tools
|
// tools
|
||||||
if toolsResult := gjson.GetBytes(rawJSON, "tools"); toolsResult.IsArray() {
|
if toolsResult := gjson.GetBytes(rawJSON, "tools"); toolsResult.IsArray() {
|
||||||
hasTools := false
|
hasTools := false
|
||||||
toolsResult.ForEach(func(_, toolResult gjson.Result) bool {
|
toolsResult.ForEach(func(_, toolResult gjson.Result) bool {
|
||||||
inputSchemaResult := toolResult.Get("input_schema")
|
inputSchemaResult := toolResult.Get("input_schema")
|
||||||
if inputSchemaResult.Exists() && inputSchemaResult.IsObject() {
|
if inputSchemaResult.Exists() && inputSchemaResult.IsObject() {
|
||||||
inputSchema := inputSchemaResult.Raw
|
inputSchema := util.CleanJSONSchemaForGemini(inputSchemaResult.Raw)
|
||||||
tool := []byte(toolResult.Raw)
|
tool := []byte(toolResult.Raw)
|
||||||
var err error
|
var err error
|
||||||
tool, err = sjson.DeleteBytes(tool, "input_schema")
|
tool, err = sjson.DeleteBytes(tool, "input_schema")
|
||||||
@@ -168,6 +190,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
|||||||
tool, _ = sjson.DeleteBytes(tool, "type")
|
tool, _ = sjson.DeleteBytes(tool, "type")
|
||||||
tool, _ = sjson.DeleteBytes(tool, "cache_control")
|
tool, _ = sjson.DeleteBytes(tool, "cache_control")
|
||||||
tool, _ = sjson.DeleteBytes(tool, "defer_loading")
|
tool, _ = sjson.DeleteBytes(tool, "defer_loading")
|
||||||
|
tool, _ = sjson.DeleteBytes(tool, "eager_input_streaming")
|
||||||
tool, _ = sjson.SetBytes(tool, "name", util.SanitizeFunctionName(gjson.GetBytes(tool, "name").String()))
|
tool, _ = sjson.SetBytes(tool, "name", util.SanitizeFunctionName(gjson.GetBytes(tool, "name").String()))
|
||||||
if gjson.ValidBytes(tool) && gjson.ParseBytes(tool).IsObject() {
|
if gjson.ValidBytes(tool) && gjson.ParseBytes(tool).IsObject() {
|
||||||
if !hasTools {
|
if !hasTools {
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -49,7 +50,23 @@ func (h *GeminiCLIAPIHandler) Models() []map[string]any {
|
|||||||
// CLIHandler handles CLI-specific requests for Gemini API operations.
|
// CLIHandler handles CLI-specific requests for Gemini API operations.
|
||||||
// It restricts access to localhost only and routes requests to appropriate internal handlers.
|
// It restricts access to localhost only and routes requests to appropriate internal handlers.
|
||||||
func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
|
func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
|
||||||
if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") {
|
if h.Cfg == nil || !h.Cfg.EnableGeminiCLIEndpoint {
|
||||||
|
c.JSON(http.StatusForbidden, handlers.ErrorResponse{
|
||||||
|
Error: handlers.ErrorDetail{
|
||||||
|
Message: "Gemini CLI endpoint is disabled",
|
||||||
|
Type: "forbidden",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
requestHost := c.Request.Host
|
||||||
|
requestHostname := requestHost
|
||||||
|
if hostname, _, errSplitHostPort := net.SplitHostPort(requestHost); errSplitHostPort == nil {
|
||||||
|
requestHostname = hostname
|
||||||
|
}
|
||||||
|
|
||||||
|
if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") || requestHostname != "127.0.0.1" {
|
||||||
c.JSON(http.StatusForbidden, handlers.ErrorResponse{
|
c.JSON(http.StatusForbidden, handlers.ErrorResponse{
|
||||||
Error: handlers.ErrorDetail{
|
Error: handlers.ErrorDetail{
|
||||||
Message: "CLI reply only allow local access",
|
Message: "CLI reply only allow local access",
|
||||||
|
|||||||
@@ -379,7 +379,7 @@ func shouldReplaceWebsocketTranscript(rawJSON []byte, nextInput gjson.Result) bo
|
|||||||
|
|
||||||
for _, item := range nextInput.Array() {
|
for _, item := range nextInput.Array() {
|
||||||
switch strings.TrimSpace(item.Get("type").String()) {
|
switch strings.TrimSpace(item.Get("type").String()) {
|
||||||
case "function_call":
|
case "function_call", "custom_tool_call":
|
||||||
return true
|
return true
|
||||||
case "message":
|
case "message":
|
||||||
role := strings.TrimSpace(item.Get("role").String())
|
role := strings.TrimSpace(item.Get("role").String())
|
||||||
@@ -431,7 +431,7 @@ func dedupeFunctionCallsByCallID(rawArray string) (string, error) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
|
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
|
||||||
if itemType == "function_call" {
|
if isResponsesToolCallType(itemType) {
|
||||||
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
||||||
if callID != "" {
|
if callID != "" {
|
||||||
if _, ok := seenCallIDs[callID]; ok {
|
if _, ok := seenCallIDs[callID]; ok {
|
||||||
|
|||||||
@@ -520,6 +520,92 @@ func TestRepairResponsesWebsocketToolCallsDropsOrphanOutputWhenCallMissing(t *te
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRepairResponsesWebsocketToolCallsInsertsCachedCustomToolOutput(t *testing.T) {
|
||||||
|
cache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
sessionKey := "session-1"
|
||||||
|
|
||||||
|
cacheWarm := []byte(`{"previous_response_id":"resp-1","input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"}]}`)
|
||||||
|
warmed := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, cacheWarm)
|
||||||
|
if gjson.GetBytes(warmed, "input.0.call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("expected warmup output to remain")
|
||||||
|
}
|
||||||
|
|
||||||
|
raw := []byte(`{"input":[{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"},{"type":"message","id":"msg-1"}]}`)
|
||||||
|
repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw)
|
||||||
|
|
||||||
|
input := gjson.GetBytes(repaired, "input").Array()
|
||||||
|
if len(input) != 3 {
|
||||||
|
t.Fatalf("repaired input len = %d, want 3", len(input))
|
||||||
|
}
|
||||||
|
if input[0].Get("type").String() != "custom_tool_call" || input[0].Get("call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("unexpected first item: %s", input[0].Raw)
|
||||||
|
}
|
||||||
|
if input[1].Get("type").String() != "custom_tool_call_output" || input[1].Get("call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("missing inserted output: %s", input[1].Raw)
|
||||||
|
}
|
||||||
|
if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" {
|
||||||
|
t.Fatalf("unexpected trailing item: %s", input[2].Raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRepairResponsesWebsocketToolCallsDropsOrphanCustomToolCall(t *testing.T) {
|
||||||
|
cache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
sessionKey := "session-1"
|
||||||
|
|
||||||
|
raw := []byte(`{"input":[{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"},{"type":"message","id":"msg-1"}]}`)
|
||||||
|
repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw)
|
||||||
|
|
||||||
|
input := gjson.GetBytes(repaired, "input").Array()
|
||||||
|
if len(input) != 1 {
|
||||||
|
t.Fatalf("repaired input len = %d, want 1", len(input))
|
||||||
|
}
|
||||||
|
if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" {
|
||||||
|
t.Fatalf("unexpected remaining item: %s", input[0].Raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRepairResponsesWebsocketToolCallsInsertsCachedCustomToolCallForOrphanOutput(t *testing.T) {
|
||||||
|
outputCache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
callCache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
sessionKey := "session-1"
|
||||||
|
|
||||||
|
callCache.record(sessionKey, "call-1", []byte(`{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"}`))
|
||||||
|
|
||||||
|
raw := []byte(`{"input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`)
|
||||||
|
repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw)
|
||||||
|
|
||||||
|
input := gjson.GetBytes(repaired, "input").Array()
|
||||||
|
if len(input) != 3 {
|
||||||
|
t.Fatalf("repaired input len = %d, want 3", len(input))
|
||||||
|
}
|
||||||
|
if input[0].Get("type").String() != "custom_tool_call" || input[0].Get("call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("missing inserted call: %s", input[0].Raw)
|
||||||
|
}
|
||||||
|
if input[1].Get("type").String() != "custom_tool_call_output" || input[1].Get("call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("unexpected output item: %s", input[1].Raw)
|
||||||
|
}
|
||||||
|
if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" {
|
||||||
|
t.Fatalf("unexpected trailing item: %s", input[2].Raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRepairResponsesWebsocketToolCallsDropsOrphanCustomToolOutputWhenCallMissing(t *testing.T) {
|
||||||
|
outputCache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
callCache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
sessionKey := "session-1"
|
||||||
|
|
||||||
|
raw := []byte(`{"input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`)
|
||||||
|
repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw)
|
||||||
|
|
||||||
|
input := gjson.GetBytes(repaired, "input").Array()
|
||||||
|
if len(input) != 1 {
|
||||||
|
t.Fatalf("repaired input len = %d, want 1", len(input))
|
||||||
|
}
|
||||||
|
if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" {
|
||||||
|
t.Fatalf("unexpected remaining item: %s", input[0].Raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
|
func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
|
||||||
cache := newWebsocketToolOutputCache(time.Minute, 10)
|
cache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
sessionKey := "session-1"
|
sessionKey := "session-1"
|
||||||
@@ -536,6 +622,38 @@ func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRecordResponsesWebsocketCustomToolCallsFromCompletedPayloadWithCache(t *testing.T) {
|
||||||
|
cache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
sessionKey := "session-1"
|
||||||
|
|
||||||
|
payload := []byte(`{"type":"response.completed","response":{"id":"resp-1","output":[{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch","input":"*** Begin Patch"}]}}`)
|
||||||
|
recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload)
|
||||||
|
|
||||||
|
cached, ok := cache.get(sessionKey, "call-1")
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("expected cached custom tool call")
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(cached, "type").String() != "custom_tool_call" || gjson.GetBytes(cached, "call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("unexpected cached custom tool call: %s", cached)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecordResponsesWebsocketCustomToolCallsFromOutputItemDoneWithCache(t *testing.T) {
|
||||||
|
cache := newWebsocketToolOutputCache(time.Minute, 10)
|
||||||
|
sessionKey := "session-1"
|
||||||
|
|
||||||
|
payload := []byte(`{"type":"response.output_item.done","item":{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch","input":"*** Begin Patch"}}`)
|
||||||
|
recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload)
|
||||||
|
|
||||||
|
cached, ok := cache.get(sessionKey, "call-1")
|
||||||
|
if !ok {
|
||||||
|
t.Fatalf("expected cached custom tool call")
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(cached, "type").String() != "custom_tool_call" || gjson.GetBytes(cached, "call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("unexpected cached custom tool call: %s", cached)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) {
|
func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) {
|
||||||
gin.SetMode(gin.TestMode)
|
gin.SetMode(gin.TestMode)
|
||||||
|
|
||||||
@@ -1023,6 +1141,161 @@ func TestNormalizeResponsesWebsocketRequestDropsDuplicateFunctionCallsByCallID(t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNormalizeResponsesWebsocketRequestTreatsCustomToolTranscriptReplacementAsReset(t *testing.T) {
|
||||||
|
lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"message","id":"msg-1"},{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-1","call_id":"call-1"},{"type":"message","id":"assistant-1","role":"assistant"}]}`)
|
||||||
|
lastResponseOutput := []byte(`[
|
||||||
|
{"type":"message","id":"assistant-1","role":"assistant"}
|
||||||
|
]`)
|
||||||
|
raw := []byte(`{"type":"response.create","input":[{"type":"custom_tool_call","id":"ctc-compact","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-compact","call_id":"call-1"},{"type":"message","id":"msg-2"}]}`)
|
||||||
|
|
||||||
|
normalized, next, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput)
|
||||||
|
if errMsg != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", errMsg.Error)
|
||||||
|
}
|
||||||
|
if gjson.GetBytes(normalized, "previous_response_id").Exists() {
|
||||||
|
t.Fatalf("previous_response_id must not exist in transcript replacement mode")
|
||||||
|
}
|
||||||
|
items := gjson.GetBytes(normalized, "input").Array()
|
||||||
|
if len(items) != 3 {
|
||||||
|
t.Fatalf("replacement input len = %d, want 3: %s", len(items), normalized)
|
||||||
|
}
|
||||||
|
if items[0].Get("id").String() != "ctc-compact" ||
|
||||||
|
items[1].Get("id").String() != "tool-out-compact" ||
|
||||||
|
items[2].Get("id").String() != "msg-2" {
|
||||||
|
t.Fatalf("replacement transcript was not preserved as-is: %s", normalized)
|
||||||
|
}
|
||||||
|
if !bytes.Equal(next, normalized) {
|
||||||
|
t.Fatalf("next request snapshot should match replacement request")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeResponsesWebsocketRequestDropsDuplicateCustomToolCallsByCallID(t *testing.T) {
|
||||||
|
lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-1","call_id":"call-1"}]}`)
|
||||||
|
lastResponseOutput := []byte(`[
|
||||||
|
{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"}
|
||||||
|
]`)
|
||||||
|
raw := []byte(`{"type":"response.create","input":[{"type":"message","id":"msg-2"}]}`)
|
||||||
|
|
||||||
|
normalized, _, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput)
|
||||||
|
if errMsg != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", errMsg.Error)
|
||||||
|
}
|
||||||
|
|
||||||
|
items := gjson.GetBytes(normalized, "input").Array()
|
||||||
|
if len(items) != 3 {
|
||||||
|
t.Fatalf("merged input len = %d, want 3: %s", len(items), normalized)
|
||||||
|
}
|
||||||
|
if items[0].Get("id").String() != "ctc-1" ||
|
||||||
|
items[1].Get("id").String() != "tool-out-1" ||
|
||||||
|
items[2].Get("id").String() != "msg-2" {
|
||||||
|
t.Fatalf("unexpected merged input order: %s", normalized)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResponsesWebsocketCompactionResetsTurnStateOnCustomToolTranscriptReplacement(t *testing.T) {
|
||||||
|
gin.SetMode(gin.TestMode)
|
||||||
|
|
||||||
|
executor := &websocketCompactionCaptureExecutor{}
|
||||||
|
manager := coreauth.NewManager(nil, nil, nil)
|
||||||
|
manager.RegisterExecutor(executor)
|
||||||
|
auth := &coreauth.Auth{ID: "auth-sse", Provider: executor.Identifier(), Status: coreauth.StatusActive}
|
||||||
|
if _, err := manager.Register(context.Background(), auth); err != nil {
|
||||||
|
t.Fatalf("Register auth: %v", err)
|
||||||
|
}
|
||||||
|
registry.GetGlobalRegistry().RegisterClient(auth.ID, auth.Provider, []*registry.ModelInfo{{ID: "test-model"}})
|
||||||
|
t.Cleanup(func() {
|
||||||
|
registry.GetGlobalRegistry().UnregisterClient(auth.ID)
|
||||||
|
})
|
||||||
|
|
||||||
|
base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager)
|
||||||
|
h := NewOpenAIResponsesAPIHandler(base)
|
||||||
|
router := gin.New()
|
||||||
|
router.GET("/v1/responses/ws", h.ResponsesWebsocket)
|
||||||
|
router.POST("/v1/responses/compact", h.Compact)
|
||||||
|
|
||||||
|
server := httptest.NewServer(router)
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
wsURL := "ws" + strings.TrimPrefix(server.URL, "http") + "/v1/responses/ws"
|
||||||
|
conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("dial websocket: %v", err)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if errClose := conn.Close(); errClose != nil {
|
||||||
|
t.Fatalf("close websocket: %v", errClose)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
requests := []string{
|
||||||
|
`{"type":"response.create","model":"test-model","input":[{"type":"message","id":"msg-1"}]}`,
|
||||||
|
`{"type":"response.create","input":[{"type":"custom_tool_call_output","call_id":"call-1","id":"tool-out-1"}]}`,
|
||||||
|
}
|
||||||
|
for i := range requests {
|
||||||
|
if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(requests[i])); errWrite != nil {
|
||||||
|
t.Fatalf("write websocket message %d: %v", i+1, errWrite)
|
||||||
|
}
|
||||||
|
_, payload, errReadMessage := conn.ReadMessage()
|
||||||
|
if errReadMessage != nil {
|
||||||
|
t.Fatalf("read websocket message %d: %v", i+1, errReadMessage)
|
||||||
|
}
|
||||||
|
if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted {
|
||||||
|
t.Fatalf("message %d payload type = %s, want %s", i+1, got, wsEventTypeCompleted)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
compactResp, errPost := server.Client().Post(
|
||||||
|
server.URL+"/v1/responses/compact",
|
||||||
|
"application/json",
|
||||||
|
strings.NewReader(`{"model":"test-model","input":[{"type":"message","id":"summary-1"}]}`),
|
||||||
|
)
|
||||||
|
if errPost != nil {
|
||||||
|
t.Fatalf("compact request failed: %v", errPost)
|
||||||
|
}
|
||||||
|
if errClose := compactResp.Body.Close(); errClose != nil {
|
||||||
|
t.Fatalf("close compact response body: %v", errClose)
|
||||||
|
}
|
||||||
|
if compactResp.StatusCode != http.StatusOK {
|
||||||
|
t.Fatalf("compact status = %d, want %d", compactResp.StatusCode, http.StatusOK)
|
||||||
|
}
|
||||||
|
|
||||||
|
postCompact := `{"type":"response.create","input":[{"type":"custom_tool_call","id":"ctc-compact","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-compact","call_id":"call-1"},{"type":"message","id":"msg-2"}]}`
|
||||||
|
if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(postCompact)); errWrite != nil {
|
||||||
|
t.Fatalf("write post-compact websocket message: %v", errWrite)
|
||||||
|
}
|
||||||
|
_, payload, errReadMessage := conn.ReadMessage()
|
||||||
|
if errReadMessage != nil {
|
||||||
|
t.Fatalf("read post-compact websocket message: %v", errReadMessage)
|
||||||
|
}
|
||||||
|
if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted {
|
||||||
|
t.Fatalf("post-compact payload type = %s, want %s", got, wsEventTypeCompleted)
|
||||||
|
}
|
||||||
|
|
||||||
|
executor.mu.Lock()
|
||||||
|
defer executor.mu.Unlock()
|
||||||
|
|
||||||
|
if executor.compactPayload == nil {
|
||||||
|
t.Fatalf("compact payload was not captured")
|
||||||
|
}
|
||||||
|
if len(executor.streamPayloads) != 3 {
|
||||||
|
t.Fatalf("stream payload count = %d, want 3", len(executor.streamPayloads))
|
||||||
|
}
|
||||||
|
|
||||||
|
merged := executor.streamPayloads[2]
|
||||||
|
items := gjson.GetBytes(merged, "input").Array()
|
||||||
|
if len(items) != 3 {
|
||||||
|
t.Fatalf("merged input len = %d, want 3: %s", len(items), merged)
|
||||||
|
}
|
||||||
|
if items[0].Get("id").String() != "ctc-compact" ||
|
||||||
|
items[1].Get("id").String() != "tool-out-compact" ||
|
||||||
|
items[2].Get("id").String() != "msg-2" {
|
||||||
|
t.Fatalf("unexpected post-compact input order: %s", merged)
|
||||||
|
}
|
||||||
|
if items[0].Get("call_id").String() != "call-1" {
|
||||||
|
t.Fatalf("post-compact custom tool call id = %s, want call-1", items[0].Get("call_id").String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestResponsesWebsocketCompactionResetsTurnStateOnTranscriptReplacement(t *testing.T) {
|
func TestResponsesWebsocketCompactionResetsTurnStateOnTranscriptReplacement(t *testing.T) {
|
||||||
gin.SetMode(gin.TestMode)
|
gin.SetMode(gin.TestMode)
|
||||||
|
|
||||||
|
|||||||
@@ -266,15 +266,15 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
|
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
|
||||||
switch itemType {
|
switch {
|
||||||
case "function_call_output":
|
case isResponsesToolCallOutputType(itemType):
|
||||||
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
||||||
if callID == "" {
|
if callID == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
outputPresent[callID] = struct{}{}
|
outputPresent[callID] = struct{}{}
|
||||||
outputCache.record(sessionKey, callID, item)
|
outputCache.record(sessionKey, callID, item)
|
||||||
case "function_call":
|
case isResponsesToolCallType(itemType):
|
||||||
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
||||||
if callID == "" {
|
if callID == "" {
|
||||||
continue
|
continue
|
||||||
@@ -293,7 +293,7 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
|
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
|
||||||
if itemType == "function_call_output" {
|
if isResponsesToolCallOutputType(itemType) {
|
||||||
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
|
||||||
if callID == "" {
|
if callID == "" {
|
||||||
// Upstream rejects tool outputs without a call_id; drop it.
|
// Upstream rejects tool outputs without a call_id; drop it.
|
||||||
@@ -325,7 +325,7 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
|
|||||||
// Drop orphaned function_call_output items; upstream rejects transcripts with missing calls.
|
// Drop orphaned function_call_output items; upstream rejects transcripts with missing calls.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if itemType != "function_call" {
|
if !isResponsesToolCallType(itemType) {
|
||||||
filtered = append(filtered, item)
|
filtered = append(filtered, item)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -376,7 +376,7 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
for _, item := range output.Array() {
|
for _, item := range output.Array() {
|
||||||
if strings.TrimSpace(item.Get("type").String()) != "function_call" {
|
if !isResponsesToolCallType(item.Get("type").String()) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
callID := strings.TrimSpace(item.Get("call_id").String())
|
callID := strings.TrimSpace(item.Get("call_id").String())
|
||||||
@@ -390,7 +390,7 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
|
|||||||
if !item.Exists() || !item.IsObject() {
|
if !item.Exists() || !item.IsObject() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if strings.TrimSpace(item.Get("type").String()) != "function_call" {
|
if !isResponsesToolCallType(item.Get("type").String()) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
callID := strings.TrimSpace(item.Get("call_id").String())
|
callID := strings.TrimSpace(item.Get("call_id").String())
|
||||||
@@ -400,3 +400,21 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
|
|||||||
cache.record(sessionKey, callID, json.RawMessage(item.Raw))
|
cache.record(sessionKey, callID, json.RawMessage(item.Raw))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isResponsesToolCallType(itemType string) bool {
|
||||||
|
switch strings.TrimSpace(itemType) {
|
||||||
|
case "function_call", "custom_tool_call":
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func isResponsesToolCallOutputType(itemType string) bool {
|
||||||
|
switch strings.TrimSpace(itemType) {
|
||||||
|
case "function_call_output", "custom_tool_call_output":
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user