Compare commits

..

45 Commits

Author SHA1 Message Date
Luis Pater
98509f615c Merge pull request #485 from kunish/fix/copilot-premium-request-inflation
fix(copilot): reduce premium request inflation, enable thinking, and use dynamic API limits
2026-04-04 02:19:56 +08:00
Luis Pater
e7a66ae504 Merge branch 'router-for-me:main' into main 2026-04-04 02:18:06 +08:00
Luis Pater
754b126944 fix(executor): remove commented-out code in QwenExecutor 2026-04-04 02:14:48 +08:00
Luis Pater
ae37ccffbf Merge pull request #2520 from Arronlong/main
fix:qwen invalid_parameter_error
2026-04-04 02:13:09 +08:00
Luis Pater
42c062bb5b Merge pull request #2509 from adamhelfgott/fix-claude-thinking-temperature
Normalize Claude temperature when thinking is enabled
2026-04-03 23:55:50 +08:00
kunish
87bf0b73d5 fix(copilot): use dynamic API limits to prevent prompt token overflow
The Copilot API enforces per-account prompt token limits (128K individual,
168K business) that differ from the static 200K context length advertised
by the proxy. This mismatch caused Claude Code to accumulate context
beyond the actual limit, triggering "prompt token count exceeds the limit
of 128000" errors.

Changes:
- Extract max_prompt_tokens and max_output_tokens from the Copilot
  /models API response (capabilities.limits) and use them as the
  authoritative ContextLength and MaxCompletionTokens values
- Add CopilotModelLimits struct and Limits() helper to parse limits
  from the existing Capabilities map
- Fix GitLab Duo context-1m beta header not being set when routing
  through the Anthropic gateway (gitlab_duo_force_context_1m attr
  was set but only gin headers were checked)
- Fix flaky parallel tests that shared global model registry state
2026-04-03 23:54:17 +08:00
Luis Pater
f389667ec3 Merge pull request #2513 from lonr-6/codex/fix-ws-custom-tool-repair-v2
fix: repair responses websocket custom tool call pairing
2026-04-03 23:45:38 +08:00
Arronlong
29dba0399b Comment out system message check in Qwen executor
fix qwen invalid_parameter_error
2026-04-03 23:07:33 +08:00
Luis Pater
a824e7cd0b feat(models): add GPT-5.3, GPT-5.4, and GPT-5.4-mini with enhanced "thinking" levels 2026-04-03 23:05:10 +08:00
Luis Pater
140faef7dc Merge branch 'router-for-me:main' into main 2026-04-03 21:48:23 +08:00
Luis Pater
adb580b344 feat(security): add configuration to toggle Gemini CLI endpoint access
Closes: #2445
2026-04-03 21:46:49 +08:00
Luis Pater
06405f2129 fix(security): enforce stricter localhost validation for GeminiCLIAPIHandler
Closes: #2445
2026-04-03 21:22:03 +08:00
kunish
b849bf79d6 fix(copilot): address code review — SSE reasoning, multi-choice, agent detection
- Strip SSE `data:` prefix before normalizing reasoning_text→reasoning_content
  in streaming mode; re-wrap afterward for the translator
- Iterate all choices in normalizeGitHubCopilotReasoningField (not just
  choices[0]) to support n>1 requests
- Remove over-broad tool-role fallback in isAgentInitiated that scanned
  all messages for role:"tool", aligning with opencode's approach of only
  detecting active tool loops — genuine user follow-ups after tool use are
  no longer mis-classified as agent-initiated
- Add 5 reasoning normalization tests; update 2 X-Initiator tests to match
  refined semantics
2026-04-03 20:51:19 +08:00
kunish
59af2c57b1 fix(copilot): reduce premium request inflation and enable thinking
This commit addresses three issues with Claude Code through GitHub
Copilot:

1. **Premium request inflation**: Responses API requests were missing
   Openai-Intent headers and proper defaults, causing Copilot to bill
   each tool-loop continuation as a new premium request. Fixed by adding
   isAgentInitiated() heuristic (checks for tool_result content or
   preceding assistant tool_use), applying Responses API defaults
   (store, include, reasoning.summary), and local tiktoken-based token
   counting to avoid extra API calls.

2. **Context overflow**: Claude Code's modelSupports1M() hardcodes
   opus-4-6 as 1M-capable, but Copilot only supports ~128K-200K.
   Fixed by stripping the context-1m-2025-08-07 beta from translated
   request bodies. Also forwards response headers in non-streaming
   Execute() and registers the GET /copilot-quota management API route.

3. **Thinking not working**: Add ThinkingSupport with level-based
   reasoning to Claude models in the static definitions. Normalize
   Copilot's non-standard 'reasoning_text' response field to
   'reasoning_content' before passing to the SDK translator. Use
   caller-provided context in CountTokens instead of Background().
2026-04-03 20:24:30 +08:00
Kai Wang
d1fd2c4ad4 fix: repair websocket custom tool calls 2026-04-03 17:11:44 +08:00
Kai Wang
b6c6379bfa fix: repair websocket custom tool calls 2026-04-03 17:11:42 +08:00
Kai Wang
8f0e66b72e fix: repair websocket custom tool calls 2026-04-03 17:11:41 +08:00
Adam Helfgott
f63cf6ff7a Normalize Claude temperature for thinking 2026-04-03 03:45:51 -04:00
Luis Pater
d2419ed49d feat(executor): ensure default system message in QwenExecutor payload 2026-04-03 11:18:48 +08:00
Luis Pater
516d22c695 Merge pull request #484 from Ve-ria/main
更新CodeBuddy CN的模型列表
2026-04-03 11:10:32 +08:00
rensumo
73cda6e836 Update CodeBuddy DeepSeek model description 2026-04-03 11:03:33 +08:00
rensumo
0805989ee5 更新CodeBuddy CN的模型列表 2026-04-03 10:59:27 +08:00
Luis Pater
75da02af55 Merge branch 'router-for-me:main' into main 2026-04-02 22:34:47 +08:00
Luis Pater
ab9ebea592 Merge PR #2474
# Conflicts:
#	internal/api/modules/amp/response_rewriter.go
#	internal/api/modules/amp/response_rewriter_test.go
2026-04-02 22:31:12 +08:00
Luis Pater
7ee37ee4b9 feat: add /healthz endpoint and test coverage for health check
Closes: #2493
2026-04-02 21:56:27 +08:00
Luis Pater
837afffb31 docs: remove README_JA.md and clean up related links from README files 2026-04-02 21:37:47 +08:00
Luis Pater
03a1bac898 Merge upstream v6.9.9 (PR #483) 2026-04-02 21:31:21 +08:00
Luis Pater
3171d524f0 docs: fix duplicated ProxyPal entry in README files 2026-04-02 21:22:40 +08:00
Luis Pater
3e78a8d500 Merge branch 'main' into dev 2026-04-02 21:21:26 +08:00
Luis Pater
fcba912cc4 Merge pull request #2492 from davidwushi1145/main
fix(responses): reassemble split SSE event/data frames before streaming
2026-04-02 21:20:31 +08:00
Luis Pater
7170eeea5f Merge pull request #2454 from buddingnewinsights/add-proxypal-to-readme
docs: add ProxyPal to "Who is with us?" section
2026-04-02 21:18:22 +08:00
Luis Pater
e3eb048c7a Merge pull request #2489 from Soein/upstream-pr
fix: 增强 Claude 反代检测对抗能力
2026-04-02 21:16:58 +08:00
Luis Pater
a59e92435b Merge pull request #2490 from router-for-me/logs
Refactor websocket logging and error handling
2026-04-02 20:47:31 +08:00
davidwushi1145
108895fc04 Harden Responses SSE framing against partial chunk boundaries
Follow-up review found two real framing hazards in the handler-layer
framer: it could flush a partial `data:` payload before the JSON was
complete, and it could inject an extra newline before chunks that
already began with `\n`/`\r\n`. This commit tightens the framer so it
only emits undelimited events when the buffered `data:` payload is
already valid JSON (or `[DONE]`), skips newline injection for chunks
that already start with a line break, and avoids the heavier
`bytes.Split` path while scanning SSE fields.

The regression suite now covers split `data:` payload chunks,
newline-prefixed chunks, and dropping incomplete trailing data on
flush, so the original Responses fix remains intact while the review
concerns are explicitly locked down.

Constraint: Keep the follow-up limited to handler-layer framing and tests
Rejected: Ignore the review and rely on current executor chunk shapes | leaves partial data payload corruption possible
Rejected: Build a fully generic SSE parser | wider change than needed for the identified risks
Confidence: high
Scope-risk: narrow
Reversibility: clean
Directive: Do not emit undelimited Responses SSE events unless buffered `data:` content is already complete and valid
Tested: /tmp/go1.26.1/go/bin/go test ./sdk/api/handlers/openai -count=1
Tested: /tmp/go1.26.1/go/bin/go test ./sdk/api/handlers -count=1
Tested: /tmp/go1.26.1/go/bin/go vet ./sdk/api/handlers/...
Not-tested: Full repository test suite outside sdk/api/handlers packages
2026-04-02 20:39:49 +08:00
davidwushi1145
abc293c642 Prevent malformed Responses SSE frames from breaking stream clients
Line-oriented upstream executors can emit `event:` and `data:` as
separate chunks, but the Responses handler had started terminating
each incoming chunk as a full SSE event. That split `response.created`
into an empty event plus a later data block, which broke downstream
clients like OpenClaw.

This keeps the fix in the handler layer: a small stateful framer now
buffers standalone `event:` lines until the matching `data:` arrives,
preserves already-framed events, and ignores delimiter-only leftovers.
The regression suite now covers split event/data framing, full-event
passthrough, terminal errors, and the bootstrap path that forwards
line-oriented openai-response streams from non-Codex executors too.

Constraint: Keep the fix localized to Responses handler framing instead of patching every executor
Rejected: Revert to v6.9.7 chunk writing | would reintroduce data-only framing regressions
Rejected: Patch each line-oriented executor separately | duplicates fragile SSE assembly logic
Confidence: high
Scope-risk: narrow
Reversibility: clean
Directive: Do not assume incoming Responses stream chunks are already complete SSE events; preserve handler-layer reassembly for split `event:`/`data:` inputs
Tested: /tmp/go1.26.1/go/bin/go test ./sdk/api/handlers/openai -count=1
Tested: /tmp/go1.26.1/go/bin/go test ./sdk/api/handlers -count=1
Tested: /tmp/go1.26.1/go test ./sdk/api/handlers/... -count=1
Tested: /tmp/go1.26.1/go/bin/go vet ./sdk/api/handlers/...
Tested: Temporary patched server on 127.0.0.1:18317 -> /v1/models 200, /v1/responses non-stream 200, /v1/responses stream emitted combined `event:` + `data:` frames
Not-tested: Full repository test suite outside sdk/api/handlers packages
2026-04-02 20:26:42 +08:00
pzy
bb44671845 fix: 修复反代检测对抗的 3 个问题
- computeFingerprint 使用 rune 索引替代字节索引,修复多字节字符指纹不匹配
- utls Chrome TLS 指纹仅对 Anthropic 官方域名生效,自定义 base_url 走标准 transport
- IPv6 地址使用 net.JoinHostPort 正确拼接端口
2026-04-02 19:12:55 +08:00
Luis Pater
09e480036a feat(auth): add support for managing custom headers in auth files
Closes #2457
2026-04-02 19:11:09 +08:00
pzy
249f969110 fix: Claude API 请求使用 utls Chrome TLS 指纹
Claude executor 的 API 请求之前使用 Go 标准库 crypto/tls,JA3 指纹
与真实 Claude Code(Bun/BoringSSL)不匹配,可被 Cloudflare 识别。

- 新增 helps/utls_client.go,封装 utls Chrome 指纹 + HTTP/2 + 代理支持
- Claude executor 的 4 处 NewProxyAwareHTTPClient 替换为 NewUtlsHTTPClient
- 其他 executor(Gemini/Codex/iFlow 等)不受影响,仍用标准 TLS
- 非 HTTPS 请求自动回退到标准 transport
2026-04-02 19:09:56 +08:00
hkfires
4f8acec2d8 refactor(logging): centralize websocket handshake recording 2026-04-02 18:39:32 +08:00
hkfires
34339f61ee Refactor websocket logging and error handling
- Introduced new logging functions for websocket requests, handshakes, errors, and responses in `logging_helpers.go`.
- Updated `CodexWebsocketsExecutor` to utilize the new logging functions for improved clarity and consistency in websocket operations.
- Modified the handling of websocket upgrade rejections to log relevant metadata.
- Changed the request body key to a timeline body key in `openai_responses_websocket.go` to better reflect its purpose.
- Enhanced tests to verify the correct logging of websocket events and responses, including disconnect events and error handling scenarios.
2026-04-02 17:30:51 +08:00
pzy
4045378cb4 fix: 增强 Claude 反代检测对抗能力
基于 Claude Code v2.1.88 源码分析,修复多个可被 Anthropic 检测的差距:

- 实现消息指纹算法(SHA256 盐值 + 字符索引),替代随机 buildHash
- billing header cc_version 从设备 profile 动态取版本号,不再硬编码
- billing header cc_entrypoint 从客户端 UA 解析,支持 cli/vscode/local-agent
- billing header 新增 cc_workload 支持(通过 X-CPA-Claude-Workload 头传入)
- 新增 X-Claude-Code-Session-Id 头(每 apiKey 缓存 UUID,TTL=1h)
- 新增 x-client-request-id 头(仅 api.anthropic.com,每请求 UUID)
- 补全 4 个缺失的 beta flags(structured-outputs/fast-mode/redact-thinking/token-efficient-tools)
- OAuth scope 对齐 Claude Code 2.1.88(移除 org:create_api_key,添加 sessions/mcp/file_upload)
- Anthropic-Dangerous-Direct-Browser-Access 仅在 API key 模式发送
- 响应头网关指纹清洗(剥离 litellm/helicone/portkey/cloudflare/kong/braintrust 前缀头)
2026-04-02 15:55:22 +08:00
Aikins Laryea
f5e9f01811 test(amp): update tests to expect thinking blocks to pass through during streaming 2026-04-01 20:35:23 +00:00
Aikins Laryea
ff7dbb5867 test(amp): update tests to expect thinking blocks to pass through during streaming 2026-04-01 20:21:39 +00:00
Aikins Laryea
e34b2b4f1d fix(gemini): clean tool schemas and eager_input_streaming
delegate schema sanitization to util.CleanJSONSchemaForGemini and drop the top-level eager_input_streaming key to prevent validation errors when sending claude tools to the gemini api
2026-04-01 19:49:38 +00:00
huynhgiabuu
6fdff8227d docs: add ProxyPal to 'Who is with us?' section
Add ProxyPal (https://github.com/buddingnewinsights/proxypal) to the
community projects list in all three README files (EN, CN, JA).
Placed after CCS, restoring its original position.

ProxyPal is a cross-platform desktop app (macOS, Windows, Linux) that
wraps CLIProxyAPI with a native GUI, supporting multiple AI providers,
usage analytics, request monitoring, and auto-configuration for popular
coding tools.

Closes #2420
2026-04-01 10:23:22 +07:00
54 changed files with 3695 additions and 746 deletions

2
.gitignore vendored
View File

@@ -37,13 +37,13 @@ GEMINI.md
# Tooling metadata
.vscode/*
.worktrees/
.codex/*
.claude/*
.gemini/*
.serena/*
.agent/*
.agents/*
.agents/*
.opencode/*
.idea/*
.bmad/*

View File

@@ -14,95 +14,6 @@ This project only accepts pull requests that relate to third-party provider supp
If you need to submit any non-third-party provider changes, please open them against the [mainline](https://github.com/router-for-me/CLIProxyAPI) repository.
1. Fork the repository
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
4. Push to the branch (`git push origin feature/amazing-feature`)
5. Open a Pull Request
## Who is with us?
Those projects are based on CLIProxyAPI:
### [vibeproxy](https://github.com/automazeio/vibeproxy)
Native macOS menu bar app to use your Claude Code & ChatGPT subscriptions with AI coding tools - no API keys needed
### [Subtitle Translator](https://github.com/VjayC/SRT-Subtitle-Translator-Validator)
Browser-based tool to translate SRT subtitles using your Gemini subscription via CLIProxyAPI with automatic validation/error correction - no API keys needed
### [CCS (Claude Code Switch)](https://github.com/kaitranntt/ccs)
CLI wrapper for instant switching between multiple Claude accounts and alternative models (Gemini, Codex, Antigravity) via CLIProxyAPI OAuth - no API keys needed
### [Quotio](https://github.com/nguyenphutrong/quotio)
Native macOS menu bar app that unifies Claude, Gemini, OpenAI, Qwen, and Antigravity subscriptions with real-time quota tracking and smart auto-failover for AI coding tools like Claude Code, OpenCode, and Droid - no API keys needed.
### [CodMate](https://github.com/loocor/CodMate)
Native macOS SwiftUI app for managing CLI AI sessions (Codex, Claude Code, Gemini CLI) with unified provider management, Git review, project organization, global search, and terminal integration. Integrates CLIProxyAPI to provide OAuth authentication for Codex, Claude, Gemini, Antigravity, and Qwen Code, with built-in and third-party provider rerouting through a single proxy endpoint - no API keys needed for OAuth providers.
### [ProxyPilot](https://github.com/Finesssee/ProxyPilot)
Windows-native CLIProxyAPI fork with TUI, system tray, and multi-provider OAuth for AI coding tools - no API keys needed.
### [Claude Proxy VSCode](https://github.com/uzhao/claude-proxy-vscode)
VSCode extension for quick switching between Claude Code models, featuring integrated CLIProxyAPI as its backend with automatic background lifecycle management.
### [ZeroLimit](https://github.com/0xtbug/zero-limit)
Windows desktop app built with Tauri + React for monitoring AI coding assistant quotas via CLIProxyAPI. Track usage across Gemini, Claude, OpenAI Codex, and Antigravity accounts with real-time dashboard, system tray integration, and one-click proxy control - no API keys needed.
### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
A lightweight web admin panel for CLIProxyAPI with health checks, resource monitoring, real-time logs, auto-update, request statistics and pricing display. Supports one-click installation and systemd service.
### [CLIProxyAPI Tray](https://github.com/kitephp/CLIProxyAPI_Tray)
A Windows tray application implemented using PowerShell scripts, without relying on any third-party libraries. The main features include: automatic creation of shortcuts, silent running, password management, channel switching (Main / Plus), and automatic downloading and updating.
### [霖君](https://github.com/wangdabaoqq/LinJun)
霖君 is a cross-platform desktop application for managing AI programming assistants, supporting macOS, Windows, and Linux systems. Unified management of Claude Code, Gemini CLI, OpenAI Codex, Qwen Code, and other AI coding tools, with local proxy for multi-account quota tracking and one-click configuration.
### [CLIProxyAPI Dashboard](https://github.com/itsmylife44/cliproxyapi-dashboard)
A modern web-based management dashboard for CLIProxyAPI built with Next.js, React, and PostgreSQL. Features real-time log streaming, structured configuration editing, API key management, OAuth provider integration for Claude/Gemini/Codex, usage analytics, container management, and config sync with OpenCode via companion plugin - no manual YAML editing needed.
### [All API Hub](https://github.com/qixing-jk/all-api-hub)
Browser extension for one-stop management of New API-compatible relay site accounts, featuring balance and usage dashboards, auto check-in, one-click key export to common apps, in-page API availability testing, and channel/model sync and redirection. It integrates with CLIProxyAPI through the Management API for one-click provider import and config sync.
### [Shadow AI](https://github.com/HEUDavid/shadow-ai)
Shadow AI is an AI assistant tool designed specifically for restricted environments. It provides a stealthy operation
mode without windows or traces, and enables cross-device AI Q&A interaction and control via the local area network (
LAN). Essentially, it is an automated collaboration layer of "screen/audio capture + AI inference + low-friction delivery",
helping users to immersively use AI assistants across applications on controlled devices or in restricted environments.
> [!NOTE]
> If you developed a project based on CLIProxyAPI, please open a PR to add it to this list.
## More choices
Those projects are ports of CLIProxyAPI or inspired by it:
### [9Router](https://github.com/decolua/9router)
A Next.js implementation inspired by CLIProxyAPI, easy to install and use, built from scratch with format translation (OpenAI/Claude/Gemini/Ollama), combo system with auto-fallback, multi-account management with exponential backoff, a Next.js web dashboard, and support for CLI tools (Cursor, Claude Code, Cline, RooCode) - no API keys needed.
### [OmniRoute](https://github.com/diegosouzapw/OmniRoute)
Never stop coding. Smart routing to FREE & low-cost AI models with automatic fallback.
OmniRoute is an AI gateway for multi-provider LLMs: an OpenAI-compatible endpoint with smart routing, load balancing, retries, and fallbacks. Add policies, rate limits, caching, and observability for reliable, cost-aware inference.
> [!NOTE]
> If you have developed a port of CLIProxyAPI or a project inspired by it, please open a PR to add it to this list.
## License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

View File

@@ -1,6 +1,6 @@
# CLIProxyAPI Plus
[English](README.md) | 中文 | [日本語](README_JA.md)
[English](README.md) | 中文
这是 [CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI) 的 Plus 版本,在原有基础上增加了第三方供应商的支持。
@@ -12,92 +12,6 @@
如果需要提交任何非第三方供应商支持的 Pull Request请提交到[主线](https://github.com/router-for-me/CLIProxyAPI)版本。
1. Fork 仓库
2. 创建您的功能分支(`git checkout -b feature/amazing-feature`
3. 提交您的更改(`git commit -m 'Add some amazing feature'`
4. 推送到分支(`git push origin feature/amazing-feature`
5. 打开 Pull Request
## 谁与我们在一起?
这些项目基于 CLIProxyAPI:
### [vibeproxy](https://github.com/automazeio/vibeproxy)
一个原生 macOS 菜单栏应用,让您可以使用 Claude Code & ChatGPT 订阅服务和 AI 编程工具,无需 API 密钥。
### [Subtitle Translator](https://github.com/VjayC/SRT-Subtitle-Translator-Validator)
一款基于浏览器的 SRT 字幕翻译工具,可通过 CLI 代理 API 使用您的 Gemini 订阅。内置自动验证与错误修正功能,无需 API 密钥。
### [CCS (Claude Code Switch)](https://github.com/kaitranntt/ccs)
CLI 封装器,用于通过 CLIProxyAPI OAuth 即时切换多个 Claude 账户和替代模型Gemini, Codex, Antigravity无需 API 密钥。
### [Quotio](https://github.com/nguyenphutrong/quotio)
原生 macOS 菜单栏应用,统一管理 Claude、Gemini、OpenAI、Qwen 和 Antigravity 订阅,提供实时配额追踪和智能自动故障转移,支持 Claude Code、OpenCode 和 Droid 等 AI 编程工具,无需 API 密钥。
### [CodMate](https://github.com/loocor/CodMate)
原生 macOS SwiftUI 应用,用于管理 CLI AI 会话Claude Code、Codex、Gemini CLI提供统一的提供商管理、Git 审查、项目组织、全局搜索和终端集成。集成 CLIProxyAPI 为 Codex、Claude、Gemini、Antigravity 和 Qwen Code 提供统一的 OAuth 认证,支持内置和第三方提供商通过单一代理端点重路由 - OAuth 提供商无需 API 密钥。
### [ProxyPilot](https://github.com/Finesssee/ProxyPilot)
原生 Windows CLIProxyAPI 分支,集成 TUI、系统托盘及多服务商 OAuth 认证,专为 AI 编程工具打造,无需 API 密钥。
### [Claude Proxy VSCode](https://github.com/uzhao/claude-proxy-vscode)
一款 VSCode 扩展,提供了在 VSCode 中快速切换 Claude Code 模型的功能,内置 CLIProxyAPI 作为其后端,支持后台自动启动和关闭。
### [ZeroLimit](https://github.com/0xtbug/zero-limit)
Windows 桌面应用,基于 Tauri + React 构建,用于通过 CLIProxyAPI 监控 AI 编程助手配额。支持跨 Gemini、Claude、OpenAI Codex 和 Antigravity 账户的使用量追踪,提供实时仪表盘、系统托盘集成和一键代理控制,无需 API 密钥。
### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
面向 CLIProxyAPI 的 Web 管理面板,提供健康检查、资源监控、日志查看、自动更新、请求统计与定价展示,支持一键安装与 systemd 服务。
### [CLIProxyAPI Tray](https://github.com/kitephp/CLIProxyAPI_Tray)
Windows 托盘应用,基于 PowerShell 脚本实现不依赖任何第三方库。主要功能包括自动创建快捷方式、静默运行、密码管理、通道切换Main / Plus以及自动下载与更新。
### [霖君](https://github.com/wangdabaoqq/LinJun)
霖君是一款用于管理AI编程助手的跨平台桌面应用支持macOS、Windows、Linux系统。统一管理Claude Code、Gemini CLI、OpenAI Codex、Qwen Code等AI编程工具本地代理实现多账户配额跟踪和一键配置。
### [CLIProxyAPI Dashboard](https://github.com/itsmylife44/cliproxyapi-dashboard)
一个面向 CLIProxyAPI 的现代化 Web 管理仪表盘,基于 Next.js、React 和 PostgreSQL 构建。支持实时日志流、结构化配置编辑、API Key 管理、Claude/Gemini/Codex 的 OAuth 提供方集成、使用量分析、容器管理,并可通过配套插件与 OpenCode 同步配置,无需手动编辑 YAML。
### [All API Hub](https://github.com/qixing-jk/all-api-hub)
用于一站式管理 New API 兼容中转站账号的浏览器扩展,提供余额与用量看板、自动签到、密钥一键导出到常用应用、网页内 API 可用性测试,以及渠道与模型同步和重定向。支持通过 CLIProxyAPI Management API 一键导入 Provider 与同步配置。
### [Shadow AI](https://github.com/HEUDavid/shadow-ai)
Shadow AI 是一款专为受限环境设计的 AI 辅助工具。提供无窗口、无痕迹的隐蔽运行方式,并通过局域网实现跨设备的 AI 问答交互与控制。本质上是一个「屏幕/音频采集 + AI 推理 + 低摩擦投送」的自动化协作层,帮助用户在受控设备/受限环境下沉浸式跨应用地使用 AI 助手。
> [!NOTE]
> 如果你开发了基于 CLIProxyAPI 的项目,请提交一个 PR拉取请求将其添加到此列表中。
## 更多选择
以下项目是 CLIProxyAPI 的移植版或受其启发:
### [9Router](https://github.com/decolua/9router)
基于 Next.js 的实现,灵感来自 CLIProxyAPI易于安装使用自研格式转换OpenAI/Claude/Gemini/Ollama、组合系统与自动回退、多账户管理指数退避、Next.js Web 控制台,并支持 Cursor、Claude Code、Cline、RooCode 等 CLI 工具,无需 API 密钥。
### [OmniRoute](https://github.com/diegosouzapw/OmniRoute)
代码不止,创新不停。智能路由至免费及低成本 AI 模型,并支持自动故障转移。
OmniRoute 是一个面向多供应商大语言模型的 AI 网关:它提供兼容 OpenAI 的端点,具备智能路由、负载均衡、重试及回退机制。通过添加策略、速率限制、缓存和可观测性,确保推理过程既可靠又具备成本意识。
> [!NOTE]
> 如果你开发了 CLIProxyAPI 的移植或衍生项目,请提交 PR 将其添加到此列表中。
## 许可证
此项目根据 MIT 许可证授权 - 有关详细信息,请参阅 [LICENSE](LICENSE) 文件。

View File

@@ -1,195 +0,0 @@
# CLI Proxy API
[English](README.md) | [中文](README_CN.md) | 日本語
CLI向けのOpenAI/Gemini/Claude/Codex互換APIインターフェースを提供するプロキシサーバーです。
OAuth経由でOpenAI CodexGPTモデルおよびClaude Codeもサポートしています。
ローカルまたはマルチアカウントのCLIアクセスを、OpenAIResponses含む/Gemini/Claude互換のクライアントやSDKで利用できます。
## スポンサー
[![z.ai](https://assets.router-for.me/english-5-0.jpg)](https://z.ai/subscribe?ic=8JVLJQFSKB)
本プロジェクトはZ.aiにスポンサーされており、GLM CODING PLANの提供を受けています。
GLM CODING PLANはAIコーディング向けに設計されたサブスクリプションサービスで、月額わずか$10から利用可能です。フラッグシップのGLM-4.7およびGLM-5はProユーザーのみ利用可能モデルを10以上の人気AIコーディングツールClaude Code、Cline、Roo Codeなどで利用でき、開発者にトップクラスの高速かつ安定したコーディング体験を提供します。
GLM CODING PLANを10%割引で取得https://z.ai/subscribe?ic=8JVLJQFSKB
---
<table>
<tbody>
<tr>
<td width="180"><a href="https://www.packyapi.com/register?aff=cliproxyapi"><img src="./assets/packycode.png" alt="PackyCode" width="150"></a></td>
<td>PackyCodeのスポンサーシップに感謝しますPackyCodeは信頼性が高く効率的なAPIリレーサービスプロバイダーで、Claude Code、Codex、Geminiなどのリレーサービスを提供しています。PackyCodeは当ソフトウェアのユーザーに特別割引を提供しています<a href="https://www.packyapi.com/register?aff=cliproxyapi">こちらのリンク</a>から登録し、チャージ時にプロモーションコード「cliproxyapi」を入力すると10%割引になります。</td>
</tr>
<tr>
<td width="180"><a href="https://www.aicodemirror.com/register?invitecode=TJNAIF"><img src="./assets/aicodemirror.png" alt="AICodeMirror" width="150"></a></td>
<td>AICodeMirrorのスポンサーシップに感謝しますAICodeMirrorはClaude Code / Codex / Gemini CLI向けの公式高安定性リレーサービスを提供しており、エンタープライズグレードの同時接続、迅速な請求書発行、24時間365日の専任技術サポートを備えています。Claude Code / Codex / Geminiの公式チャネルが元の価格の38% / 2% / 9%で利用でき、チャージ時にはさらに割引がありますCLIProxyAPIユーザー向けの特別特典<a href="https://www.aicodemirror.com/register?invitecode=TJNAIF">こちらのリンク</a>から登録すると、初回チャージが20%割引になり、エンタープライズのお客様は最大25%割引を受けられます!</td>
</tr>
<tr>
<td width="180"><a href="https://shop.bmoplus.com/?utm_source=github"><img src="./assets/bmoplus.png" alt="BmoPlus" width="150"></a></td>
<td>本プロジェクトにご支援いただいた BmoPlus に感謝いたしますBmoPlusは、AIサブスクリプションのヘビーユーザー向けに特化した信頼性の高いAIアカウントサービスプロバイダーであり、安定した ChatGPT Plus / ChatGPT Pro (完全保証) / Claude Pro / Super Grok / Gemini Pro の公式代行チャージおよび即納アカウントを提供しています。こちらの<a href="https://shop.bmoplus.com/?utm_source=github">BmoPlus AIアカウント専門店/代行チャージ</a>経由でご登録・ご注文いただいたユーザー様は、GPTを <b>公式サイト価格の約1割90% OFF</b> という驚異的な価格でご利用いただけます!</td>
</tr>
<tr>
<td width="180"><a href="https://www.lingtrue.com/register"><img src="./assets/lingtrue.png" alt="LingtrueAPI" width="150"></a></td>
<td>LingtrueAPIのスポンサーシップに感謝しますLingtrueAPIはグローバルな大規模モデルAPIリレーサービスプラットフォームで、Claude Code、Codex、GeminiなどのトップモデルAPI呼び出しサービスを提供し、ユーザーが低コストかつ高い安定性で世界中のAI能力に接続できるよう支援しています。LingtrueAPIは本ソフトウェアのユーザーに特別割引を提供しています<a href="https://www.lingtrue.com/register">こちらのリンク</a>から登録し、初回チャージ時にプロモーションコード「LingtrueAPI」を入力すると10%割引になります。</td>
</tr>
</tbody>
</table>
## 概要
- CLIモデル向けのOpenAI/Gemini/Claude互換APIエンドポイント
- OAuthログインによるOpenAI CodexサポートGPTモデル
- OAuthログインによるClaude Codeサポート
- OAuthログインによるQwen Codeサポート
- OAuthログインによるiFlowサポート
- プロバイダールーティングによるAmp CLIおよびIDE拡張機能のサポート
- ストリーミングおよび非ストリーミングレスポンス
- 関数呼び出し/ツールのサポート
- マルチモーダル入力サポート(テキストと画像)
- ラウンドロビン負荷分散による複数アカウント対応Gemini、OpenAI、Claude、QwenおよびiFlow
- シンプルなCLI認証フローGemini、OpenAI、Claude、QwenおよびiFlow
- Generative Language APIキーのサポート
- AI Studioビルドのマルチアカウント負荷分散
- Gemini CLIのマルチアカウント負荷分散
- Claude Codeのマルチアカウント負荷分散
- Qwen Codeのマルチアカウント負荷分散
- iFlowのマルチアカウント負荷分散
- OpenAI Codexのマルチアカウント負荷分散
- 設定によるOpenAI互換アップストリームプロバイダーOpenRouter
- プロキシ埋め込み用の再利用可能なGo SDK`docs/sdk-usage.md`を参照)
## はじめに
CLIProxyAPIガイド[https://help.router-for.me/](https://help.router-for.me/)
## 管理API
[MANAGEMENT_API.md](https://help.router-for.me/management/api)を参照
## Amp CLIサポート
CLIProxyAPIは[Amp CLI](https://ampcode.com)およびAmp IDE拡張機能の統合サポートを含んでおり、Google/ChatGPT/ClaudeのOAuthサブスクリプションをAmpのコーディングツールで使用できます
- Ampの APIパターン用のプロバイダールートエイリアス`/api/provider/{provider}/v1...`
- OAuth認証およびアカウント機能用の管理プロキシ
- 自動ルーティングによるスマートモデルフォールバック
- 利用できないモデルを代替モデルにルーティングする**モデルマッピング**(例:`claude-opus-4.5``claude-sonnet-4`
- localhostのみの管理エンドポイントによるセキュリティファーストの設計
特定のバックエンド系統のリクエスト/レスポンス形状が必要な場合は、統合された `/v1/...` エンドポイントよりも provider-specific のパスを優先してください。
- messages 系のバックエンドには `/api/provider/{provider}/v1/messages`
- モデル単位の generate 系エンドポイントには `/api/provider/{provider}/v1beta/models/...`
- chat-completions 系のバックエンドには `/api/provider/{provider}/v1/chat/completions`
これらのパスはプロトコル面の選択には役立ちますが、同じクライアント向けモデル名が複数バックエンドで再利用されている場合、それだけで推論実行系が一意に固定されるわけではありません。実際の推論ルーティングは、引き続きリクエスト内の model/alias 解決に従います。厳密にバックエンドを固定したい場合は、一意な alias や prefix を使うか、クライアント向けモデル名の重複自体を避けてください。
**→ [Amp CLI統合ガイドの完全版](https://help.router-for.me/agent-client/amp-cli.html)**
## SDKドキュメント
- 使い方:[docs/sdk-usage.md](docs/sdk-usage.md)
- 上級(エグゼキューターとトランスレーター):[docs/sdk-advanced.md](docs/sdk-advanced.md)
- アクセス:[docs/sdk-access.md](docs/sdk-access.md)
- ウォッチャー:[docs/sdk-watcher.md](docs/sdk-watcher.md)
- カスタムプロバイダーの例:`examples/custom-provider`
## コントリビューション
コントリビューションを歓迎しますお気軽にPull Requestを送ってください。
1. リポジトリをフォーク
2. フィーチャーブランチを作成(`git checkout -b feature/amazing-feature`
3. 変更をコミット(`git commit -m 'Add some amazing feature'`
4. ブランチにプッシュ(`git push origin feature/amazing-feature`
5. Pull Requestを作成
## 関連プロジェクト
CLIProxyAPIをベースにした以下のプロジェクトがあります
### [vibeproxy](https://github.com/automazeio/vibeproxy)
macOSネイティブのメニューバーアプリで、Claude CodeとChatGPTのサブスクリプションをAIコーディングツールで使用可能 - APIキー不要
### [Subtitle Translator](https://github.com/VjayC/SRT-Subtitle-Translator-Validator)
CLIProxyAPI経由でGeminiサブスクリプションを使用してSRT字幕を翻訳するブラウザベースのツール。自動検証/エラー修正機能付き - APIキー不要
### [CCS (Claude Code Switch)](https://github.com/kaitranntt/ccs)
CLIProxyAPI OAuthを使用して複数のClaudeアカウントや代替モデルGemini、Codex、Antigravityを即座に切り替えるCLIラッパー - APIキー不要
### [Quotio](https://github.com/nguyenphutrong/quotio)
Claude、Gemini、OpenAI、Qwen、Antigravityのサブスクリプションを統合し、リアルタイムのクォータ追跡とスマート自動フェイルオーバーを備えたmacOSネイティブのメニューバーアプリ。Claude Code、OpenCode、Droidなどのコーディングツール向け - APIキー不要
### [CodMate](https://github.com/loocor/CodMate)
CLI AIセッションCodex、Claude Code、Gemini CLIを管理するmacOS SwiftUIネイティブアプリ。統合プロバイダー管理、Gitレビュー、プロジェクト整理、グローバル検索、ターミナル統合機能を搭載。CLIProxyAPIと統合し、Codex、Claude、Gemini、Antigravity、Qwen CodeのOAuth認証を提供。単一のプロキシエンドポイントを通じた組み込みおよびサードパーティプロバイダーの再ルーティングに対応 - OAuthプロバイダーではAPIキー不要
### [ProxyPilot](https://github.com/Finesssee/ProxyPilot)
TUI、システムトレイ、マルチプロバイダーOAuthを備えたWindows向けCLIProxyAPIフォーク - AIコーディングツール用、APIキー不要
### [Claude Proxy VSCode](https://github.com/uzhao/claude-proxy-vscode)
Claude Codeモデルを素早く切り替えるVSCode拡張機能。バックエンドとしてCLIProxyAPIを統合し、バックグラウンドでの自動ライフサイクル管理を搭載
### [ZeroLimit](https://github.com/0xtbug/zero-limit)
CLIProxyAPIを使用してAIコーディングアシスタントのクォータを監視するTauri + React製のWindowsデスクトップアプリ。Gemini、Claude、OpenAI Codex、Antigravityアカウントの使用量をリアルタイムダッシュボード、システムトレイ統合、ワンクリックプロキシコントロールで追跡 - APIキー不要
### [CPA-XXX Panel](https://github.com/ferretgeek/CPA-X)
CLIProxyAPI向けの軽量Web管理パネル。ヘルスチェック、リソース監視、リアルタイムログ、自動更新、リクエスト統計、料金表示機能を搭載。ワンクリックインストールとsystemdサービスに対応
### [CLIProxyAPI Tray](https://github.com/kitephp/CLIProxyAPI_Tray)
PowerShellスクリプトで実装されたWindowsトレイアプリケーション。サードパーティライブラリに依存せず、ショートカットの自動作成、サイレント実行、パスワード管理、チャネル切り替えMain / Plus、自動ダウンロードおよび自動更新に対応
### [霖君](https://github.com/wangdabaoqq/LinJun)
霖君はAIプログラミングアシスタントを管理するクロスプラットフォームデスクトップアプリケーションで、macOS、Windows、Linuxシステムに対応。Claude Code、Gemini CLI、OpenAI Codex、Qwen Codeなどのコーディングツールを統合管理し、ローカルプロキシによるマルチアカウントクォータ追跡とワンクリック設定が可能
### [CLIProxyAPI Dashboard](https://github.com/itsmylife44/cliproxyapi-dashboard)
Next.js、React、PostgreSQLで構築されたCLIProxyAPI用のモダンなWebベース管理ダッシュボード。リアルタイムログストリーミング、構造化された設定編集、APIキー管理、Claude/Gemini/Codex向けOAuthプロバイダー統合、使用量分析、コンテナ管理、コンパニオンプラグインによるOpenCodeとの設定同期機能を搭載 - 手動でのYAML編集は不要
### [All API Hub](https://github.com/qixing-jk/all-api-hub)
New API互換リレーサイトアカウントをワンストップで管理するブラウザ拡張機能。残高と使用量のダッシュボード、自動チェックイン、一般的なアプリへのワンクリックキーエクスポート、ページ内API可用性テスト、チャネル/モデルの同期とリダイレクト機能を搭載。Management APIを通じてCLIProxyAPIと統合し、ワンクリックでプロバイダーのインポートと設定同期が可能
### [Shadow AI](https://github.com/HEUDavid/shadow-ai)
Shadow AIは制限された環境向けに特別に設計されたAIアシスタントツールです。ウィンドウや痕跡のないステルス動作モードを提供し、LANローカルエリアネットワークを介したクロスデバイスAI質疑応答のインタラクションと制御を可能にします。本質的には「画面/音声キャプチャ + AI推論 + 低摩擦デリバリー」の自動化コラボレーションレイヤーであり、制御されたデバイスや制限された環境でアプリケーション横断的にAIアシスタントを没入的に使用できるようユーザーを支援します。
> [!NOTE]
> CLIProxyAPIをベースにプロジェクトを開発した場合は、PRを送ってこのリストに追加してください。
## その他の選択肢
以下のプロジェクトはCLIProxyAPIの移植版またはそれに触発されたものです
### [9Router](https://github.com/decolua/9router)
CLIProxyAPIに触発されたNext.js実装。インストールと使用が簡単で、フォーマット変換OpenAI/Claude/Gemini/Ollama、自動フォールバック付きコンボシステム、指数バックオフ付きマルチアカウント管理、Next.js Webダッシュボード、CLIツールCursor、Claude Code、Cline、RooCodeのサポートをゼロから構築 - APIキー不要
### [OmniRoute](https://github.com/diegosouzapw/OmniRoute)
コーディングを止めない。無料および低コストのAIモデルへのスマートルーティングと自動フォールバック。
OmniRouteはマルチプロバイダーLLM向けのAIゲートウェイですスマートルーティング、負荷分散、リトライ、フォールバックを備えたOpenAI互換エンドポイント。ポリシー、レート制限、キャッシュ、可観測性を追加して、信頼性が高くコストを意識した推論を実現します。
> [!NOTE]
> CLIProxyAPIの移植版またはそれに触発されたプロジェクトを開発した場合は、PRを送ってこのリストに追加してください。
## ライセンス
本プロジェクトはMITライセンスの下でライセンスされています - 詳細は[LICENSE](LICENSE)ファイルを参照してください。

View File

@@ -105,6 +105,10 @@ routing:
# When true, enable authentication for the WebSocket API (/v1/ws).
ws-auth: false
# When true, enable Gemini CLI internal endpoints (/v1internal:*).
# Default is false for safety.
enable-gemini-cli-endpoint: false
# When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
nonstream-keepalive-interval: 0

View File

@@ -1047,6 +1047,7 @@ func (h *Handler) buildAuthFromFileData(path string, data []byte) (*coreauth.Aut
auth.Runtime = existing.Runtime
}
}
coreauth.ApplyCustomHeadersFromMetadata(auth)
return auth, nil
}
@@ -1129,7 +1130,7 @@ func (h *Handler) PatchAuthFileStatus(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"status": "ok", "disabled": *req.Disabled})
}
// PatchAuthFileFields updates editable fields (prefix, proxy_url, priority, note) of an auth file.
// PatchAuthFileFields updates editable fields (prefix, proxy_url, headers, priority, note) of an auth file.
func (h *Handler) PatchAuthFileFields(c *gin.Context) {
if h.authManager == nil {
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "core auth manager unavailable"})
@@ -1137,11 +1138,12 @@ func (h *Handler) PatchAuthFileFields(c *gin.Context) {
}
var req struct {
Name string `json:"name"`
Prefix *string `json:"prefix"`
ProxyURL *string `json:"proxy_url"`
Priority *int `json:"priority"`
Note *string `json:"note"`
Name string `json:"name"`
Prefix *string `json:"prefix"`
ProxyURL *string `json:"proxy_url"`
Headers map[string]string `json:"headers"`
Priority *int `json:"priority"`
Note *string `json:"note"`
}
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
@@ -1177,13 +1179,107 @@ func (h *Handler) PatchAuthFileFields(c *gin.Context) {
changed := false
if req.Prefix != nil {
targetAuth.Prefix = *req.Prefix
prefix := strings.TrimSpace(*req.Prefix)
targetAuth.Prefix = prefix
if targetAuth.Metadata == nil {
targetAuth.Metadata = make(map[string]any)
}
if prefix == "" {
delete(targetAuth.Metadata, "prefix")
} else {
targetAuth.Metadata["prefix"] = prefix
}
changed = true
}
if req.ProxyURL != nil {
targetAuth.ProxyURL = *req.ProxyURL
proxyURL := strings.TrimSpace(*req.ProxyURL)
targetAuth.ProxyURL = proxyURL
if targetAuth.Metadata == nil {
targetAuth.Metadata = make(map[string]any)
}
if proxyURL == "" {
delete(targetAuth.Metadata, "proxy_url")
} else {
targetAuth.Metadata["proxy_url"] = proxyURL
}
changed = true
}
if len(req.Headers) > 0 {
existingHeaders := coreauth.ExtractCustomHeadersFromMetadata(targetAuth.Metadata)
nextHeaders := make(map[string]string, len(existingHeaders))
for k, v := range existingHeaders {
nextHeaders[k] = v
}
headerChanged := false
for key, value := range req.Headers {
name := strings.TrimSpace(key)
if name == "" {
continue
}
val := strings.TrimSpace(value)
attrKey := "header:" + name
if val == "" {
if _, ok := nextHeaders[name]; ok {
delete(nextHeaders, name)
headerChanged = true
}
if targetAuth.Attributes != nil {
if _, ok := targetAuth.Attributes[attrKey]; ok {
headerChanged = true
}
}
continue
}
if prev, ok := nextHeaders[name]; !ok || prev != val {
headerChanged = true
}
nextHeaders[name] = val
if targetAuth.Attributes != nil {
if prev, ok := targetAuth.Attributes[attrKey]; !ok || prev != val {
headerChanged = true
}
} else {
headerChanged = true
}
}
if headerChanged {
if targetAuth.Metadata == nil {
targetAuth.Metadata = make(map[string]any)
}
if targetAuth.Attributes == nil {
targetAuth.Attributes = make(map[string]string)
}
for key, value := range req.Headers {
name := strings.TrimSpace(key)
if name == "" {
continue
}
val := strings.TrimSpace(value)
attrKey := "header:" + name
if val == "" {
delete(nextHeaders, name)
delete(targetAuth.Attributes, attrKey)
continue
}
nextHeaders[name] = val
targetAuth.Attributes[attrKey] = val
}
if len(nextHeaders) == 0 {
delete(targetAuth.Metadata, "headers")
} else {
metaHeaders := make(map[string]any, len(nextHeaders))
for k, v := range nextHeaders {
metaHeaders[k] = v
}
targetAuth.Metadata["headers"] = metaHeaders
}
changed = true
}
}
if req.Priority != nil || req.Note != nil {
if targetAuth.Metadata == nil {
targetAuth.Metadata = make(map[string]any)

View File

@@ -0,0 +1,164 @@
package management
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
)
func TestPatchAuthFileFields_MergeHeadersAndDeleteEmptyValues(t *testing.T) {
t.Setenv("MANAGEMENT_PASSWORD", "")
gin.SetMode(gin.TestMode)
store := &memoryAuthStore{}
manager := coreauth.NewManager(store, nil, nil)
record := &coreauth.Auth{
ID: "test.json",
FileName: "test.json",
Provider: "claude",
Attributes: map[string]string{
"path": "/tmp/test.json",
"header:X-Old": "old",
"header:X-Remove": "gone",
},
Metadata: map[string]any{
"type": "claude",
"headers": map[string]any{
"X-Old": "old",
"X-Remove": "gone",
},
},
}
if _, errRegister := manager.Register(context.Background(), record); errRegister != nil {
t.Fatalf("failed to register auth record: %v", errRegister)
}
h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: t.TempDir()}, manager)
body := `{"name":"test.json","prefix":"p1","proxy_url":"http://proxy.local","headers":{"X-Old":"new","X-New":"v","X-Remove":" ","X-Nope":""}}`
rec := httptest.NewRecorder()
ctx, _ := gin.CreateTestContext(rec)
req := httptest.NewRequest(http.MethodPatch, "/v0/management/auth-files/fields", strings.NewReader(body))
req.Header.Set("Content-Type", "application/json")
ctx.Request = req
h.PatchAuthFileFields(ctx)
if rec.Code != http.StatusOK {
t.Fatalf("expected status %d, got %d with body %s", http.StatusOK, rec.Code, rec.Body.String())
}
updated, ok := manager.GetByID("test.json")
if !ok || updated == nil {
t.Fatalf("expected auth record to exist after patch")
}
if updated.Prefix != "p1" {
t.Fatalf("prefix = %q, want %q", updated.Prefix, "p1")
}
if updated.ProxyURL != "http://proxy.local" {
t.Fatalf("proxy_url = %q, want %q", updated.ProxyURL, "http://proxy.local")
}
if updated.Metadata == nil {
t.Fatalf("expected metadata to be non-nil")
}
if got, _ := updated.Metadata["prefix"].(string); got != "p1" {
t.Fatalf("metadata.prefix = %q, want %q", got, "p1")
}
if got, _ := updated.Metadata["proxy_url"].(string); got != "http://proxy.local" {
t.Fatalf("metadata.proxy_url = %q, want %q", got, "http://proxy.local")
}
headersMeta, ok := updated.Metadata["headers"].(map[string]any)
if !ok {
raw, _ := json.Marshal(updated.Metadata["headers"])
t.Fatalf("metadata.headers = %T (%s), want map[string]any", updated.Metadata["headers"], string(raw))
}
if got := headersMeta["X-Old"]; got != "new" {
t.Fatalf("metadata.headers.X-Old = %#v, want %q", got, "new")
}
if got := headersMeta["X-New"]; got != "v" {
t.Fatalf("metadata.headers.X-New = %#v, want %q", got, "v")
}
if _, ok := headersMeta["X-Remove"]; ok {
t.Fatalf("expected metadata.headers.X-Remove to be deleted")
}
if _, ok := headersMeta["X-Nope"]; ok {
t.Fatalf("expected metadata.headers.X-Nope to be absent")
}
if got := updated.Attributes["header:X-Old"]; got != "new" {
t.Fatalf("attrs header:X-Old = %q, want %q", got, "new")
}
if got := updated.Attributes["header:X-New"]; got != "v" {
t.Fatalf("attrs header:X-New = %q, want %q", got, "v")
}
if _, ok := updated.Attributes["header:X-Remove"]; ok {
t.Fatalf("expected attrs header:X-Remove to be deleted")
}
if _, ok := updated.Attributes["header:X-Nope"]; ok {
t.Fatalf("expected attrs header:X-Nope to be absent")
}
}
func TestPatchAuthFileFields_HeadersEmptyMapIsNoop(t *testing.T) {
t.Setenv("MANAGEMENT_PASSWORD", "")
gin.SetMode(gin.TestMode)
store := &memoryAuthStore{}
manager := coreauth.NewManager(store, nil, nil)
record := &coreauth.Auth{
ID: "noop.json",
FileName: "noop.json",
Provider: "claude",
Attributes: map[string]string{
"path": "/tmp/noop.json",
"header:X-Kee": "1",
},
Metadata: map[string]any{
"type": "claude",
"headers": map[string]any{
"X-Kee": "1",
},
},
}
if _, errRegister := manager.Register(context.Background(), record); errRegister != nil {
t.Fatalf("failed to register auth record: %v", errRegister)
}
h := NewHandlerWithoutConfigFilePath(&config.Config{AuthDir: t.TempDir()}, manager)
body := `{"name":"noop.json","note":"hello","headers":{}}`
rec := httptest.NewRecorder()
ctx, _ := gin.CreateTestContext(rec)
req := httptest.NewRequest(http.MethodPatch, "/v0/management/auth-files/fields", strings.NewReader(body))
req.Header.Set("Content-Type", "application/json")
ctx.Request = req
h.PatchAuthFileFields(ctx)
if rec.Code != http.StatusOK {
t.Fatalf("expected status %d, got %d with body %s", http.StatusOK, rec.Code, rec.Body.String())
}
updated, ok := manager.GetByID("noop.json")
if !ok || updated == nil {
t.Fatalf("expected auth record to exist after patch")
}
if got := updated.Attributes["header:X-Kee"]; got != "1" {
t.Fatalf("attrs header:X-Kee = %q, want %q", got, "1")
}
headersMeta, ok := updated.Metadata["headers"].(map[string]any)
if !ok {
t.Fatalf("expected metadata.headers to remain a map, got %T", updated.Metadata["headers"])
}
if got := headersMeta["X-Kee"]; got != "1" {
t.Fatalf("metadata.headers.X-Kee = %#v, want %q", got, "1")
}
}

View File

@@ -15,6 +15,8 @@ import (
)
const requestBodyOverrideContextKey = "REQUEST_BODY_OVERRIDE"
const responseBodyOverrideContextKey = "RESPONSE_BODY_OVERRIDE"
const websocketTimelineOverrideContextKey = "WEBSOCKET_TIMELINE_OVERRIDE"
// RequestInfo holds essential details of an incoming HTTP request for logging purposes.
type RequestInfo struct {
@@ -304,6 +306,10 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
if len(apiResponse) > 0 {
_ = w.streamWriter.WriteAPIResponse(apiResponse)
}
apiWebsocketTimeline := w.extractAPIWebsocketTimeline(c)
if len(apiWebsocketTimeline) > 0 {
_ = w.streamWriter.WriteAPIWebsocketTimeline(apiWebsocketTimeline)
}
if err := w.streamWriter.Close(); err != nil {
w.streamWriter = nil
return err
@@ -312,7 +318,7 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
return nil
}
return w.logRequest(w.extractRequestBody(c), finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), w.extractAPIResponseTimestamp(c), slicesAPIResponseError, forceLog)
return w.logRequest(w.extractRequestBody(c), finalStatusCode, w.cloneHeaders(), w.extractResponseBody(c), w.extractWebsocketTimeline(c), w.extractAPIRequest(c), w.extractAPIResponse(c), w.extractAPIWebsocketTimeline(c), w.extractAPIResponseTimestamp(c), slicesAPIResponseError, forceLog)
}
func (w *ResponseWriterWrapper) cloneHeaders() map[string][]string {
@@ -352,6 +358,18 @@ func (w *ResponseWriterWrapper) extractAPIResponse(c *gin.Context) []byte {
return data
}
func (w *ResponseWriterWrapper) extractAPIWebsocketTimeline(c *gin.Context) []byte {
apiTimeline, isExist := c.Get("API_WEBSOCKET_TIMELINE")
if !isExist {
return nil
}
data, ok := apiTimeline.([]byte)
if !ok || len(data) == 0 {
return nil
}
return bytes.Clone(data)
}
func (w *ResponseWriterWrapper) extractAPIResponseTimestamp(c *gin.Context) time.Time {
ts, isExist := c.Get("API_RESPONSE_TIMESTAMP")
if !isExist {
@@ -364,19 +382,8 @@ func (w *ResponseWriterWrapper) extractAPIResponseTimestamp(c *gin.Context) time
}
func (w *ResponseWriterWrapper) extractRequestBody(c *gin.Context) []byte {
if c != nil {
if bodyOverride, isExist := c.Get(requestBodyOverrideContextKey); isExist {
switch value := bodyOverride.(type) {
case []byte:
if len(value) > 0 {
return bytes.Clone(value)
}
case string:
if strings.TrimSpace(value) != "" {
return []byte(value)
}
}
}
if body := extractBodyOverride(c, requestBodyOverrideContextKey); len(body) > 0 {
return body
}
if w.requestInfo != nil && len(w.requestInfo.Body) > 0 {
return w.requestInfo.Body
@@ -384,13 +391,48 @@ func (w *ResponseWriterWrapper) extractRequestBody(c *gin.Context) []byte {
return nil
}
func (w *ResponseWriterWrapper) logRequest(requestBody []byte, statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseTimestamp time.Time, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
func (w *ResponseWriterWrapper) extractResponseBody(c *gin.Context) []byte {
if body := extractBodyOverride(c, responseBodyOverrideContextKey); len(body) > 0 {
return body
}
if w.body == nil || w.body.Len() == 0 {
return nil
}
return bytes.Clone(w.body.Bytes())
}
func (w *ResponseWriterWrapper) extractWebsocketTimeline(c *gin.Context) []byte {
return extractBodyOverride(c, websocketTimelineOverrideContextKey)
}
func extractBodyOverride(c *gin.Context, key string) []byte {
if c == nil {
return nil
}
bodyOverride, isExist := c.Get(key)
if !isExist {
return nil
}
switch value := bodyOverride.(type) {
case []byte:
if len(value) > 0 {
return bytes.Clone(value)
}
case string:
if strings.TrimSpace(value) != "" {
return []byte(value)
}
}
return nil
}
func (w *ResponseWriterWrapper) logRequest(requestBody []byte, statusCode int, headers map[string][]string, body, websocketTimeline, apiRequestBody, apiResponseBody, apiWebsocketTimeline []byte, apiResponseTimestamp time.Time, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
if w.requestInfo == nil {
return nil
}
if loggerWithOptions, ok := w.logger.(interface {
LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string, time.Time, time.Time) error
LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool, string, time.Time, time.Time) error
}); ok {
return loggerWithOptions.LogRequestWithOptions(
w.requestInfo.URL,
@@ -400,8 +442,10 @@ func (w *ResponseWriterWrapper) logRequest(requestBody []byte, statusCode int, h
statusCode,
headers,
body,
websocketTimeline,
apiRequestBody,
apiResponseBody,
apiWebsocketTimeline,
apiResponseErrors,
forceLog,
w.requestInfo.RequestID,
@@ -418,8 +462,10 @@ func (w *ResponseWriterWrapper) logRequest(requestBody []byte, statusCode int, h
statusCode,
headers,
body,
websocketTimeline,
apiRequestBody,
apiResponseBody,
apiWebsocketTimeline,
apiResponseErrors,
w.requestInfo.RequestID,
w.requestInfo.Timestamp,

View File

@@ -1,10 +1,14 @@
package middleware
import (
"bytes"
"net/http/httptest"
"testing"
"time"
"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
)
func TestExtractRequestBodyPrefersOverride(t *testing.T) {
@@ -33,7 +37,7 @@ func TestExtractRequestBodySupportsStringOverride(t *testing.T) {
recorder := httptest.NewRecorder()
c, _ := gin.CreateTestContext(recorder)
wrapper := &ResponseWriterWrapper{}
wrapper := &ResponseWriterWrapper{body: &bytes.Buffer{}}
c.Set(requestBodyOverrideContextKey, "override-as-string")
body := wrapper.extractRequestBody(c)
@@ -41,3 +45,158 @@ func TestExtractRequestBodySupportsStringOverride(t *testing.T) {
t.Fatalf("request body = %q, want %q", string(body), "override-as-string")
}
}
func TestExtractResponseBodyPrefersOverride(t *testing.T) {
gin.SetMode(gin.TestMode)
recorder := httptest.NewRecorder()
c, _ := gin.CreateTestContext(recorder)
wrapper := &ResponseWriterWrapper{body: &bytes.Buffer{}}
wrapper.body.WriteString("original-response")
body := wrapper.extractResponseBody(c)
if string(body) != "original-response" {
t.Fatalf("response body = %q, want %q", string(body), "original-response")
}
c.Set(responseBodyOverrideContextKey, []byte("override-response"))
body = wrapper.extractResponseBody(c)
if string(body) != "override-response" {
t.Fatalf("response body = %q, want %q", string(body), "override-response")
}
body[0] = 'X'
if got := wrapper.extractResponseBody(c); string(got) != "override-response" {
t.Fatalf("response override should be cloned, got %q", string(got))
}
}
func TestExtractResponseBodySupportsStringOverride(t *testing.T) {
gin.SetMode(gin.TestMode)
recorder := httptest.NewRecorder()
c, _ := gin.CreateTestContext(recorder)
wrapper := &ResponseWriterWrapper{}
c.Set(responseBodyOverrideContextKey, "override-response-as-string")
body := wrapper.extractResponseBody(c)
if string(body) != "override-response-as-string" {
t.Fatalf("response body = %q, want %q", string(body), "override-response-as-string")
}
}
func TestExtractBodyOverrideClonesBytes(t *testing.T) {
gin.SetMode(gin.TestMode)
recorder := httptest.NewRecorder()
c, _ := gin.CreateTestContext(recorder)
override := []byte("body-override")
c.Set(requestBodyOverrideContextKey, override)
body := extractBodyOverride(c, requestBodyOverrideContextKey)
if !bytes.Equal(body, override) {
t.Fatalf("body override = %q, want %q", string(body), string(override))
}
body[0] = 'X'
if !bytes.Equal(override, []byte("body-override")) {
t.Fatalf("override mutated: %q", string(override))
}
}
func TestExtractWebsocketTimelineUsesOverride(t *testing.T) {
gin.SetMode(gin.TestMode)
recorder := httptest.NewRecorder()
c, _ := gin.CreateTestContext(recorder)
wrapper := &ResponseWriterWrapper{}
if got := wrapper.extractWebsocketTimeline(c); got != nil {
t.Fatalf("expected nil websocket timeline, got %q", string(got))
}
c.Set(websocketTimelineOverrideContextKey, []byte("timeline"))
body := wrapper.extractWebsocketTimeline(c)
if string(body) != "timeline" {
t.Fatalf("websocket timeline = %q, want %q", string(body), "timeline")
}
}
func TestFinalizeStreamingWritesAPIWebsocketTimeline(t *testing.T) {
gin.SetMode(gin.TestMode)
recorder := httptest.NewRecorder()
c, _ := gin.CreateTestContext(recorder)
streamWriter := &testStreamingLogWriter{}
wrapper := &ResponseWriterWrapper{
ResponseWriter: c.Writer,
logger: &testRequestLogger{enabled: true},
requestInfo: &RequestInfo{
URL: "/v1/responses",
Method: "POST",
Headers: map[string][]string{"Content-Type": {"application/json"}},
RequestID: "req-1",
Timestamp: time.Date(2026, time.April, 1, 12, 0, 0, 0, time.UTC),
},
isStreaming: true,
streamWriter: streamWriter,
}
c.Set("API_WEBSOCKET_TIMELINE", []byte("Timestamp: 2026-04-01T12:00:00Z\nEvent: api.websocket.request\n{}"))
if err := wrapper.Finalize(c); err != nil {
t.Fatalf("Finalize error: %v", err)
}
if string(streamWriter.apiWebsocketTimeline) != "Timestamp: 2026-04-01T12:00:00Z\nEvent: api.websocket.request\n{}" {
t.Fatalf("stream writer websocket timeline = %q", string(streamWriter.apiWebsocketTimeline))
}
if !streamWriter.closed {
t.Fatal("expected stream writer to be closed")
}
}
type testRequestLogger struct {
enabled bool
}
func (l *testRequestLogger) LogRequest(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []byte, []byte, []*interfaces.ErrorMessage, string, time.Time, time.Time) error {
return nil
}
func (l *testRequestLogger) LogStreamingRequest(string, string, map[string][]string, []byte, string) (logging.StreamingLogWriter, error) {
return &testStreamingLogWriter{}, nil
}
func (l *testRequestLogger) IsEnabled() bool {
return l.enabled
}
type testStreamingLogWriter struct {
apiWebsocketTimeline []byte
closed bool
}
func (w *testStreamingLogWriter) WriteChunkAsync([]byte) {}
func (w *testStreamingLogWriter) WriteStatus(int, map[string][]string) error {
return nil
}
func (w *testStreamingLogWriter) WriteAPIRequest([]byte) error {
return nil
}
func (w *testStreamingLogWriter) WriteAPIResponse([]byte) error {
return nil
}
func (w *testStreamingLogWriter) WriteAPIWebsocketTimeline(apiWebsocketTimeline []byte) error {
w.apiWebsocketTimeline = bytes.Clone(apiWebsocketTimeline)
return nil
}
func (w *testStreamingLogWriter) SetFirstChunkTimestamp(time.Time) {}
func (w *testStreamingLogWriter) Close() error {
w.closed = true
return nil
}

View File

@@ -253,6 +253,7 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
log.Debugf("amp model mapping: request %s -> %s", normalizedModel, resolvedModel)
logAmpRouting(RouteTypeModelMapping, modelName, resolvedModel, providerName, requestPath)
rewriter := NewResponseRewriter(c.Writer, modelName)
rewriter.suppressThinking = true
c.Writer = rewriter
// Filter Anthropic-Beta header only for local handling paths
filterAntropicBetaHeader(c)
@@ -267,6 +268,7 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
// proxies (e.g. NewAPI) may return a different model name and lack
// Amp-required fields like thinking.signature.
rewriter := NewResponseRewriter(c.Writer, modelName)
rewriter.suppressThinking = providerName != "claude"
c.Writer = rewriter
// Filter Anthropic-Beta header only for local handling paths
filterAntropicBetaHeader(c)

View File

@@ -17,19 +17,18 @@ import (
// and to keep Amp-compatible response shapes.
type ResponseRewriter struct {
gin.ResponseWriter
body *bytes.Buffer
originalModel string
isStreaming bool
suppressedContentBlock map[int]struct{}
body *bytes.Buffer
originalModel string
isStreaming bool
suppressThinking bool
}
// NewResponseRewriter creates a new response rewriter for model name substitution.
func NewResponseRewriter(w gin.ResponseWriter, originalModel string) *ResponseRewriter {
return &ResponseRewriter{
ResponseWriter: w,
body: &bytes.Buffer{},
originalModel: originalModel,
suppressedContentBlock: make(map[int]struct{}),
ResponseWriter: w,
body: &bytes.Buffer{},
originalModel: originalModel,
}
}
@@ -99,7 +98,8 @@ func (rw *ResponseRewriter) Write(data []byte) (int, error) {
}
if rw.isStreaming {
n, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
rewritten := rw.rewriteStreamChunk(data)
n, err := rw.ResponseWriter.Write(rewritten)
if err == nil {
if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
flusher.Flush()
@@ -162,19 +162,10 @@ func ensureAmpSignature(data []byte) []byte {
return data
}
func (rw *ResponseRewriter) markSuppressedContentBlock(index int) {
if rw.suppressedContentBlock == nil {
rw.suppressedContentBlock = make(map[int]struct{})
}
rw.suppressedContentBlock[index] = struct{}{}
}
func (rw *ResponseRewriter) isSuppressedContentBlock(index int) bool {
_, ok := rw.suppressedContentBlock[index]
return ok
}
func (rw *ResponseRewriter) suppressAmpThinking(data []byte) []byte {
if !rw.suppressThinking {
return data
}
if gjson.GetBytes(data, `content.#(type=="tool_use")`).Exists() {
filtered := gjson.GetBytes(data, `content.#(type!="thinking")#`)
if filtered.Exists() {
@@ -185,33 +176,11 @@ func (rw *ResponseRewriter) suppressAmpThinking(data []byte) []byte {
data, err = sjson.SetBytes(data, "content", filtered.Value())
if err != nil {
log.Warnf("Amp ResponseRewriter: failed to suppress thinking blocks: %v", err)
} else {
log.Debugf("Amp ResponseRewriter: Suppressed %d thinking blocks due to tool usage", originalCount-filteredCount)
}
}
}
}
eventType := gjson.GetBytes(data, "type").String()
indexResult := gjson.GetBytes(data, "index")
if eventType == "content_block_start" && gjson.GetBytes(data, "content_block.type").String() == "thinking" && indexResult.Exists() {
rw.markSuppressedContentBlock(int(indexResult.Int()))
return nil
}
if gjson.GetBytes(data, "delta.type").String() == "thinking_delta" {
if indexResult.Exists() {
rw.markSuppressedContentBlock(int(indexResult.Int()))
}
return nil
}
if eventType == "content_block_stop" && indexResult.Exists() {
index := int(indexResult.Int())
if rw.isSuppressedContentBlock(index) {
delete(rw.suppressedContentBlock, index)
return nil
}
}
return data
}
@@ -262,6 +231,10 @@ func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte {
jsonData := bytes.TrimPrefix(bytes.TrimSpace(lines[dataIdx]), []byte("data: "))
if len(jsonData) > 0 && jsonData[0] == '{' {
rewritten := rw.rewriteStreamEvent(jsonData)
if rewritten == nil {
i = dataIdx + 1
continue
}
// Emit event line
out = append(out, line)
// Emit blank lines between event and data
@@ -287,7 +260,9 @@ func (rw *ResponseRewriter) rewriteStreamChunk(chunk []byte) []byte {
jsonData := bytes.TrimPrefix(trimmed, []byte("data: "))
if len(jsonData) > 0 && jsonData[0] == '{' {
rewritten := rw.rewriteStreamEvent(jsonData)
out = append(out, append([]byte("data: "), rewritten...))
if rewritten != nil {
out = append(out, append([]byte("data: "), rewritten...))
}
i++
continue
}

View File

@@ -102,7 +102,7 @@ func TestRewriteStreamChunk_MessageModel(t *testing.T) {
}
func TestRewriteStreamChunk_PreservesThinkingWithSignatureInjection(t *testing.T) {
rw := &ResponseRewriter{suppressedContentBlock: make(map[int]struct{})}
rw := &ResponseRewriter{}
chunk := []byte("event: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"thinking\",\"thinking\":\"\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"thinking_delta\",\"thinking\":\"abc\"}}\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0}\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":1,\"content_block\":{\"type\":\"tool_use\",\"name\":\"bash\",\"input\":{}}}\n\n")
result := rw.rewriteStreamChunk(chunk)

View File

@@ -323,6 +323,10 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
// setupRoutes configures the API routes for the server.
// It defines the endpoints and associates them with their respective handlers.
func (s *Server) setupRoutes() {
s.engine.GET("/healthz", func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"status": "ok"})
})
s.engine.GET("/management.html", s.serveManagementControlPanel)
openaiHandlers := openai.NewOpenAIAPIHandler(s.handlers)
geminiHandlers := gemini.NewGeminiAPIHandler(s.handlers)
@@ -569,6 +573,8 @@ func (s *Server) registerManagementRoutes() {
mgmt.PUT("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
mgmt.PATCH("/quota-exceeded/switch-preview-model", s.mgmt.PutSwitchPreviewModel)
mgmt.GET("/copilot-quota", s.mgmt.GetCopilotQuota)
mgmt.GET("/api-keys", s.mgmt.GetAPIKeys)
mgmt.PUT("/api-keys", s.mgmt.PutAPIKeys)
mgmt.PATCH("/api-keys", s.mgmt.PatchAPIKeys)

View File

@@ -1,6 +1,7 @@
package api
import (
"encoding/json"
"net/http"
"net/http/httptest"
"os"
@@ -46,6 +47,28 @@ func newTestServer(t *testing.T) *Server {
return NewServer(cfg, authManager, accessManager, configPath)
}
func TestHealthz(t *testing.T) {
server := newTestServer(t)
req := httptest.NewRequest(http.MethodGet, "/healthz", nil)
rr := httptest.NewRecorder()
server.engine.ServeHTTP(rr, req)
if rr.Code != http.StatusOK {
t.Fatalf("unexpected status code: got %d want %d; body=%s", rr.Code, http.StatusOK, rr.Body.String())
}
var resp struct {
Status string `json:"status"`
}
if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
t.Fatalf("failed to parse response JSON: %v; body=%s", err, rr.Body.String())
}
if resp.Status != "ok" {
t.Fatalf("unexpected response status: got %q want %q", resp.Status, "ok")
}
}
func TestAmpProviderModelRoutes(t *testing.T) {
testCases := []struct {
name string
@@ -172,6 +195,8 @@ func TestDefaultRequestLoggerFactory_UsesResolvedLogDirectory(t *testing.T) {
nil,
nil,
nil,
nil,
nil,
true,
"issue-1711",
time.Now(),

View File

@@ -88,7 +88,7 @@ func (o *ClaudeAuth) GenerateAuthURL(state string, pkceCodes *PKCECodes) (string
"client_id": {ClientID},
"response_type": {"code"},
"redirect_uri": {RedirectURI},
"scope": {"org:create_api_key user:profile user:inference"},
"scope": {"user:profile user:inference user:sessions:claude_code user:mcp_servers user:file_upload"},
"code_challenge": {pkceCodes.CodeChallenge},
"code_challenge_method": {"S256"},
"state": {state},

View File

@@ -235,6 +235,74 @@ type CopilotModelEntry struct {
Capabilities map[string]any `json:"capabilities,omitempty"`
}
// CopilotModelLimits holds the token limits returned by the Copilot /models API
// under capabilities.limits. These limits vary by account type (individual vs
// business) and are the authoritative source for enforcing prompt size.
type CopilotModelLimits struct {
// MaxContextWindowTokens is the total context window (prompt + output).
MaxContextWindowTokens int
// MaxPromptTokens is the hard limit on input/prompt tokens.
// Exceeding this triggers a 400 error from the Copilot API.
MaxPromptTokens int
// MaxOutputTokens is the maximum number of output/completion tokens.
MaxOutputTokens int
}
// Limits extracts the token limits from the model's capabilities map.
// Returns nil if no limits are available or the structure is unexpected.
//
// Expected Copilot API shape:
//
// "capabilities": {
// "limits": {
// "max_context_window_tokens": 200000,
// "max_prompt_tokens": 168000,
// "max_output_tokens": 32000
// }
// }
func (e *CopilotModelEntry) Limits() *CopilotModelLimits {
if e.Capabilities == nil {
return nil
}
limitsRaw, ok := e.Capabilities["limits"]
if !ok {
return nil
}
limitsMap, ok := limitsRaw.(map[string]any)
if !ok {
return nil
}
result := &CopilotModelLimits{
MaxContextWindowTokens: anyToInt(limitsMap["max_context_window_tokens"]),
MaxPromptTokens: anyToInt(limitsMap["max_prompt_tokens"]),
MaxOutputTokens: anyToInt(limitsMap["max_output_tokens"]),
}
// Only return if at least one field is populated.
if result.MaxContextWindowTokens == 0 && result.MaxPromptTokens == 0 && result.MaxOutputTokens == 0 {
return nil
}
return result
}
// anyToInt converts a JSON-decoded numeric value to int.
// Go's encoding/json decodes numbers into float64 when the target is any/interface{}.
func anyToInt(v any) int {
switch n := v.(type) {
case float64:
return int(n)
case float32:
return int(n)
case int:
return n
case int64:
return int(n)
default:
return 0
}
}
// CopilotModelsResponse represents the response from the Copilot /models endpoint.
type CopilotModelsResponse struct {
Data []CopilotModelEntry `json:"data"`

View File

@@ -9,6 +9,10 @@ type SDKConfig struct {
// ProxyURL is the URL of an optional proxy server to use for outbound requests.
ProxyURL string `yaml:"proxy-url" json:"proxy-url"`
// EnableGeminiCLIEndpoint controls whether Gemini CLI internal endpoints (/v1internal:*) are enabled.
// Default is false for safety; when false, /v1internal:* requests are rejected.
EnableGeminiCLIEndpoint bool `yaml:"enable-gemini-cli-endpoint" json:"enable-gemini-cli-endpoint"`
// ForceModelPrefix requires explicit model prefixes (e.g., "teamA/gemini-3-pro-preview")
// to target prefixed credentials. When false, unprefixed model requests may use prefixed
// credentials as well.

View File

@@ -4,6 +4,7 @@
package logging
import (
"bufio"
"bytes"
"compress/flate"
"compress/gzip"
@@ -41,15 +42,17 @@ type RequestLogger interface {
// - statusCode: The response status code
// - responseHeaders: The response headers
// - response: The raw response data
// - websocketTimeline: Optional downstream websocket event timeline
// - apiRequest: The API request data
// - apiResponse: The API response data
// - apiWebsocketTimeline: Optional upstream websocket event timeline
// - requestID: Optional request ID for log file naming
// - requestTimestamp: When the request was received
// - apiResponseTimestamp: When the API response was received
//
// Returns:
// - error: An error if logging fails, nil otherwise
LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error
LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error
// LogStreamingRequest initiates logging for a streaming request and returns a writer for chunks.
//
@@ -111,6 +114,16 @@ type StreamingLogWriter interface {
// - error: An error if writing fails, nil otherwise
WriteAPIResponse(apiResponse []byte) error
// WriteAPIWebsocketTimeline writes the upstream websocket timeline to the log.
// This should be called when upstream communication happened over websocket.
//
// Parameters:
// - apiWebsocketTimeline: The upstream websocket event timeline
//
// Returns:
// - error: An error if writing fails, nil otherwise
WriteAPIWebsocketTimeline(apiWebsocketTimeline []byte) error
// SetFirstChunkTimestamp sets the TTFB timestamp captured when first chunk was received.
//
// Parameters:
@@ -203,17 +216,17 @@ func (l *FileRequestLogger) SetErrorLogsMaxFiles(maxFiles int) {
//
// Returns:
// - error: An error if logging fails, nil otherwise
func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false, requestID, requestTimestamp, apiResponseTimestamp)
func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline []byte, apiResponseErrors []*interfaces.ErrorMessage, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline, apiResponseErrors, false, requestID, requestTimestamp, apiResponseTimestamp)
}
// LogRequestWithOptions logs a request with optional forced logging behavior.
// The force flag allows writing error logs even when regular request logging is disabled.
func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force, requestID, requestTimestamp, apiResponseTimestamp)
func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline, apiResponseErrors, force, requestID, requestTimestamp, apiResponseTimestamp)
}
func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool, requestID string, requestTimestamp, apiResponseTimestamp time.Time) error {
if !l.enabled && !force {
return nil
}
@@ -260,8 +273,10 @@ func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[st
requestHeaders,
body,
requestBodyPath,
websocketTimeline,
apiRequest,
apiResponse,
apiWebsocketTimeline,
apiResponseErrors,
statusCode,
responseHeaders,
@@ -518,8 +533,10 @@ func (l *FileRequestLogger) writeNonStreamingLog(
requestHeaders map[string][]string,
requestBody []byte,
requestBodyPath string,
websocketTimeline []byte,
apiRequest []byte,
apiResponse []byte,
apiWebsocketTimeline []byte,
apiResponseErrors []*interfaces.ErrorMessage,
statusCode int,
responseHeaders map[string][]string,
@@ -531,7 +548,16 @@ func (l *FileRequestLogger) writeNonStreamingLog(
if requestTimestamp.IsZero() {
requestTimestamp = time.Now()
}
if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, requestTimestamp); errWrite != nil {
isWebsocketTranscript := hasSectionPayload(websocketTimeline)
downstreamTransport := inferDownstreamTransport(requestHeaders, websocketTimeline)
upstreamTransport := inferUpstreamTransport(apiRequest, apiResponse, apiWebsocketTimeline, apiResponseErrors)
if errWrite := writeRequestInfoWithBody(w, url, method, requestHeaders, requestBody, requestBodyPath, requestTimestamp, downstreamTransport, upstreamTransport, !isWebsocketTranscript); errWrite != nil {
return errWrite
}
if errWrite := writeAPISection(w, "=== WEBSOCKET TIMELINE ===\n", "=== WEBSOCKET TIMELINE", websocketTimeline, time.Time{}); errWrite != nil {
return errWrite
}
if errWrite := writeAPISection(w, "=== API WEBSOCKET TIMELINE ===\n", "=== API WEBSOCKET TIMELINE", apiWebsocketTimeline, time.Time{}); errWrite != nil {
return errWrite
}
if errWrite := writeAPISection(w, "=== API REQUEST ===\n", "=== API REQUEST", apiRequest, time.Time{}); errWrite != nil {
@@ -543,6 +569,12 @@ func (l *FileRequestLogger) writeNonStreamingLog(
if errWrite := writeAPISection(w, "=== API RESPONSE ===\n", "=== API RESPONSE", apiResponse, apiResponseTimestamp); errWrite != nil {
return errWrite
}
if isWebsocketTranscript {
// Intentionally omit the generic downstream HTTP response section for websocket
// transcripts. The durable session exchange is captured in WEBSOCKET TIMELINE,
// and appending a one-off upgrade response snapshot would dilute that transcript.
return nil
}
return writeResponseSection(w, statusCode, true, responseHeaders, bytes.NewReader(response), decompressErr, true)
}
@@ -553,6 +585,9 @@ func writeRequestInfoWithBody(
body []byte,
bodyPath string,
timestamp time.Time,
downstreamTransport string,
upstreamTransport string,
includeBody bool,
) error {
if _, errWrite := io.WriteString(w, "=== REQUEST INFO ===\n"); errWrite != nil {
return errWrite
@@ -566,10 +601,20 @@ func writeRequestInfoWithBody(
if _, errWrite := io.WriteString(w, fmt.Sprintf("Method: %s\n", method)); errWrite != nil {
return errWrite
}
if strings.TrimSpace(downstreamTransport) != "" {
if _, errWrite := io.WriteString(w, fmt.Sprintf("Downstream Transport: %s\n", downstreamTransport)); errWrite != nil {
return errWrite
}
}
if strings.TrimSpace(upstreamTransport) != "" {
if _, errWrite := io.WriteString(w, fmt.Sprintf("Upstream Transport: %s\n", upstreamTransport)); errWrite != nil {
return errWrite
}
}
if _, errWrite := io.WriteString(w, fmt.Sprintf("Timestamp: %s\n", timestamp.Format(time.RFC3339Nano))); errWrite != nil {
return errWrite
}
if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
if errWrite := writeSectionSpacing(w, 1); errWrite != nil {
return errWrite
}
@@ -584,36 +629,121 @@ func writeRequestInfoWithBody(
}
}
}
if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
if errWrite := writeSectionSpacing(w, 1); errWrite != nil {
return errWrite
}
if !includeBody {
return nil
}
if _, errWrite := io.WriteString(w, "=== REQUEST BODY ===\n"); errWrite != nil {
return errWrite
}
bodyTrailingNewlines := 1
if bodyPath != "" {
bodyFile, errOpen := os.Open(bodyPath)
if errOpen != nil {
return errOpen
}
if _, errCopy := io.Copy(w, bodyFile); errCopy != nil {
tracker := &trailingNewlineTrackingWriter{writer: w}
written, errCopy := io.Copy(tracker, bodyFile)
if errCopy != nil {
_ = bodyFile.Close()
return errCopy
}
if written > 0 {
bodyTrailingNewlines = tracker.trailingNewlines
}
if errClose := bodyFile.Close(); errClose != nil {
log.WithError(errClose).Warn("failed to close request body temp file")
}
} else if _, errWrite := w.Write(body); errWrite != nil {
return errWrite
} else if len(body) > 0 {
bodyTrailingNewlines = countTrailingNewlinesBytes(body)
}
if _, errWrite := io.WriteString(w, "\n\n"); errWrite != nil {
if errWrite := writeSectionSpacing(w, bodyTrailingNewlines); errWrite != nil {
return errWrite
}
return nil
}
func countTrailingNewlinesBytes(payload []byte) int {
count := 0
for i := len(payload) - 1; i >= 0; i-- {
if payload[i] != '\n' {
break
}
count++
}
return count
}
func writeSectionSpacing(w io.Writer, trailingNewlines int) error {
missingNewlines := 3 - trailingNewlines
if missingNewlines <= 0 {
return nil
}
_, errWrite := io.WriteString(w, strings.Repeat("\n", missingNewlines))
return errWrite
}
type trailingNewlineTrackingWriter struct {
writer io.Writer
trailingNewlines int
}
func (t *trailingNewlineTrackingWriter) Write(payload []byte) (int, error) {
written, errWrite := t.writer.Write(payload)
if written > 0 {
writtenPayload := payload[:written]
trailingNewlines := countTrailingNewlinesBytes(writtenPayload)
if trailingNewlines == len(writtenPayload) {
t.trailingNewlines += trailingNewlines
} else {
t.trailingNewlines = trailingNewlines
}
}
return written, errWrite
}
func hasSectionPayload(payload []byte) bool {
return len(bytes.TrimSpace(payload)) > 0
}
func inferDownstreamTransport(headers map[string][]string, websocketTimeline []byte) string {
if hasSectionPayload(websocketTimeline) {
return "websocket"
}
for key, values := range headers {
if strings.EqualFold(strings.TrimSpace(key), "Upgrade") {
for _, value := range values {
if strings.EqualFold(strings.TrimSpace(value), "websocket") {
return "websocket"
}
}
}
}
return "http"
}
func inferUpstreamTransport(apiRequest, apiResponse, apiWebsocketTimeline []byte, _ []*interfaces.ErrorMessage) string {
hasHTTP := hasSectionPayload(apiRequest) || hasSectionPayload(apiResponse)
hasWS := hasSectionPayload(apiWebsocketTimeline)
switch {
case hasHTTP && hasWS:
return "websocket+http"
case hasWS:
return "websocket"
case hasHTTP:
return "http"
default:
return ""
}
}
func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, payload []byte, timestamp time.Time) error {
if len(payload) == 0 {
return nil
@@ -623,11 +753,6 @@ func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, pa
if _, errWrite := w.Write(payload); errWrite != nil {
return errWrite
}
if !bytes.HasSuffix(payload, []byte("\n")) {
if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
return errWrite
}
}
} else {
if _, errWrite := io.WriteString(w, sectionHeader); errWrite != nil {
return errWrite
@@ -640,12 +765,9 @@ func writeAPISection(w io.Writer, sectionHeader string, sectionPrefix string, pa
if _, errWrite := w.Write(payload); errWrite != nil {
return errWrite
}
if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
return errWrite
}
}
if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
if errWrite := writeSectionSpacing(w, countTrailingNewlinesBytes(payload)); errWrite != nil {
return errWrite
}
return nil
@@ -662,12 +784,17 @@ func writeAPIErrorResponses(w io.Writer, apiResponseErrors []*interfaces.ErrorMe
if _, errWrite := io.WriteString(w, fmt.Sprintf("HTTP Status: %d\n", apiResponseErrors[i].StatusCode)); errWrite != nil {
return errWrite
}
trailingNewlines := 1
if apiResponseErrors[i].Error != nil {
if _, errWrite := io.WriteString(w, apiResponseErrors[i].Error.Error()); errWrite != nil {
errText := apiResponseErrors[i].Error.Error()
if _, errWrite := io.WriteString(w, errText); errWrite != nil {
return errWrite
}
if errText != "" {
trailingNewlines = countTrailingNewlinesBytes([]byte(errText))
}
}
if _, errWrite := io.WriteString(w, "\n\n"); errWrite != nil {
if errWrite := writeSectionSpacing(w, trailingNewlines); errWrite != nil {
return errWrite
}
}
@@ -694,12 +821,18 @@ func writeResponseSection(w io.Writer, statusCode int, statusWritten bool, respo
}
}
if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
return errWrite
var bufferedReader *bufio.Reader
if responseReader != nil {
bufferedReader = bufio.NewReader(responseReader)
}
if !responseBodyStartsWithLeadingNewline(bufferedReader) {
if _, errWrite := io.WriteString(w, "\n"); errWrite != nil {
return errWrite
}
}
if responseReader != nil {
if _, errCopy := io.Copy(w, responseReader); errCopy != nil {
if bufferedReader != nil {
if _, errCopy := io.Copy(w, bufferedReader); errCopy != nil {
return errCopy
}
}
@@ -717,6 +850,19 @@ func writeResponseSection(w io.Writer, statusCode int, statusWritten bool, respo
return nil
}
func responseBodyStartsWithLeadingNewline(reader *bufio.Reader) bool {
if reader == nil {
return false
}
if peeked, _ := reader.Peek(2); len(peeked) >= 2 && peeked[0] == '\r' && peeked[1] == '\n' {
return true
}
if peeked, _ := reader.Peek(1); len(peeked) >= 1 && peeked[0] == '\n' {
return true
}
return false
}
// formatLogContent creates the complete log content for non-streaming requests.
//
// Parameters:
@@ -724,6 +870,7 @@ func writeResponseSection(w io.Writer, statusCode int, statusWritten bool, respo
// - method: The HTTP method
// - headers: The request headers
// - body: The request body
// - websocketTimeline: The downstream websocket event timeline
// - apiRequest: The API request data
// - apiResponse: The API response data
// - response: The raw response data
@@ -732,11 +879,42 @@ func writeResponseSection(w io.Writer, statusCode int, statusWritten bool, respo
//
// Returns:
// - string: The formatted log content
func (l *FileRequestLogger) formatLogContent(url, method string, headers map[string][]string, body, apiRequest, apiResponse, response []byte, status int, responseHeaders map[string][]string, apiResponseErrors []*interfaces.ErrorMessage) string {
func (l *FileRequestLogger) formatLogContent(url, method string, headers map[string][]string, body, websocketTimeline, apiRequest, apiResponse, apiWebsocketTimeline, response []byte, status int, responseHeaders map[string][]string, apiResponseErrors []*interfaces.ErrorMessage) string {
var content strings.Builder
isWebsocketTranscript := hasSectionPayload(websocketTimeline)
downstreamTransport := inferDownstreamTransport(headers, websocketTimeline)
upstreamTransport := inferUpstreamTransport(apiRequest, apiResponse, apiWebsocketTimeline, apiResponseErrors)
// Request info
content.WriteString(l.formatRequestInfo(url, method, headers, body))
content.WriteString(l.formatRequestInfo(url, method, headers, body, downstreamTransport, upstreamTransport, !isWebsocketTranscript))
if len(websocketTimeline) > 0 {
if bytes.HasPrefix(websocketTimeline, []byte("=== WEBSOCKET TIMELINE")) {
content.Write(websocketTimeline)
if !bytes.HasSuffix(websocketTimeline, []byte("\n")) {
content.WriteString("\n")
}
} else {
content.WriteString("=== WEBSOCKET TIMELINE ===\n")
content.Write(websocketTimeline)
content.WriteString("\n")
}
content.WriteString("\n")
}
if len(apiWebsocketTimeline) > 0 {
if bytes.HasPrefix(apiWebsocketTimeline, []byte("=== API WEBSOCKET TIMELINE")) {
content.Write(apiWebsocketTimeline)
if !bytes.HasSuffix(apiWebsocketTimeline, []byte("\n")) {
content.WriteString("\n")
}
} else {
content.WriteString("=== API WEBSOCKET TIMELINE ===\n")
content.Write(apiWebsocketTimeline)
content.WriteString("\n")
}
content.WriteString("\n")
}
if len(apiRequest) > 0 {
if bytes.HasPrefix(apiRequest, []byte("=== API REQUEST")) {
@@ -773,6 +951,12 @@ func (l *FileRequestLogger) formatLogContent(url, method string, headers map[str
content.WriteString("\n")
}
if isWebsocketTranscript {
// Mirror writeNonStreamingLog: websocket transcripts end with the dedicated
// timeline sections instead of a generic downstream HTTP response block.
return content.String()
}
// Response section
content.WriteString("=== RESPONSE ===\n")
content.WriteString(fmt.Sprintf("Status: %d\n", status))
@@ -933,13 +1117,19 @@ func (l *FileRequestLogger) decompressZstd(data []byte) ([]byte, error) {
//
// Returns:
// - string: The formatted request information
func (l *FileRequestLogger) formatRequestInfo(url, method string, headers map[string][]string, body []byte) string {
func (l *FileRequestLogger) formatRequestInfo(url, method string, headers map[string][]string, body []byte, downstreamTransport string, upstreamTransport string, includeBody bool) string {
var content strings.Builder
content.WriteString("=== REQUEST INFO ===\n")
content.WriteString(fmt.Sprintf("Version: %s\n", buildinfo.Version))
content.WriteString(fmt.Sprintf("URL: %s\n", url))
content.WriteString(fmt.Sprintf("Method: %s\n", method))
if strings.TrimSpace(downstreamTransport) != "" {
content.WriteString(fmt.Sprintf("Downstream Transport: %s\n", downstreamTransport))
}
if strings.TrimSpace(upstreamTransport) != "" {
content.WriteString(fmt.Sprintf("Upstream Transport: %s\n", upstreamTransport))
}
content.WriteString(fmt.Sprintf("Timestamp: %s\n", time.Now().Format(time.RFC3339Nano)))
content.WriteString("\n")
@@ -952,6 +1142,10 @@ func (l *FileRequestLogger) formatRequestInfo(url, method string, headers map[st
}
content.WriteString("\n")
if !includeBody {
return content.String()
}
content.WriteString("=== REQUEST BODY ===\n")
content.Write(body)
content.WriteString("\n\n")
@@ -1011,6 +1205,9 @@ type FileStreamingLogWriter struct {
// apiResponse stores the upstream API response data.
apiResponse []byte
// apiWebsocketTimeline stores the upstream websocket event timeline.
apiWebsocketTimeline []byte
// apiResponseTimestamp captures when the API response was received.
apiResponseTimestamp time.Time
}
@@ -1092,6 +1289,21 @@ func (w *FileStreamingLogWriter) WriteAPIResponse(apiResponse []byte) error {
return nil
}
// WriteAPIWebsocketTimeline buffers the upstream websocket timeline for later writing.
//
// Parameters:
// - apiWebsocketTimeline: The upstream websocket event timeline
//
// Returns:
// - error: Always returns nil (buffering cannot fail)
func (w *FileStreamingLogWriter) WriteAPIWebsocketTimeline(apiWebsocketTimeline []byte) error {
if len(apiWebsocketTimeline) == 0 {
return nil
}
w.apiWebsocketTimeline = bytes.Clone(apiWebsocketTimeline)
return nil
}
func (w *FileStreamingLogWriter) SetFirstChunkTimestamp(timestamp time.Time) {
if !timestamp.IsZero() {
w.apiResponseTimestamp = timestamp
@@ -1100,7 +1312,7 @@ func (w *FileStreamingLogWriter) SetFirstChunkTimestamp(timestamp time.Time) {
// Close finalizes the log file and cleans up resources.
// It writes all buffered data to the file in the correct order:
// API REQUEST -> API RESPONSE -> RESPONSE (status, headers, body chunks)
// API WEBSOCKET TIMELINE -> API REQUEST -> API RESPONSE -> RESPONSE (status, headers, body chunks)
//
// Returns:
// - error: An error if closing fails, nil otherwise
@@ -1182,7 +1394,10 @@ func (w *FileStreamingLogWriter) asyncWriter() {
}
func (w *FileStreamingLogWriter) writeFinalLog(logFile *os.File) error {
if errWrite := writeRequestInfoWithBody(logFile, w.url, w.method, w.requestHeaders, nil, w.requestBodyPath, w.timestamp); errWrite != nil {
if errWrite := writeRequestInfoWithBody(logFile, w.url, w.method, w.requestHeaders, nil, w.requestBodyPath, w.timestamp, "http", inferUpstreamTransport(w.apiRequest, w.apiResponse, w.apiWebsocketTimeline, nil), true); errWrite != nil {
return errWrite
}
if errWrite := writeAPISection(logFile, "=== API WEBSOCKET TIMELINE ===\n", "=== API WEBSOCKET TIMELINE", w.apiWebsocketTimeline, time.Time{}); errWrite != nil {
return errWrite
}
if errWrite := writeAPISection(logFile, "=== API REQUEST ===\n", "=== API REQUEST", w.apiRequest, time.Time{}); errWrite != nil {
@@ -1265,6 +1480,17 @@ func (w *NoOpStreamingLogWriter) WriteAPIResponse(_ []byte) error {
return nil
}
// WriteAPIWebsocketTimeline is a no-op implementation that does nothing and always returns nil.
//
// Parameters:
// - apiWebsocketTimeline: The upstream websocket event timeline (ignored)
//
// Returns:
// - error: Always returns nil
func (w *NoOpStreamingLogWriter) WriteAPIWebsocketTimeline(_ []byte) error {
return nil
}
func (w *NoOpStreamingLogWriter) SetFirstChunkTimestamp(_ time.Time) {}
// Close is a no-op implementation that does nothing and always returns nil.

View File

@@ -93,6 +93,30 @@ func GetAntigravityModels() []*ModelInfo {
func GetCodeBuddyModels() []*ModelInfo {
now := int64(1748044800) // 2025-05-24
return []*ModelInfo{
{
ID: "auto",
Object: "model",
Created: now,
OwnedBy: "tencent",
Type: "codebuddy",
DisplayName: "Auto",
Description: "Automatic model selection via CodeBuddy",
ContextLength: 128000,
MaxCompletionTokens: 32768,
SupportedEndpoints: []string{"/chat/completions"},
},
{
ID: "glm-5.0-turbo",
Object: "model",
Created: now,
OwnedBy: "tencent",
Type: "codebuddy",
DisplayName: "GLM-5.0 Turbo",
Description: "GLM-5.0 Turbo via CodeBuddy",
ContextLength: 128000,
MaxCompletionTokens: 32768,
SupportedEndpoints: []string{"/chat/completions"},
},
{
ID: "glm-5.0",
Object: "model",
@@ -118,13 +142,13 @@ func GetCodeBuddyModels() []*ModelInfo {
SupportedEndpoints: []string{"/chat/completions"},
},
{
ID: "minimax-m2.5",
ID: "minimax-m2.7",
Object: "model",
Created: now,
OwnedBy: "tencent",
Type: "codebuddy",
DisplayName: "MiniMax M2.5",
Description: "MiniMax M2.5 via CodeBuddy",
DisplayName: "MiniMax M2.7",
Description: "MiniMax M2.7 via CodeBuddy",
ContextLength: 200000,
MaxCompletionTokens: 32768,
SupportedEndpoints: []string{"/chat/completions"},
@@ -141,6 +165,19 @@ func GetCodeBuddyModels() []*ModelInfo {
MaxCompletionTokens: 32768,
SupportedEndpoints: []string{"/chat/completions"},
},
{
ID: "kimi-k2-thinking",
Object: "model",
Created: now,
OwnedBy: "tencent",
Type: "codebuddy",
DisplayName: "Kimi K2 Thinking",
Description: "Kimi K2 Thinking via CodeBuddy",
ContextLength: 128000,
MaxCompletionTokens: 32768,
Thinking: &ThinkingSupport{ZeroAllowed: true},
SupportedEndpoints: []string{"/chat/completions"},
},
{
ID: "deepseek-v3-2-volc",
Object: "model",
@@ -148,24 +185,11 @@ func GetCodeBuddyModels() []*ModelInfo {
OwnedBy: "tencent",
Type: "codebuddy",
DisplayName: "DeepSeek V3.2 (Volc)",
Description: "DeepSeek V3.2 via CodeBuddy (Volcano Engine)",
Description: "DeepSeek V3.2 via CodeBuddy",
ContextLength: 128000,
MaxCompletionTokens: 32768,
SupportedEndpoints: []string{"/chat/completions"},
},
{
ID: "hunyuan-2.0-thinking",
Object: "model",
Created: now,
OwnedBy: "tencent",
Type: "codebuddy",
DisplayName: "Hunyuan 2.0 Thinking",
Description: "Tencent Hunyuan 2.0 Thinking via CodeBuddy",
ContextLength: 128000,
MaxCompletionTokens: 32768,
Thinking: &ThinkingSupport{ZeroAllowed: true},
SupportedEndpoints: []string{"/chat/completions"},
},
}
}
@@ -525,6 +549,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "claude-opus-4.6",
@@ -537,6 +562,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "claude-sonnet-4",
@@ -549,6 +575,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "claude-sonnet-4.5",
@@ -561,6 +588,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "claude-sonnet-4.6",
@@ -573,6 +601,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
ContextLength: 200000,
MaxCompletionTokens: 64000,
SupportedEndpoints: []string{"/chat/completions"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
},
{
ID: "gemini-2.5-pro",

View File

@@ -280,6 +280,7 @@
"dynamic_allowed": true,
"levels": [
"low",
"medium",
"high"
]
}
@@ -554,6 +555,7 @@
"dynamic_allowed": true,
"levels": [
"low",
"medium",
"high"
]
}
@@ -610,6 +612,8 @@
"dynamic_allowed": true,
"levels": [
"minimal",
"low",
"medium",
"high"
]
}
@@ -838,6 +842,7 @@
"dynamic_allowed": true,
"levels": [
"low",
"medium",
"high"
]
}
@@ -896,6 +901,8 @@
"dynamic_allowed": true,
"levels": [
"minimal",
"low",
"medium",
"high"
]
}
@@ -1070,6 +1077,8 @@
"dynamic_allowed": true,
"levels": [
"minimal",
"low",
"medium",
"high"
]
}
@@ -1371,6 +1380,75 @@
"xhigh"
]
}
},
{
"id": "gpt-5.3-codex",
"object": "model",
"created": 1770307200,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.3 Codex",
"version": "gpt-5.3",
"description": "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
},
{
"id": "gpt-5.4",
"object": "model",
"created": 1772668800,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4",
"version": "gpt-5.4",
"description": "Stable version of GPT 5.4",
"context_length": 1050000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
},
{
"id": "gpt-5.4-mini",
"object": "model",
"created": 1773705600,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4 Mini",
"version": "gpt-5.4-mini",
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
}
],
"codex-team": [
@@ -1623,6 +1701,29 @@
"xhigh"
]
}
},
{
"id": "gpt-5.4-mini",
"object": "model",
"created": 1773705600,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4 Mini",
"version": "gpt-5.4-mini",
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
}
],
"codex-plus": [
@@ -1898,6 +1999,29 @@
"xhigh"
]
}
},
{
"id": "gpt-5.4-mini",
"object": "model",
"created": 1773705600,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4 Mini",
"version": "gpt-5.4-mini",
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
}
],
"codex-pro": [
@@ -2173,55 +2297,40 @@
"xhigh"
]
}
},
{
"id": "gpt-5.4-mini",
"object": "model",
"created": 1773705600,
"owned_by": "openai",
"type": "openai",
"display_name": "GPT 5.4 Mini",
"version": "gpt-5.4-mini",
"description": "GPT-5.4 mini brings the strengths of GPT-5.4 to a faster, more efficient model designed for high-volume workloads.",
"context_length": 400000,
"max_completion_tokens": 128000,
"supported_parameters": [
"tools"
],
"thinking": {
"levels": [
"low",
"medium",
"high",
"xhigh"
]
}
}
],
"qwen": [
{
"id": "qwen3-coder-plus",
"object": "model",
"created": 1753228800,
"owned_by": "qwen",
"type": "qwen",
"display_name": "Qwen3 Coder Plus",
"version": "3.0",
"description": "Advanced code generation and understanding model",
"context_length": 32768,
"max_completion_tokens": 8192,
"supported_parameters": [
"temperature",
"top_p",
"max_tokens",
"stream",
"stop"
]
},
{
"id": "qwen3-coder-flash",
"object": "model",
"created": 1753228800,
"owned_by": "qwen",
"type": "qwen",
"display_name": "Qwen3 Coder Flash",
"version": "3.0",
"description": "Fast code generation model",
"context_length": 8192,
"max_completion_tokens": 2048,
"supported_parameters": [
"temperature",
"top_p",
"max_tokens",
"stream",
"stop"
]
},
{
"id": "coder-model",
"object": "model",
"created": 1771171200,
"owned_by": "qwen",
"type": "qwen",
"display_name": "Qwen 3.5 Plus",
"version": "3.5",
"display_name": "Qwen 3.6 Plus",
"version": "3.6",
"description": "efficient hybrid model with leading coding performance",
"context_length": 1048576,
"max_completion_tokens": 65536,
@@ -2232,25 +2341,6 @@
"stream",
"stop"
]
},
{
"id": "vision-model",
"object": "model",
"created": 1758672000,
"owned_by": "qwen",
"type": "qwen",
"display_name": "Qwen3 Vision Model",
"version": "3.0",
"description": "Vision model model",
"context_length": 32768,
"max_completion_tokens": 2048,
"supported_parameters": [
"temperature",
"top_p",
"max_tokens",
"stream",
"stop"
]
}
],
"iflow": [
@@ -2639,11 +2729,12 @@
"context_length": 1048576,
"max_completion_tokens": 65535,
"thinking": {
"min": 128,
"max": 32768,
"min": 1,
"max": 65535,
"dynamic_allowed": true,
"levels": [
"low",
"medium",
"high"
]
}
@@ -2659,11 +2750,12 @@
"context_length": 1048576,
"max_completion_tokens": 65535,
"thinking": {
"min": 128,
"max": 32768,
"min": 1,
"max": 65535,
"dynamic_allowed": true,
"levels": [
"low",
"medium",
"high"
]
}

View File

@@ -16,6 +16,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/router-for-me/CLIProxyAPI/v6/internal/wsrelay"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -47,8 +48,16 @@ func NewAIStudioExecutor(cfg *config.Config, provider string, relay *wsrelay.Man
// Identifier returns the executor identifier.
func (e *AIStudioExecutor) Identifier() string { return "aistudio" }
// PrepareRequest prepares the HTTP request for execution (no-op for AI Studio).
func (e *AIStudioExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Auth) error {
// PrepareRequest prepares the HTTP request for execution.
func (e *AIStudioExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth) error {
if req == nil {
return nil
}
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(req, attrs)
return nil
}
@@ -67,6 +76,9 @@ func (e *AIStudioExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.A
return nil, fmt.Errorf("aistudio executor: missing auth")
}
httpReq := req.WithContext(ctx)
if err := e.PrepareRequest(httpReq, auth); err != nil {
return nil, err
}
if httpReq.URL == nil || strings.TrimSpace(httpReq.URL.String()) == "" {
return nil, fmt.Errorf("aistudio executor: request URL is empty")
}
@@ -131,6 +143,11 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth,
Headers: http.Header{"Content-Type": []string{"application/json"}},
Body: body.payload,
}
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(&http.Request{Header: wsReq.Headers}, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {
@@ -190,6 +207,11 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth
Headers: http.Header{"Content-Type": []string{"application/json"}},
Body: body.payload,
}
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(&http.Request{Header: wsReq.Headers}, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {
authID = auth.ID

View File

@@ -1320,6 +1320,11 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
if host := resolveHost(base); host != "" {
httpReq.Host = host
}
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
URL: requestURL.String(),
@@ -1614,6 +1619,11 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
if host := resolveHost(base); host != "" {
httpReq.Host = host
}
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {

View File

@@ -6,7 +6,6 @@ import (
"compress/flate"
"compress/gzip"
"context"
"crypto/rand"
"crypto/sha256"
"encoding/hex"
"encoding/json"
@@ -18,6 +17,7 @@ import (
"time"
"github.com/andybalholm/brotli"
"github.com/google/uuid"
"github.com/klauspost/compress/zstd"
claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -92,7 +92,7 @@ func (e *ClaudeExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut
if err := e.PrepareRequest(httpReq, auth); err != nil {
return nil, err
}
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
httpClient := helps.NewUtlsHTTPClient(e.cfg, auth, 0)
return httpClient.Do(httpReq)
}
@@ -137,6 +137,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
body = disableThinkingIfToolChoiceForced(body)
body = normalizeClaudeTemperatureForThinking(body)
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
if countCacheControls(body) == 0 {
@@ -188,7 +189,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
AuthValue: authValue,
})
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
httpClient := helps.NewUtlsHTTPClient(e.cfg, auth, 0)
httpResp, err := httpClient.Do(httpReq)
if err != nil {
helps.RecordAPIResponseError(ctx, e.cfg, err)
@@ -307,6 +308,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
// Disable thinking if tool_choice forces tool use (Anthropic API constraint)
body = disableThinkingIfToolChoiceForced(body)
body = normalizeClaudeTemperatureForThinking(body)
// Auto-inject cache_control if missing (optimization for ClawdBot/clients without caching support)
if countCacheControls(body) == 0 {
@@ -355,7 +357,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
AuthValue: authValue,
})
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
httpClient := helps.NewUtlsHTTPClient(e.cfg, auth, 0)
httpResp, err := httpClient.Do(httpReq)
if err != nil {
helps.RecordAPIResponseError(ctx, e.cfg, err)
@@ -522,7 +524,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
AuthValue: authValue,
})
httpClient := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
httpClient := helps.NewUtlsHTTPClient(e.cfg, auth, 0)
resp, err := httpClient.Do(httpReq)
if err != nil {
helps.RecordAPIResponseError(ctx, e.cfg, err)
@@ -651,6 +653,25 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte {
return body
}
// normalizeClaudeTemperatureForThinking keeps Anthropic message requests valid when
// thinking is enabled. Anthropic rejects temperatures other than 1 when
// thinking.type is enabled/adaptive/auto.
func normalizeClaudeTemperatureForThinking(body []byte) []byte {
if !gjson.GetBytes(body, "temperature").Exists() {
return body
}
thinkingType := strings.ToLower(strings.TrimSpace(gjson.GetBytes(body, "thinking.type").String()))
switch thinkingType {
case "enabled", "adaptive", "auto":
if temp := gjson.GetBytes(body, "temperature"); temp.Exists() && temp.Type == gjson.Number && temp.Float() == 1 {
return body
}
body, _ = sjson.SetBytes(body, "temperature", 1)
}
return body
}
type compositeReadCloser struct {
io.Reader
closers []func() error
@@ -813,7 +834,7 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
deviceProfile = helps.ResolveClaudeDeviceProfile(auth, apiKey, ginHeaders, cfg)
}
baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05"
baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,context-management-2025-06-27,prompt-caching-scope-2026-01-05,structured-outputs-2025-12-15,fast-mode-2026-02-01,redact-thinking-2026-02-12,token-efficient-tools-2026-03-28"
if val := strings.TrimSpace(ginHeaders.Get("Anthropic-Beta")); val != "" {
baseBetas = val
if !strings.Contains(val, "oauth") {
@@ -827,6 +848,14 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
hasClaude1MHeader = true
}
}
// Also check auth attributes — GitLab Duo sets gitlab_duo_force_context_1m
// when routing through the Anthropic gateway, but the gin headers won't have
// X-CPA-CLAUDE-1M because the request is internally constructed.
if !hasClaude1MHeader && auth != nil && auth.Attributes != nil {
if auth.Attributes["gitlab_duo_force_context_1m"] == "true" {
hasClaude1MHeader = true
}
}
// Merge extra betas from request body and request flags.
if len(extraBetas) > 0 || hasClaude1MHeader {
@@ -851,13 +880,22 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
r.Header.Set("Anthropic-Beta", baseBetas)
misc.EnsureHeader(r.Header, ginHeaders, "Anthropic-Version", "2023-06-01")
misc.EnsureHeader(r.Header, ginHeaders, "Anthropic-Dangerous-Direct-Browser-Access", "true")
// Only set browser access header for API key mode; real Claude Code CLI does not send it.
if useAPIKey {
misc.EnsureHeader(r.Header, ginHeaders, "Anthropic-Dangerous-Direct-Browser-Access", "true")
}
misc.EnsureHeader(r.Header, ginHeaders, "X-App", "cli")
// Values below match Claude Code 2.1.63 / @anthropic-ai/sdk 0.74.0 (updated 2026-02-28).
misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Retry-Count", "0")
misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Runtime", "node")
misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Lang", "js")
misc.EnsureHeader(r.Header, ginHeaders, "X-Stainless-Timeout", hdrDefault(hd.Timeout, "600"))
// Session ID: stable per auth/apiKey, matches Claude Code's X-Claude-Code-Session-Id header.
misc.EnsureHeader(r.Header, ginHeaders, "X-Claude-Code-Session-Id", helps.CachedSessionID(apiKey))
// Per-request UUID, matches Claude Code's x-client-request-id for first-party API.
if isAnthropicBase {
misc.EnsureHeader(r.Header, ginHeaders, "x-client-request-id", uuid.New().String())
}
r.Header.Set("Connection", "keep-alive")
if stream {
r.Header.Set("Accept", "text/event-stream")
@@ -872,16 +910,16 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
// Legacy mode keeps OS/Arch runtime-derived; stabilized mode pins OS/Arch
// to the configured baseline while still allowing newer official
// User-Agent/package/runtime tuples to upgrade the software fingerprint.
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(r, attrs)
if stabilizeDeviceProfile {
helps.ApplyClaudeDeviceProfileHeaders(r, deviceProfile)
} else {
helps.ApplyClaudeLegacyDeviceHeaders(r, ginHeaders, cfg)
}
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(r, attrs)
// Re-enforce Accept-Encoding: identity after ApplyCustomHeadersFromAttrs, which
// may override it with a user-configured value. Compressed SSE breaks the line
// scanner regardless of user preference, so this is non-negotiable for streams.
@@ -907,7 +945,7 @@ func claudeCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) {
}
func checkSystemInstructions(payload []byte) []byte {
return checkSystemInstructionsWithSigningMode(payload, false, false)
return checkSystemInstructionsWithSigningMode(payload, false, false, "2.1.63", "", "")
}
func isClaudeOAuthToken(apiKey string) bool {
@@ -1102,6 +1140,38 @@ func getClientUserAgent(ctx context.Context) string {
return ""
}
// parseEntrypointFromUA extracts the entrypoint from a Claude Code User-Agent.
// Format: "claude-cli/x.y.z (external, cli)" → "cli"
// Format: "claude-cli/x.y.z (external, vscode)" → "vscode"
// Returns "cli" if parsing fails or UA is not Claude Code.
func parseEntrypointFromUA(userAgent string) string {
// Find content inside parentheses
start := strings.Index(userAgent, "(")
end := strings.LastIndex(userAgent, ")")
if start < 0 || end <= start {
return "cli"
}
inner := userAgent[start+1 : end]
// Split by comma, take the second part (entrypoint is at index 1, after USER_TYPE)
// Format: "(USER_TYPE, ENTRYPOINT[, extra...])"
parts := strings.Split(inner, ",")
if len(parts) >= 2 {
ep := strings.TrimSpace(parts[1])
if ep != "" {
return ep
}
}
return "cli"
}
// getWorkloadFromContext extracts workload identifier from the gin request headers.
func getWorkloadFromContext(ctx context.Context) string {
if ginCtx, ok := ctx.Value("gin").(*gin.Context); ok && ginCtx != nil && ginCtx.Request != nil {
return strings.TrimSpace(ginCtx.GetHeader("X-CPA-Claude-Workload"))
}
return ""
}
// getCloakConfigFromAuth extracts cloak configuration from auth attributes.
// Returns (cloakMode, strictMode, sensitiveWords, cacheUserID).
func getCloakConfigFromAuth(auth *cliproxyauth.Auth) (string, bool, []string, bool) {
@@ -1152,28 +1222,52 @@ func injectFakeUserID(payload []byte, apiKey string, useCache bool) []byte {
return payload
}
// fingerprintSalt is the salt used by Claude Code to compute the 3-char build fingerprint.
const fingerprintSalt = "59cf53e54c78"
// computeFingerprint computes the 3-char build fingerprint that Claude Code embeds in cc_version.
// Algorithm: SHA256(salt + messageText[4] + messageText[7] + messageText[20] + version)[:3]
func computeFingerprint(messageText, version string) string {
indices := [3]int{4, 7, 20}
runes := []rune(messageText)
var sb strings.Builder
for _, idx := range indices {
if idx < len(runes) {
sb.WriteRune(runes[idx])
} else {
sb.WriteRune('0')
}
}
input := fingerprintSalt + sb.String() + version
h := sha256.Sum256([]byte(input))
return hex.EncodeToString(h[:])[:3]
}
// generateBillingHeader creates the x-anthropic-billing-header text block that
// real Claude Code prepends to every system prompt array.
// Format: x-anthropic-billing-header: cc_version=<ver>.<build>; cc_entrypoint=cli; cch=<hash>;
func generateBillingHeader(payload []byte, experimentalCCHSigning bool) string {
// Build hash: 3-char hex, matches the pattern seen in real requests (e.g. "a43")
buildBytes := make([]byte, 2)
_, _ = rand.Read(buildBytes)
buildHash := hex.EncodeToString(buildBytes)[:3]
// Format: x-anthropic-billing-header: cc_version=<ver>.<build>; cc_entrypoint=<ep>; cch=<hash>; [cc_workload=<wl>;]
func generateBillingHeader(payload []byte, experimentalCCHSigning bool, version, messageText, entrypoint, workload string) string {
if entrypoint == "" {
entrypoint = "cli"
}
buildHash := computeFingerprint(messageText, version)
workloadPart := ""
if workload != "" {
workloadPart = fmt.Sprintf(" cc_workload=%s;", workload)
}
if experimentalCCHSigning {
return fmt.Sprintf("x-anthropic-billing-header: cc_version=2.1.63.%s; cc_entrypoint=cli; cch=00000;", buildHash)
return fmt.Sprintf("x-anthropic-billing-header: cc_version=%s.%s; cc_entrypoint=%s; cch=00000;%s", version, buildHash, entrypoint, workloadPart)
}
// Generate a deterministic cch hash from the payload content (system + messages + tools).
// Real Claude Code uses a 5-char hex hash that varies per request.
h := sha256.Sum256(payload)
cch := hex.EncodeToString(h[:])[:5]
return fmt.Sprintf("x-anthropic-billing-header: cc_version=2.1.63.%s; cc_entrypoint=cli; cch=%s;", buildHash, cch)
return fmt.Sprintf("x-anthropic-billing-header: cc_version=%s.%s; cc_entrypoint=%s; cch=%s;%s", version, buildHash, entrypoint, cch, workloadPart)
}
func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte {
return checkSystemInstructionsWithSigningMode(payload, strictMode, false)
return checkSystemInstructionsWithSigningMode(payload, strictMode, false, "2.1.63", "", "")
}
// checkSystemInstructionsWithSigningMode injects Claude Code-style system blocks:
@@ -1181,10 +1275,25 @@ func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte {
// system[0]: billing header (no cache_control)
// system[1]: agent identifier (no cache_control)
// system[2..]: user system messages (cache_control added when missing)
func checkSystemInstructionsWithSigningMode(payload []byte, strictMode bool, experimentalCCHSigning bool) []byte {
func checkSystemInstructionsWithSigningMode(payload []byte, strictMode bool, experimentalCCHSigning bool, version, entrypoint, workload string) []byte {
system := gjson.GetBytes(payload, "system")
billingText := generateBillingHeader(payload, experimentalCCHSigning)
// Extract original message text for fingerprint computation (before billing injection).
// Use the first system text block's content as the fingerprint source.
messageText := ""
if system.IsArray() {
system.ForEach(func(_, part gjson.Result) bool {
if part.Get("type").String() == "text" {
messageText = part.Get("text").String()
return false
}
return true
})
} else if system.Type == gjson.String {
messageText = system.String()
}
billingText := generateBillingHeader(payload, experimentalCCHSigning, version, messageText, entrypoint, workload)
billingBlock := fmt.Sprintf(`{"type":"text","text":"%s"}`, billingText)
// No cache_control on the agent block. It is a cloaking artifact with zero cache
// value (the last system block is what actually triggers caching of all system content).
@@ -1273,7 +1382,10 @@ func applyCloaking(ctx context.Context, cfg *config.Config, auth *cliproxyauth.A
// Skip system instructions for claude-3-5-haiku models
if !strings.HasPrefix(model, "claude-3-5-haiku") {
payload = checkSystemInstructionsWithSigningMode(payload, strictMode, useExperimentalCCHSigning)
billingVersion := helps.DefaultClaudeVersion(cfg)
entrypoint := parseEntrypointFromUA(clientUserAgent)
workload := getWorkloadFromContext(ctx)
payload = checkSystemInstructionsWithSigningMode(payload, strictMode, useExperimentalCCHSigning, billingVersion, entrypoint, workload)
}
// Inject fake user ID

View File

@@ -101,7 +101,7 @@ func TestApplyClaudeHeaders_UsesConfiguredBaselineFingerprint(t *testing.T) {
req := newClaudeHeaderTestRequest(t, incoming)
applyClaudeHeaders(req, auth, "key-baseline", false, nil, cfg)
assertClaudeFingerprint(t, req.Header, "claude-cli/2.1.70 (external, cli)", "0.80.0", "v24.5.0", "MacOS", "arm64")
assertClaudeFingerprint(t, req.Header, "evil-client/9.9", "9.9.9", "v24.5.0", "Linux", "x64")
if got := req.Header.Get("X-Stainless-Timeout"); got != "900" {
t.Fatalf("X-Stainless-Timeout = %q, want %q", got, "900")
}
@@ -1833,3 +1833,43 @@ func TestApplyCloaking_PreservesConfiguredStrictModeAndSensitiveWordsWhenModeOmi
t.Fatalf("expected configured sensitive word obfuscation to apply, got %q", got)
}
}
func TestNormalizeClaudeTemperatureForThinking_AdaptiveCoercesToOne(t *testing.T) {
payload := []byte(`{"temperature":0,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"}}`)
out := normalizeClaudeTemperatureForThinking(payload)
if got := gjson.GetBytes(out, "temperature").Float(); got != 1 {
t.Fatalf("temperature = %v, want 1", got)
}
}
func TestNormalizeClaudeTemperatureForThinking_EnabledCoercesToOne(t *testing.T) {
payload := []byte(`{"temperature":0.2,"thinking":{"type":"enabled","budget_tokens":2048}}`)
out := normalizeClaudeTemperatureForThinking(payload)
if got := gjson.GetBytes(out, "temperature").Float(); got != 1 {
t.Fatalf("temperature = %v, want 1", got)
}
}
func TestNormalizeClaudeTemperatureForThinking_NoThinkingLeavesTemperatureAlone(t *testing.T) {
payload := []byte(`{"temperature":0,"messages":[{"role":"user","content":"hi"}]}`)
out := normalizeClaudeTemperatureForThinking(payload)
if got := gjson.GetBytes(out, "temperature").Float(); got != 0 {
t.Fatalf("temperature = %v, want 0", got)
}
}
func TestNormalizeClaudeTemperatureForThinking_AfterForcedToolChoiceKeepsOriginalTemperature(t *testing.T) {
payload := []byte(`{"temperature":0,"thinking":{"type":"adaptive"},"output_config":{"effort":"max"},"tool_choice":{"type":"any"}}`)
out := disableThinkingIfToolChoiceForced(payload)
out = normalizeClaudeTemperatureForThinking(out)
if gjson.GetBytes(out, "thinking").Exists() {
t.Fatalf("thinking should be removed when tool_choice forces tool use")
}
if got := gjson.GetBytes(out, "temperature").Float(); got != 0 {
t.Fatalf("temperature = %v, want 0", got)
}
}

View File

@@ -219,7 +219,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
}
wsReqBody := buildCodexWebsocketRequestBody(body)
helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
wsReqLog := helps.UpstreamRequestLog{
URL: wsURL,
Method: "WEBSOCKET",
Headers: wsHeaders.Clone(),
@@ -229,16 +229,14 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
AuthLabel: authLabel,
AuthType: authType,
AuthValue: authValue,
})
}
helps.RecordAPIWebsocketRequest(ctx, e.cfg, wsReqLog)
conn, respHS, errDial := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
if respHS != nil {
helps.RecordAPIResponseMetadata(ctx, e.cfg, respHS.StatusCode, respHS.Header.Clone())
}
if errDial != nil {
bodyErr := websocketHandshakeBody(respHS)
if len(bodyErr) > 0 {
helps.AppendAPIResponseChunk(ctx, e.cfg, bodyErr)
if respHS != nil {
helps.RecordAPIWebsocketUpgradeRejection(ctx, e.cfg, websocketUpgradeRequestLog(wsReqLog), respHS.StatusCode, respHS.Header.Clone(), bodyErr)
}
if respHS != nil && respHS.StatusCode == http.StatusUpgradeRequired {
return e.CodexExecutor.Execute(ctx, auth, req, opts)
@@ -246,10 +244,10 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
if respHS != nil && respHS.StatusCode > 0 {
return resp, statusErr{code: respHS.StatusCode, msg: string(bodyErr)}
}
helps.RecordAPIResponseError(ctx, e.cfg, errDial)
helps.RecordAPIWebsocketError(ctx, e.cfg, "dial", errDial)
return resp, errDial
}
closeHTTPResponseBody(respHS, "codex websockets executor: close handshake response body error")
recordAPIWebsocketHandshake(ctx, e.cfg, respHS)
if sess == nil {
logCodexWebsocketConnected(executionSessionID, authID, wsURL)
defer func() {
@@ -278,10 +276,10 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
// Retry once with a fresh websocket connection. This is mainly to handle
// upstream closing the socket between sequential requests within the same
// execution session.
connRetry, _, errDialRetry := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
connRetry, respHSRetry, errDialRetry := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
if errDialRetry == nil && connRetry != nil {
wsReqBodyRetry := buildCodexWebsocketRequestBody(body)
helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
helps.RecordAPIWebsocketRequest(ctx, e.cfg, helps.UpstreamRequestLog{
URL: wsURL,
Method: "WEBSOCKET",
Headers: wsHeaders.Clone(),
@@ -292,20 +290,22 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
AuthType: authType,
AuthValue: authValue,
})
recordAPIWebsocketHandshake(ctx, e.cfg, respHSRetry)
if errSendRetry := writeCodexWebsocketMessage(sess, connRetry, wsReqBodyRetry); errSendRetry == nil {
conn = connRetry
wsReqBody = wsReqBodyRetry
} else {
e.invalidateUpstreamConn(sess, connRetry, "send_error", errSendRetry)
helps.RecordAPIResponseError(ctx, e.cfg, errSendRetry)
helps.RecordAPIWebsocketError(ctx, e.cfg, "send_retry", errSendRetry)
return resp, errSendRetry
}
} else {
helps.RecordAPIResponseError(ctx, e.cfg, errDialRetry)
closeHTTPResponseBody(respHSRetry, "codex websockets executor: close handshake response body error")
helps.RecordAPIWebsocketError(ctx, e.cfg, "dial_retry", errDialRetry)
return resp, errDialRetry
}
} else {
helps.RecordAPIResponseError(ctx, e.cfg, errSend)
helps.RecordAPIWebsocketError(ctx, e.cfg, "send", errSend)
return resp, errSend
}
}
@@ -316,7 +316,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
}
msgType, payload, errRead := readCodexWebsocketMessage(ctx, sess, conn, readCh)
if errRead != nil {
helps.RecordAPIResponseError(ctx, e.cfg, errRead)
helps.RecordAPIWebsocketError(ctx, e.cfg, "read", errRead)
return resp, errRead
}
if msgType != websocket.TextMessage {
@@ -325,7 +325,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
if sess != nil {
e.invalidateUpstreamConn(sess, conn, "unexpected_binary", err)
}
helps.RecordAPIResponseError(ctx, e.cfg, err)
helps.RecordAPIWebsocketError(ctx, e.cfg, "unexpected_binary", err)
return resp, err
}
continue
@@ -335,13 +335,13 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
if len(payload) == 0 {
continue
}
helps.AppendAPIResponseChunk(ctx, e.cfg, payload)
helps.AppendAPIWebsocketResponse(ctx, e.cfg, payload)
if wsErr, ok := parseCodexWebsocketError(payload); ok {
if sess != nil {
e.invalidateUpstreamConn(sess, conn, "upstream_error", wsErr)
}
helps.RecordAPIResponseError(ctx, e.cfg, wsErr)
helps.RecordAPIWebsocketError(ctx, e.cfg, "upstream_error", wsErr)
return resp, wsErr
}
@@ -413,7 +413,7 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
}
wsReqBody := buildCodexWebsocketRequestBody(body)
helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
wsReqLog := helps.UpstreamRequestLog{
URL: wsURL,
Method: "WEBSOCKET",
Headers: wsHeaders.Clone(),
@@ -423,18 +423,18 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
AuthLabel: authLabel,
AuthType: authType,
AuthValue: authValue,
})
}
helps.RecordAPIWebsocketRequest(ctx, e.cfg, wsReqLog)
conn, respHS, errDial := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
var upstreamHeaders http.Header
if respHS != nil {
upstreamHeaders = respHS.Header.Clone()
helps.RecordAPIResponseMetadata(ctx, e.cfg, respHS.StatusCode, respHS.Header.Clone())
}
if errDial != nil {
bodyErr := websocketHandshakeBody(respHS)
if len(bodyErr) > 0 {
helps.AppendAPIResponseChunk(ctx, e.cfg, bodyErr)
if respHS != nil {
helps.RecordAPIWebsocketUpgradeRejection(ctx, e.cfg, websocketUpgradeRequestLog(wsReqLog), respHS.StatusCode, respHS.Header.Clone(), bodyErr)
}
if respHS != nil && respHS.StatusCode == http.StatusUpgradeRequired {
return e.CodexExecutor.ExecuteStream(ctx, auth, req, opts)
@@ -442,13 +442,13 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
if respHS != nil && respHS.StatusCode > 0 {
return nil, statusErr{code: respHS.StatusCode, msg: string(bodyErr)}
}
helps.RecordAPIResponseError(ctx, e.cfg, errDial)
helps.RecordAPIWebsocketError(ctx, e.cfg, "dial", errDial)
if sess != nil {
sess.reqMu.Unlock()
}
return nil, errDial
}
closeHTTPResponseBody(respHS, "codex websockets executor: close handshake response body error")
recordAPIWebsocketHandshake(ctx, e.cfg, respHS)
if sess == nil {
logCodexWebsocketConnected(executionSessionID, authID, wsURL)
@@ -461,20 +461,21 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
}
if errSend := writeCodexWebsocketMessage(sess, conn, wsReqBody); errSend != nil {
helps.RecordAPIResponseError(ctx, e.cfg, errSend)
helps.RecordAPIWebsocketError(ctx, e.cfg, "send", errSend)
if sess != nil {
e.invalidateUpstreamConn(sess, conn, "send_error", errSend)
// Retry once with a new websocket connection for the same execution session.
connRetry, _, errDialRetry := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
connRetry, respHSRetry, errDialRetry := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
if errDialRetry != nil || connRetry == nil {
helps.RecordAPIResponseError(ctx, e.cfg, errDialRetry)
closeHTTPResponseBody(respHSRetry, "codex websockets executor: close handshake response body error")
helps.RecordAPIWebsocketError(ctx, e.cfg, "dial_retry", errDialRetry)
sess.clearActive(readCh)
sess.reqMu.Unlock()
return nil, errDialRetry
}
wsReqBodyRetry := buildCodexWebsocketRequestBody(body)
helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
helps.RecordAPIWebsocketRequest(ctx, e.cfg, helps.UpstreamRequestLog{
URL: wsURL,
Method: "WEBSOCKET",
Headers: wsHeaders.Clone(),
@@ -485,8 +486,9 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
AuthType: authType,
AuthValue: authValue,
})
recordAPIWebsocketHandshake(ctx, e.cfg, respHSRetry)
if errSendRetry := writeCodexWebsocketMessage(sess, connRetry, wsReqBodyRetry); errSendRetry != nil {
helps.RecordAPIResponseError(ctx, e.cfg, errSendRetry)
helps.RecordAPIWebsocketError(ctx, e.cfg, "send_retry", errSendRetry)
e.invalidateUpstreamConn(sess, connRetry, "send_error", errSendRetry)
sess.clearActive(readCh)
sess.reqMu.Unlock()
@@ -552,7 +554,7 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
}
terminateReason = "read_error"
terminateErr = errRead
helps.RecordAPIResponseError(ctx, e.cfg, errRead)
helps.RecordAPIWebsocketError(ctx, e.cfg, "read", errRead)
reporter.PublishFailure(ctx)
_ = send(cliproxyexecutor.StreamChunk{Err: errRead})
return
@@ -562,7 +564,7 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
err = fmt.Errorf("codex websockets executor: unexpected binary message")
terminateReason = "unexpected_binary"
terminateErr = err
helps.RecordAPIResponseError(ctx, e.cfg, err)
helps.RecordAPIWebsocketError(ctx, e.cfg, "unexpected_binary", err)
reporter.PublishFailure(ctx)
if sess != nil {
e.invalidateUpstreamConn(sess, conn, "unexpected_binary", err)
@@ -577,12 +579,12 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
if len(payload) == 0 {
continue
}
helps.AppendAPIResponseChunk(ctx, e.cfg, payload)
helps.AppendAPIWebsocketResponse(ctx, e.cfg, payload)
if wsErr, ok := parseCodexWebsocketError(payload); ok {
terminateReason = "upstream_error"
terminateErr = wsErr
helps.RecordAPIResponseError(ctx, e.cfg, wsErr)
helps.RecordAPIWebsocketError(ctx, e.cfg, "upstream_error", wsErr)
reporter.PublishFailure(ctx)
if sess != nil {
e.invalidateUpstreamConn(sess, conn, "upstream_error", wsErr)
@@ -1022,6 +1024,32 @@ func encodeCodexWebsocketAsSSE(payload []byte) []byte {
return line
}
func websocketUpgradeRequestLog(info helps.UpstreamRequestLog) helps.UpstreamRequestLog {
upgradeInfo := info
upgradeInfo.URL = helps.WebsocketUpgradeRequestURL(info.URL)
upgradeInfo.Method = http.MethodGet
upgradeInfo.Body = nil
upgradeInfo.Headers = info.Headers.Clone()
if upgradeInfo.Headers == nil {
upgradeInfo.Headers = make(http.Header)
}
if strings.TrimSpace(upgradeInfo.Headers.Get("Connection")) == "" {
upgradeInfo.Headers.Set("Connection", "Upgrade")
}
if strings.TrimSpace(upgradeInfo.Headers.Get("Upgrade")) == "" {
upgradeInfo.Headers.Set("Upgrade", "websocket")
}
return upgradeInfo
}
func recordAPIWebsocketHandshake(ctx context.Context, cfg *config.Config, resp *http.Response) {
if resp == nil {
return
}
helps.RecordAPIWebsocketHandshake(ctx, cfg, resp.StatusCode, resp.Header.Clone())
closeHTTPResponseBody(resp, "codex websockets executor: close handshake response body error")
}
func websocketHandshakeBody(resp *http.Response) []byte {
if resp == nil || resp.Body == nil {
return nil

View File

@@ -82,6 +82,11 @@ func (e *GeminiCLIExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth
}
req.Header.Set("Authorization", "Bearer "+tok.AccessToken)
applyGeminiCLIHeaders(req, "unknown")
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(req, attrs)
return nil
}
@@ -191,6 +196,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
applyGeminiCLIHeaders(reqHTTP, attemptModel)
reqHTTP.Header.Set("Accept", "application/json")
util.ApplyCustomHeadersFromAttrs(reqHTTP, auth.Attributes)
helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
URL: url,
Method: http.MethodPost,
@@ -336,6 +342,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
applyGeminiCLIHeaders(reqHTTP, attemptModel)
reqHTTP.Header.Set("Accept", "text/event-stream")
util.ApplyCustomHeadersFromAttrs(reqHTTP, auth.Attributes)
helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
URL: url,
Method: http.MethodPost,
@@ -517,6 +524,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
applyGeminiCLIHeaders(reqHTTP, baseModel)
reqHTTP.Header.Set("Accept", "application/json")
util.ApplyCustomHeadersFromAttrs(reqHTTP, auth.Attributes)
helps.RecordAPIRequest(ctx, e.cfg, helps.UpstreamRequestLog{
URL: url,
Method: http.MethodPost,

View File

@@ -18,6 +18,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -363,6 +364,11 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
return resp, statusErr{code: 500, msg: "internal server error"}
}
applyGeminiHeaders(httpReq, auth)
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {
@@ -478,6 +484,11 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
httpReq.Header.Set("x-goog-api-key", apiKey)
}
applyGeminiHeaders(httpReq, auth)
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {
@@ -582,6 +593,11 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
return nil, statusErr{code: 500, msg: "internal server error"}
}
applyGeminiHeaders(httpReq, auth)
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {
@@ -706,6 +722,11 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
httpReq.Header.Set("x-goog-api-key", apiKey)
}
applyGeminiHeaders(httpReq, auth)
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {
@@ -813,6 +834,11 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
return cliproxyexecutor.Response{}, statusErr{code: 500, msg: "internal server error"}
}
applyGeminiHeaders(httpReq, auth)
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {
@@ -897,6 +923,11 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
httpReq.Header.Set("x-goog-api-key", apiKey)
}
applyGeminiHeaders(httpReq, auth)
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {

View File

@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"net/http"
"slices"
"strings"
"sync"
"time"
@@ -17,6 +18,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
log "github.com/sirupsen/logrus"
@@ -40,7 +42,7 @@ const (
copilotEditorVersion = "vscode/1.107.0"
copilotPluginVersion = "copilot-chat/0.35.0"
copilotIntegrationID = "vscode-chat"
copilotOpenAIIntent = "conversation-panel"
copilotOpenAIIntent = "conversation-edits"
copilotGitHubAPIVer = "2025-04-01"
)
@@ -126,6 +128,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
body = e.normalizeModel(req.Model, body)
body = flattenAssistantContent(body)
body = stripUnsupportedBetas(body)
// Detect vision content before input normalization removes messages
hasVision := detectVisionContent(body)
@@ -142,6 +145,7 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
if useResponses {
body = normalizeGitHubCopilotResponsesInput(body)
body = normalizeGitHubCopilotResponsesTools(body)
body = applyGitHubCopilotResponsesDefaults(body)
} else {
body = normalizeGitHubCopilotChatTools(body)
}
@@ -225,9 +229,10 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
if useResponses && from.String() == "claude" {
converted = translateGitHubCopilotResponsesNonStreamToClaude(data)
} else {
data = normalizeGitHubCopilotReasoningField(data)
converted = sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, &param)
}
resp = cliproxyexecutor.Response{Payload: converted}
resp = cliproxyexecutor.Response{Payload: converted, Headers: httpResp.Header.Clone()}
reporter.ensurePublished(ctx)
return resp, nil
}
@@ -256,6 +261,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
body = e.normalizeModel(req.Model, body)
body = flattenAssistantContent(body)
body = stripUnsupportedBetas(body)
// Detect vision content before input normalization removes messages
hasVision := detectVisionContent(body)
@@ -272,6 +278,7 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
if useResponses {
body = normalizeGitHubCopilotResponsesInput(body)
body = normalizeGitHubCopilotResponsesTools(body)
body = applyGitHubCopilotResponsesDefaults(body)
} else {
body = normalizeGitHubCopilotChatTools(body)
}
@@ -378,7 +385,20 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
if useResponses && from.String() == "claude" {
chunks = translateGitHubCopilotResponsesStreamToClaude(bytes.Clone(line), &param)
} else {
chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, bytes.Clone(line), &param)
// Strip SSE "data: " prefix before reasoning field normalization,
// since normalizeGitHubCopilotReasoningField expects pure JSON.
// Re-wrap with the prefix afterward for the translator.
normalizedLine := bytes.Clone(line)
if bytes.HasPrefix(line, dataTag) {
sseData := bytes.TrimSpace(line[len(dataTag):])
if !bytes.Equal(sseData, []byte("[DONE]")) && gjson.ValidBytes(sseData) {
normalized := normalizeGitHubCopilotReasoningField(bytes.Clone(sseData))
if !bytes.Equal(normalized, sseData) {
normalizedLine = append(append([]byte(nil), dataTag...), normalized...)
}
}
}
chunks = sdktranslator.TranslateStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, normalizedLine, &param)
}
for i := range chunks {
out <- cliproxyexecutor.StreamChunk{Payload: bytes.Clone(chunks[i])}
@@ -400,9 +420,28 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
}, nil
}
// CountTokens is not supported for GitHub Copilot.
func (e *GitHubCopilotExecutor) CountTokens(_ context.Context, _ *cliproxyauth.Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported for github-copilot"}
// CountTokens estimates token count locally using tiktoken, since the GitHub
// Copilot API does not expose a dedicated token counting endpoint.
func (e *GitHubCopilotExecutor) CountTokens(ctx context.Context, _ *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
baseModel := thinking.ParseSuffix(req.Model).ModelName
from := opts.SourceFormat
to := sdktranslator.FromString("openai")
translated := sdktranslator.TranslateRequest(from, to, baseModel, req.Payload, false)
enc, err := helps.TokenizerForModel(baseModel)
if err != nil {
return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: tokenizer init failed: %w", err)
}
count, err := helps.CountOpenAIChatTokens(enc, translated)
if err != nil {
return cliproxyexecutor.Response{}, fmt.Errorf("github copilot executor: token counting failed: %w", err)
}
usageJSON := helps.BuildOpenAIUsageJSON(count)
translatedUsage := sdktranslator.TranslateTokenCount(ctx, to, from, count, usageJSON)
return cliproxyexecutor.Response{Payload: translatedUsage}, nil
}
// Refresh validates the GitHub token is still working.
@@ -491,46 +530,127 @@ func (e *GitHubCopilotExecutor) applyHeaders(r *http.Request, apiToken string, b
r.Header.Set("X-Request-Id", uuid.NewString())
initiator := "user"
if role := detectLastConversationRole(body); role == "assistant" || role == "tool" {
if isAgentInitiated(body) {
initiator = "agent"
}
r.Header.Set("X-Initiator", initiator)
}
func detectLastConversationRole(body []byte) string {
// isAgentInitiated determines whether the current request is agent-initiated
// (tool callbacks, continuations) rather than user-initiated (new user prompt).
//
// GitHub Copilot uses the X-Initiator header for billing:
// - "user" → consumes premium request quota
// - "agent" → free (tool loops, continuations)
//
// The challenge: Claude Code sends tool results as role:"user" messages with
// content type "tool_result". After translation to OpenAI format, the tool_result
// part becomes a separate role:"tool" message, but if the original Claude message
// also contained text content (e.g. skill invocations, attachment descriptions),
// a role:"user" message is emitted AFTER the tool message, making the last message
// appear user-initiated when it's actually part of an agent tool loop.
//
// VSCode Copilot Chat solves this with explicit flags (iterationNumber,
// isContinuation, subAgentInvocationId). Since CPA doesn't have these flags,
// we infer agent status by checking whether the conversation contains prior
// assistant/tool messages — if it does, the current request is a continuation.
//
// References:
// - opencode#8030, opencode#15824: same root cause and fix approach
// - vscode-copilot-chat: toolCallingLoop.ts (iterationNumber === 0)
// - pi-ai: github-copilot-headers.ts (last message role check)
func isAgentInitiated(body []byte) bool {
if len(body) == 0 {
return ""
return false
}
// Chat Completions API: check messages array
if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
arr := messages.Array()
if len(arr) == 0 {
return false
}
lastRole := ""
for i := len(arr) - 1; i >= 0; i-- {
if role := arr[i].Get("role").String(); role != "" {
return role
if r := arr[i].Get("role").String(); r != "" {
lastRole = r
break
}
}
// If last message is assistant or tool, clearly agent-initiated.
if lastRole == "assistant" || lastRole == "tool" {
return true
}
// If last message is "user", check whether it contains tool results
// (indicating a tool-loop continuation) or if the preceding message
// is an assistant tool_use. This is more precise than checking for
// any prior assistant message, which would false-positive on genuine
// multi-turn follow-ups.
if lastRole == "user" {
// Check if the last user message contains tool_result content
lastContent := arr[len(arr)-1].Get("content")
if lastContent.Exists() && lastContent.IsArray() {
for _, part := range lastContent.Array() {
if part.Get("type").String() == "tool_result" {
return true
}
}
}
// Check if the second-to-last message is an assistant with tool_use
if len(arr) >= 2 {
prev := arr[len(arr)-2]
if prev.Get("role").String() == "assistant" {
prevContent := prev.Get("content")
if prevContent.Exists() && prevContent.IsArray() {
for _, part := range prevContent.Array() {
if part.Get("type").String() == "tool_use" {
return true
}
}
}
}
}
}
return false
}
// Responses API: check input array
if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() {
arr := inputs.Array()
for i := len(arr) - 1; i >= 0; i-- {
item := arr[i]
if len(arr) == 0 {
return false
}
// Most Responses input items carry a top-level role.
if role := item.Get("role").String(); role != "" {
return role
// Check last item
last := arr[len(arr)-1]
if role := last.Get("role").String(); role == "assistant" {
return true
}
switch last.Get("type").String() {
case "function_call", "function_call_arguments", "computer_call":
return true
case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
return true
}
// If last item is user-role, check for prior non-user items
for _, item := range arr {
if role := item.Get("role").String(); role == "assistant" {
return true
}
switch item.Get("type").String() {
case "function_call", "function_call_arguments", "computer_call":
return "assistant"
case "function_call_output", "function_call_response", "tool_result", "computer_call_output":
return "tool"
case "function_call", "function_call_output", "function_call_response",
"function_call_arguments", "computer_call", "computer_call_output":
return true
}
}
}
return ""
return false
}
// detectVisionContent checks if the request body contains vision/image content.
@@ -572,6 +692,85 @@ func (e *GitHubCopilotExecutor) normalizeModel(model string, body []byte) []byte
return body
}
// copilotUnsupportedBetas lists beta headers that are Anthropic-specific and
// must not be forwarded to GitHub Copilot. The context-1m beta enables 1M
// context on Anthropic's API, but Copilot's Claude models are limited to
// ~128K-200K. Passing it through would not enable 1M on Copilot, but stripping
// it from the translated body avoids confusing downstream translators.
var copilotUnsupportedBetas = []string{
"context-1m-2025-08-07",
}
// stripUnsupportedBetas removes Anthropic-specific beta entries from the
// translated request body. In OpenAI format the betas may appear under
// "metadata.betas" or a top-level "betas" array; in Claude format they sit at
// "betas". This function checks all known locations.
func stripUnsupportedBetas(body []byte) []byte {
betaPaths := []string{"betas", "metadata.betas"}
for _, path := range betaPaths {
arr := gjson.GetBytes(body, path)
if !arr.Exists() || !arr.IsArray() {
continue
}
var filtered []string
changed := false
for _, item := range arr.Array() {
beta := item.String()
if isCopilotUnsupportedBeta(beta) {
changed = true
continue
}
filtered = append(filtered, beta)
}
if !changed {
continue
}
if len(filtered) == 0 {
body, _ = sjson.DeleteBytes(body, path)
} else {
body, _ = sjson.SetBytes(body, path, filtered)
}
}
return body
}
func isCopilotUnsupportedBeta(beta string) bool {
return slices.Contains(copilotUnsupportedBetas, beta)
}
// normalizeGitHubCopilotReasoningField maps Copilot's non-standard
// 'reasoning_text' field to the standard OpenAI 'reasoning_content' field
// that the SDK translator expects. This handles both streaming deltas
// (choices[].delta.reasoning_text) and non-streaming messages
// (choices[].message.reasoning_text). The field is only renamed when
// 'reasoning_content' is absent or null, preserving standard responses.
// All choices are processed to support n>1 requests.
func normalizeGitHubCopilotReasoningField(data []byte) []byte {
choices := gjson.GetBytes(data, "choices")
if !choices.Exists() || !choices.IsArray() {
return data
}
for i := range choices.Array() {
// Non-streaming: choices[i].message.reasoning_text
msgRT := fmt.Sprintf("choices.%d.message.reasoning_text", i)
msgRC := fmt.Sprintf("choices.%d.message.reasoning_content", i)
if rt := gjson.GetBytes(data, msgRT); rt.Exists() && rt.String() != "" {
if rc := gjson.GetBytes(data, msgRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
data, _ = sjson.SetBytes(data, msgRC, rt.String())
}
}
// Streaming: choices[i].delta.reasoning_text
deltaRT := fmt.Sprintf("choices.%d.delta.reasoning_text", i)
deltaRC := fmt.Sprintf("choices.%d.delta.reasoning_content", i)
if rt := gjson.GetBytes(data, deltaRT); rt.Exists() && rt.String() != "" {
if rc := gjson.GetBytes(data, deltaRC); !rc.Exists() || rc.Type == gjson.Null || rc.String() == "" {
data, _ = sjson.SetBytes(data, deltaRC, rt.String())
}
}
}
return data
}
func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format, model string) bool {
if sourceFormat.String() == "openai-response" {
return true
@@ -596,12 +795,7 @@ func lookupGitHubCopilotStaticModelInfo(model string) *registry.ModelInfo {
}
func containsEndpoint(endpoints []string, endpoint string) bool {
for _, item := range endpoints {
if item == endpoint {
return true
}
}
return false
return slices.Contains(endpoints, endpoint)
}
// flattenAssistantContent converts assistant message content from array format
@@ -856,6 +1050,32 @@ func stripGitHubCopilotResponsesUnsupportedFields(body []byte) []byte {
return body
}
// applyGitHubCopilotResponsesDefaults sets required fields for the Responses API
// that both vscode-copilot-chat and pi-ai always include.
//
// References:
// - vscode-copilot-chat: src/platform/endpoint/node/responsesApi.ts
// - pi-ai (badlogic/pi-mono): packages/ai/src/providers/openai-responses.ts
func applyGitHubCopilotResponsesDefaults(body []byte) []byte {
// store: false — prevents request/response storage
if !gjson.GetBytes(body, "store").Exists() {
body, _ = sjson.SetBytes(body, "store", false)
}
// include: ["reasoning.encrypted_content"] — enables reasoning content
// reuse across turns, avoiding redundant computation
if !gjson.GetBytes(body, "include").Exists() {
body, _ = sjson.SetRawBytes(body, "include", []byte(`["reasoning.encrypted_content"]`))
}
// If reasoning.effort is set but reasoning.summary is not, default to "auto"
if gjson.GetBytes(body, "reasoning.effort").Exists() && !gjson.GetBytes(body, "reasoning.summary").Exists() {
body, _ = sjson.SetBytes(body, "reasoning.summary", "auto")
}
return body
}
func normalizeGitHubCopilotResponsesTools(body []byte) []byte {
tools := gjson.GetBytes(body, "tools")
if tools.Exists() {
@@ -1406,6 +1626,21 @@ func FetchGitHubCopilotModels(ctx context.Context, auth *cliproxyauth.Auth, cfg
m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens
}
// Override with real limits from the Copilot API when available.
// The API returns per-account limits (individual vs business) under
// capabilities.limits, which are more accurate than our static
// fallback values. We use max_prompt_tokens as ContextLength because
// that's the hard limit the Copilot API enforces on prompt size —
// exceeding it triggers "prompt token count exceeds the limit" errors.
if limits := entry.Limits(); limits != nil {
if limits.MaxPromptTokens > 0 {
m.ContextLength = limits.MaxPromptTokens
}
if limits.MaxOutputTokens > 0 {
m.MaxCompletionTokens = limits.MaxOutputTokens
}
}
models = append(models, m)
}

View File

@@ -1,11 +1,14 @@
package executor
import (
"context"
"net/http"
"strings"
"testing"
copilotauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
"github.com/tidwall/gjson"
)
@@ -72,7 +75,7 @@ func TestUseGitHubCopilotResponsesEndpoint_CodexModel(t *testing.T) {
}
func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing.T) {
t.Parallel()
// Not parallel: shares global model registry with DynamicRegistryWinsOverStatic.
if !useGitHubCopilotResponsesEndpoint(sdktranslator.FromString("openai"), "gpt-5.4") {
t.Fatal("expected responses-only registry model to use /responses")
}
@@ -82,7 +85,7 @@ func TestUseGitHubCopilotResponsesEndpoint_RegistryResponsesOnlyModel(t *testing
}
func TestUseGitHubCopilotResponsesEndpoint_DynamicRegistryWinsOverStatic(t *testing.T) {
t.Parallel()
// Not parallel: mutates global model registry, conflicts with RegistryResponsesOnlyModel.
reg := registry.GetGlobalRegistry()
clientID := "github-copilot-test-client"
@@ -251,14 +254,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_TextMapping(t *testing
t.Parallel()
resp := []byte(`{"id":"resp_1","model":"gpt-5-codex","output":[{"type":"message","content":[{"type":"output_text","text":"hello"}]}],"usage":{"input_tokens":3,"output_tokens":5}}`)
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
if gjson.Get(out, "type").String() != "message" {
t.Fatalf("type = %q, want message", gjson.Get(out, "type").String())
if gjson.GetBytes(out, "type").String() != "message" {
t.Fatalf("type = %q, want message", gjson.GetBytes(out, "type").String())
}
if gjson.Get(out, "content.0.type").String() != "text" {
t.Fatalf("content.0.type = %q, want text", gjson.Get(out, "content.0.type").String())
if gjson.GetBytes(out, "content.0.type").String() != "text" {
t.Fatalf("content.0.type = %q, want text", gjson.GetBytes(out, "content.0.type").String())
}
if gjson.Get(out, "content.0.text").String() != "hello" {
t.Fatalf("content.0.text = %q, want hello", gjson.Get(out, "content.0.text").String())
if gjson.GetBytes(out, "content.0.text").String() != "hello" {
t.Fatalf("content.0.text = %q, want hello", gjson.GetBytes(out, "content.0.text").String())
}
}
@@ -266,14 +269,14 @@ func TestTranslateGitHubCopilotResponsesNonStreamToClaude_ToolUseMapping(t *test
t.Parallel()
resp := []byte(`{"id":"resp_2","model":"gpt-5-codex","output":[{"type":"function_call","id":"fc_1","call_id":"call_1","name":"sum","arguments":"{\"a\":1}"}],"usage":{"input_tokens":1,"output_tokens":2}}`)
out := translateGitHubCopilotResponsesNonStreamToClaude(resp)
if gjson.Get(out, "content.0.type").String() != "tool_use" {
t.Fatalf("content.0.type = %q, want tool_use", gjson.Get(out, "content.0.type").String())
if gjson.GetBytes(out, "content.0.type").String() != "tool_use" {
t.Fatalf("content.0.type = %q, want tool_use", gjson.GetBytes(out, "content.0.type").String())
}
if gjson.Get(out, "content.0.name").String() != "sum" {
t.Fatalf("content.0.name = %q, want sum", gjson.Get(out, "content.0.name").String())
if gjson.GetBytes(out, "content.0.name").String() != "sum" {
t.Fatalf("content.0.name = %q, want sum", gjson.GetBytes(out, "content.0.name").String())
}
if gjson.Get(out, "stop_reason").String() != "tool_use" {
t.Fatalf("stop_reason = %q, want tool_use", gjson.Get(out, "stop_reason").String())
if gjson.GetBytes(out, "stop_reason").String() != "tool_use" {
t.Fatalf("stop_reason = %q, want tool_use", gjson.GetBytes(out, "stop_reason").String())
}
}
@@ -282,18 +285,24 @@ func TestTranslateGitHubCopilotResponsesStreamToClaude_TextLifecycle(t *testing.
var param any
created := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.created","response":{"id":"resp_1","model":"gpt-5-codex"}}`), &param)
if len(created) == 0 || !strings.Contains(created[0], "message_start") {
if len(created) == 0 || !strings.Contains(string(created[0]), "message_start") {
t.Fatalf("created events = %#v, want message_start", created)
}
delta := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.output_text.delta","delta":"he"}`), &param)
joinedDelta := strings.Join(delta, "")
var joinedDelta string
for _, d := range delta {
joinedDelta += string(d)
}
if !strings.Contains(joinedDelta, "content_block_start") || !strings.Contains(joinedDelta, "text_delta") {
t.Fatalf("delta events = %#v, want content_block_start + text_delta", delta)
}
completed := translateGitHubCopilotResponsesStreamToClaude([]byte(`data: {"type":"response.completed","response":{"usage":{"input_tokens":7,"output_tokens":9}}}`), &param)
joinedCompleted := strings.Join(completed, "")
var joinedCompleted string
for _, c := range completed {
joinedCompleted += string(c)
}
if !strings.Contains(joinedCompleted, "message_delta") || !strings.Contains(joinedCompleted, "message_stop") {
t.Fatalf("completed events = %#v, want message_delta + message_stop", completed)
}
@@ -312,15 +321,17 @@ func TestApplyHeaders_XInitiator_UserOnly(t *testing.T) {
}
}
func TestApplyHeaders_XInitiator_UserWhenLastRoleIsUser(t *testing.T) {
func TestApplyHeaders_XInitiator_AgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Last role governs the initiator decision.
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":"tool result here"}]}`)
// When the last role is "user" and the message contains tool_result content,
// the request is a continuation (e.g. Claude tool result translated to a
// synthetic user message). Should be "agent".
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":[{"type":"tool_result","tool_use_id":"tu1","content":"file contents..."}]}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want agent (last user contains tool_result)", got)
}
}
@@ -328,10 +339,11 @@ func TestApplyHeaders_XInitiator_AgentWithToolRole(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// When the last message has role "tool", it's clearly agent-initiated.
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"tool","content":"result"}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want agent (tool role exists)", got)
t.Fatalf("X-Initiator = %q, want agent (last role is tool)", got)
}
}
@@ -346,14 +358,15 @@ func TestApplyHeaders_XInitiator_InputArrayLastAssistantMessage(t *testing.T) {
}
}
func TestApplyHeaders_XInitiator_InputArrayLastUserMessage(t *testing.T) {
func TestApplyHeaders_XInitiator_InputArrayAgentWhenLastUserButHistoryHasAssistant(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Responses API: last item is user-role but history contains assistant → agent.
body := []byte(`{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want agent (history has assistant)", got)
}
}
@@ -368,6 +381,33 @@ func TestApplyHeaders_XInitiator_InputArrayLastFunctionCallOutput(t *testing.T)
}
}
func TestApplyHeaders_XInitiator_UserInMultiTurnNoTools(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Genuine multi-turn: user → assistant (plain text) → user follow-up.
// No tool messages → should be "user" (not a false-positive).
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"Hi there!"},{"role":"user","content":"what is 2+2?"}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (genuine multi-turn, no tools)", got)
}
}
func TestApplyHeaders_XInitiator_UserFollowUpAfterToolHistory(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// User follow-up after a completed tool-use conversation.
// The last message is a genuine user question — should be "user", not "agent".
// This aligns with opencode's behavior: only active tool loops are agent-initiated.
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":[{"type":"tool_use","id":"tu1","name":"Read","input":{}}]},{"role":"tool","tool_call_id":"tu1","content":"file data"},{"role":"assistant","content":"I read the file."},{"role":"user","content":"What did we do so far?"}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (genuine follow-up after tool history)", got)
}
}
// --- Tests for x-github-api-version header (Problem M) ---
func TestApplyHeaders_GitHubAPIVersion(t *testing.T) {
@@ -414,3 +454,364 @@ func TestDetectVisionContent_NoMessages(t *testing.T) {
t.Fatal("expected no vision content when messages field is absent")
}
}
// --- Tests for applyGitHubCopilotResponsesDefaults ---
func TestApplyGitHubCopilotResponsesDefaults_SetsAllDefaults(t *testing.T) {
t.Parallel()
body := []byte(`{"input":"hello","reasoning":{"effort":"medium"}}`)
got := applyGitHubCopilotResponsesDefaults(body)
if gjson.GetBytes(got, "store").Bool() != false {
t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
}
inc := gjson.GetBytes(got, "include")
if !inc.IsArray() || inc.Array()[0].String() != "reasoning.encrypted_content" {
t.Fatalf("include = %s, want [\"reasoning.encrypted_content\"]", inc.Raw)
}
if gjson.GetBytes(got, "reasoning.summary").String() != "auto" {
t.Fatalf("reasoning.summary = %q, want auto", gjson.GetBytes(got, "reasoning.summary").String())
}
}
func TestApplyGitHubCopilotResponsesDefaults_DoesNotOverrideExisting(t *testing.T) {
t.Parallel()
body := []byte(`{"input":"hello","store":true,"include":["other"],"reasoning":{"effort":"high","summary":"concise"}}`)
got := applyGitHubCopilotResponsesDefaults(body)
if gjson.GetBytes(got, "store").Bool() != true {
t.Fatalf("store should not be overridden, got %s", gjson.GetBytes(got, "store").Raw)
}
if gjson.GetBytes(got, "include").Array()[0].String() != "other" {
t.Fatalf("include should not be overridden, got %s", gjson.GetBytes(got, "include").Raw)
}
if gjson.GetBytes(got, "reasoning.summary").String() != "concise" {
t.Fatalf("reasoning.summary should not be overridden, got %q", gjson.GetBytes(got, "reasoning.summary").String())
}
}
func TestApplyGitHubCopilotResponsesDefaults_NoReasoningEffort(t *testing.T) {
t.Parallel()
body := []byte(`{"input":"hello"}`)
got := applyGitHubCopilotResponsesDefaults(body)
if gjson.GetBytes(got, "store").Bool() != false {
t.Fatalf("store = %v, want false", gjson.GetBytes(got, "store").Raw)
}
// reasoning.summary should NOT be set when reasoning.effort is absent
if gjson.GetBytes(got, "reasoning.summary").Exists() {
t.Fatalf("reasoning.summary should not be set when reasoning.effort is absent, got %q", gjson.GetBytes(got, "reasoning.summary").String())
}
}
// --- Tests for normalizeGitHubCopilotReasoningField ---
func TestNormalizeReasoningField_NonStreaming(t *testing.T) {
t.Parallel()
data := []byte(`{"choices":[{"message":{"content":"hello","reasoning_text":"I think..."}}]}`)
got := normalizeGitHubCopilotReasoningField(data)
rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
if rc != "I think..." {
t.Fatalf("reasoning_content = %q, want %q", rc, "I think...")
}
}
func TestNormalizeReasoningField_Streaming(t *testing.T) {
t.Parallel()
data := []byte(`{"choices":[{"delta":{"reasoning_text":"thinking delta"}}]}`)
got := normalizeGitHubCopilotReasoningField(data)
rc := gjson.GetBytes(got, "choices.0.delta.reasoning_content").String()
if rc != "thinking delta" {
t.Fatalf("reasoning_content = %q, want %q", rc, "thinking delta")
}
}
func TestNormalizeReasoningField_PreservesExistingReasoningContent(t *testing.T) {
t.Parallel()
data := []byte(`{"choices":[{"message":{"reasoning_text":"old","reasoning_content":"existing"}}]}`)
got := normalizeGitHubCopilotReasoningField(data)
rc := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
if rc != "existing" {
t.Fatalf("reasoning_content = %q, want %q (should not overwrite)", rc, "existing")
}
}
func TestNormalizeReasoningField_MultiChoice(t *testing.T) {
t.Parallel()
data := []byte(`{"choices":[{"message":{"reasoning_text":"thought-0"}},{"message":{"reasoning_text":"thought-1"}}]}`)
got := normalizeGitHubCopilotReasoningField(data)
rc0 := gjson.GetBytes(got, "choices.0.message.reasoning_content").String()
rc1 := gjson.GetBytes(got, "choices.1.message.reasoning_content").String()
if rc0 != "thought-0" {
t.Fatalf("choices[0].reasoning_content = %q, want %q", rc0, "thought-0")
}
if rc1 != "thought-1" {
t.Fatalf("choices[1].reasoning_content = %q, want %q", rc1, "thought-1")
}
}
func TestNormalizeReasoningField_NoChoices(t *testing.T) {
t.Parallel()
data := []byte(`{"id":"chatcmpl-123"}`)
got := normalizeGitHubCopilotReasoningField(data)
if string(got) != string(data) {
t.Fatalf("expected no change, got %s", string(got))
}
}
func TestApplyHeaders_OpenAIIntentValue(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
e.applyHeaders(req, "token", nil)
if got := req.Header.Get("Openai-Intent"); got != "conversation-edits" {
t.Fatalf("Openai-Intent = %q, want conversation-edits", got)
}
}
// --- Tests for CountTokens (local tiktoken estimation) ---
func TestCountTokens_ReturnsPositiveCount(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
body := []byte(`{"model":"gpt-4o","messages":[{"role":"user","content":"Hello, world!"}]}`)
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
Model: "gpt-4o",
Payload: body,
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("openai"),
})
if err != nil {
t.Fatalf("CountTokens() error: %v", err)
}
if len(resp.Payload) == 0 {
t.Fatal("CountTokens() returned empty payload")
}
// The response should contain a positive token count.
tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
if tokens <= 0 {
t.Fatalf("expected positive token count, got %d", tokens)
}
}
func TestCountTokens_ClaudeSourceFormatTranslates(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
body := []byte(`{"model":"claude-sonnet-4","messages":[{"role":"user","content":"Tell me a joke"}],"max_tokens":1024}`)
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
Model: "claude-sonnet-4",
Payload: body,
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("claude"),
})
if err != nil {
t.Fatalf("CountTokens() error: %v", err)
}
// Claude source format → should get input_tokens in response
inputTokens := gjson.GetBytes(resp.Payload, "input_tokens").Int()
if inputTokens <= 0 {
// Fallback: check usage.prompt_tokens (depends on translator registration)
promptTokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
if promptTokens <= 0 {
t.Fatalf("expected positive token count, got payload: %s", resp.Payload)
}
}
}
func TestCountTokens_EmptyPayload(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
resp, err := e.CountTokens(context.Background(), nil, cliproxyexecutor.Request{
Model: "gpt-4o",
Payload: []byte(`{"model":"gpt-4o","messages":[]}`),
}, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("openai"),
})
if err != nil {
t.Fatalf("CountTokens() error: %v", err)
}
tokens := gjson.GetBytes(resp.Payload, "usage.prompt_tokens").Int()
// Empty messages should return 0 tokens.
if tokens != 0 {
t.Fatalf("expected 0 tokens for empty messages, got %d", tokens)
}
}
func TestStripUnsupportedBetas_RemovesContext1M(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"claude-opus-4.6","betas":["interleaved-thinking-2025-05-14","context-1m-2025-08-07","claude-code-20250219"],"messages":[]}`)
result := stripUnsupportedBetas(body)
betas := gjson.GetBytes(result, "betas")
if !betas.Exists() {
t.Fatal("betas field should still exist after stripping")
}
for _, item := range betas.Array() {
if item.String() == "context-1m-2025-08-07" {
t.Fatal("context-1m-2025-08-07 should have been stripped")
}
}
// Other betas should be preserved
found := false
for _, item := range betas.Array() {
if item.String() == "interleaved-thinking-2025-05-14" {
found = true
}
}
if !found {
t.Fatal("other betas should be preserved")
}
}
func TestStripUnsupportedBetas_NoBetasField(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"gpt-4o","messages":[]}`)
result := stripUnsupportedBetas(body)
// Should be unchanged
if string(result) != string(body) {
t.Fatalf("body should be unchanged when no betas field exists, got %s", string(result))
}
}
func TestStripUnsupportedBetas_MetadataBetas(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"claude-opus-4.6","metadata":{"betas":["context-1m-2025-08-07","other-beta"]},"messages":[]}`)
result := stripUnsupportedBetas(body)
betas := gjson.GetBytes(result, "metadata.betas")
if !betas.Exists() {
t.Fatal("metadata.betas field should still exist after stripping")
}
for _, item := range betas.Array() {
if item.String() == "context-1m-2025-08-07" {
t.Fatal("context-1m-2025-08-07 should have been stripped from metadata.betas")
}
}
if betas.Array()[0].String() != "other-beta" {
t.Fatal("other betas in metadata.betas should be preserved")
}
}
func TestStripUnsupportedBetas_AllBetasStripped(t *testing.T) {
t.Parallel()
body := []byte(`{"model":"claude-opus-4.6","betas":["context-1m-2025-08-07"],"messages":[]}`)
result := stripUnsupportedBetas(body)
betas := gjson.GetBytes(result, "betas")
if betas.Exists() {
t.Fatal("betas field should be deleted when all betas are stripped")
}
}
func TestCopilotModelEntry_Limits(t *testing.T) {
t.Parallel()
tests := []struct {
name string
capabilities map[string]any
wantNil bool
wantPrompt int
wantOutput int
wantContext int
}{
{
name: "nil capabilities",
capabilities: nil,
wantNil: true,
},
{
name: "no limits key",
capabilities: map[string]any{"family": "claude-opus-4.6"},
wantNil: true,
},
{
name: "limits is not a map",
capabilities: map[string]any{"limits": "invalid"},
wantNil: true,
},
{
name: "all zero values",
capabilities: map[string]any{
"limits": map[string]any{
"max_context_window_tokens": float64(0),
"max_prompt_tokens": float64(0),
"max_output_tokens": float64(0),
},
},
wantNil: true,
},
{
name: "individual account limits (128K prompt)",
capabilities: map[string]any{
"limits": map[string]any{
"max_context_window_tokens": float64(144000),
"max_prompt_tokens": float64(128000),
"max_output_tokens": float64(64000),
},
},
wantNil: false,
wantPrompt: 128000,
wantOutput: 64000,
wantContext: 144000,
},
{
name: "business account limits (168K prompt)",
capabilities: map[string]any{
"limits": map[string]any{
"max_context_window_tokens": float64(200000),
"max_prompt_tokens": float64(168000),
"max_output_tokens": float64(32000),
},
},
wantNil: false,
wantPrompt: 168000,
wantOutput: 32000,
wantContext: 200000,
},
{
name: "partial limits (only prompt)",
capabilities: map[string]any{
"limits": map[string]any{
"max_prompt_tokens": float64(128000),
},
},
wantNil: false,
wantPrompt: 128000,
wantOutput: 0,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
entry := copilotauth.CopilotModelEntry{
ID: "claude-opus-4.6",
Capabilities: tt.capabilities,
}
limits := entry.Limits()
if tt.wantNil {
if limits != nil {
t.Fatalf("expected nil limits, got %+v", limits)
}
return
}
if limits == nil {
t.Fatal("expected non-nil limits, got nil")
}
if limits.MaxPromptTokens != tt.wantPrompt {
t.Errorf("MaxPromptTokens = %d, want %d", limits.MaxPromptTokens, tt.wantPrompt)
}
if limits.MaxOutputTokens != tt.wantOutput {
t.Errorf("MaxOutputTokens = %d, want %d", limits.MaxOutputTokens, tt.wantOutput)
}
if tt.wantContext > 0 && limits.MaxContextWindowTokens != tt.wantContext {
t.Errorf("MaxContextWindowTokens = %d, want %d", limits.MaxContextWindowTokens, tt.wantContext)
}
})
}
}

View File

@@ -358,6 +358,16 @@ func ApplyClaudeDeviceProfileHeaders(r *http.Request, profile ClaudeDeviceProfil
r.Header.Set("X-Stainless-Arch", profile.Arch)
}
// DefaultClaudeVersion returns the version string (e.g. "2.1.63") from the
// current baseline device profile. It extracts the version from the User-Agent.
func DefaultClaudeVersion(cfg *config.Config) string {
profile := defaultClaudeDeviceProfile(cfg)
if version, ok := parseClaudeCLIVersion(profile.UserAgent); ok {
return strconv.Itoa(version.major) + "." + strconv.Itoa(version.minor) + "." + strconv.Itoa(version.patch)
}
return "2.1.63"
}
func ApplyClaudeLegacyDeviceHeaders(r *http.Request, ginHeaders http.Header, cfg *config.Config) {
if r == nil {
return

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"html"
"net/http"
"net/url"
"sort"
"strings"
"time"
@@ -19,9 +20,10 @@ import (
)
const (
apiAttemptsKey = "API_UPSTREAM_ATTEMPTS"
apiRequestKey = "API_REQUEST"
apiResponseKey = "API_RESPONSE"
apiAttemptsKey = "API_UPSTREAM_ATTEMPTS"
apiRequestKey = "API_REQUEST"
apiResponseKey = "API_RESPONSE"
apiWebsocketTimelineKey = "API_WEBSOCKET_TIMELINE"
)
// UpstreamRequestLog captures the outbound upstream request details for logging.
@@ -46,6 +48,7 @@ type upstreamAttempt struct {
headersWritten bool
bodyStarted bool
bodyHasContent bool
prevWasSSEEvent bool
errorWritten bool
}
@@ -173,15 +176,157 @@ func AppendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byt
attempt.response.WriteString("Body:\n")
attempt.bodyStarted = true
}
currentChunkIsSSEEvent := bytes.HasPrefix(data, []byte("event:"))
currentChunkIsSSEData := bytes.HasPrefix(data, []byte("data:"))
if attempt.bodyHasContent {
attempt.response.WriteString("\n\n")
separator := "\n\n"
if attempt.prevWasSSEEvent && currentChunkIsSSEData {
separator = "\n"
}
attempt.response.WriteString(separator)
}
attempt.response.WriteString(string(data))
attempt.bodyHasContent = true
attempt.prevWasSSEEvent = currentChunkIsSSEEvent
updateAggregatedResponse(ginCtx, attempts)
}
// RecordAPIWebsocketRequest stores an upstream websocket request event in Gin context.
func RecordAPIWebsocketRequest(ctx context.Context, cfg *config.Config, info UpstreamRequestLog) {
if cfg == nil || !cfg.RequestLog {
return
}
ginCtx := ginContextFrom(ctx)
if ginCtx == nil {
return
}
builder := &strings.Builder{}
builder.WriteString(fmt.Sprintf("Timestamp: %s\n", time.Now().Format(time.RFC3339Nano)))
builder.WriteString("Event: api.websocket.request\n")
if info.URL != "" {
builder.WriteString(fmt.Sprintf("Upstream URL: %s\n", info.URL))
}
if auth := formatAuthInfo(info); auth != "" {
builder.WriteString(fmt.Sprintf("Auth: %s\n", auth))
}
builder.WriteString("Headers:\n")
writeHeaders(builder, info.Headers)
builder.WriteString("\nBody:\n")
if len(info.Body) > 0 {
builder.Write(info.Body)
} else {
builder.WriteString("<empty>")
}
builder.WriteString("\n")
appendAPIWebsocketTimeline(ginCtx, []byte(builder.String()))
}
// RecordAPIWebsocketHandshake stores the upstream websocket handshake response metadata.
func RecordAPIWebsocketHandshake(ctx context.Context, cfg *config.Config, status int, headers http.Header) {
if cfg == nil || !cfg.RequestLog {
return
}
ginCtx := ginContextFrom(ctx)
if ginCtx == nil {
return
}
builder := &strings.Builder{}
builder.WriteString(fmt.Sprintf("Timestamp: %s\n", time.Now().Format(time.RFC3339Nano)))
builder.WriteString("Event: api.websocket.handshake\n")
if status > 0 {
builder.WriteString(fmt.Sprintf("Status: %d\n", status))
}
builder.WriteString("Headers:\n")
writeHeaders(builder, headers)
builder.WriteString("\n")
appendAPIWebsocketTimeline(ginCtx, []byte(builder.String()))
}
// RecordAPIWebsocketUpgradeRejection stores a rejected websocket upgrade as an HTTP attempt.
func RecordAPIWebsocketUpgradeRejection(ctx context.Context, cfg *config.Config, info UpstreamRequestLog, status int, headers http.Header, body []byte) {
if cfg == nil || !cfg.RequestLog {
return
}
ginCtx := ginContextFrom(ctx)
if ginCtx == nil {
return
}
RecordAPIRequest(ctx, cfg, info)
RecordAPIResponseMetadata(ctx, cfg, status, headers)
AppendAPIResponseChunk(ctx, cfg, body)
}
// WebsocketUpgradeRequestURL converts a websocket URL back to its HTTP handshake URL for logging.
func WebsocketUpgradeRequestURL(rawURL string) string {
trimmedURL := strings.TrimSpace(rawURL)
if trimmedURL == "" {
return ""
}
parsed, err := url.Parse(trimmedURL)
if err != nil {
return trimmedURL
}
switch strings.ToLower(parsed.Scheme) {
case "ws":
parsed.Scheme = "http"
case "wss":
parsed.Scheme = "https"
}
return parsed.String()
}
// AppendAPIWebsocketResponse stores an upstream websocket response frame in Gin context.
func AppendAPIWebsocketResponse(ctx context.Context, cfg *config.Config, payload []byte) {
if cfg == nil || !cfg.RequestLog {
return
}
data := bytes.TrimSpace(payload)
if len(data) == 0 {
return
}
ginCtx := ginContextFrom(ctx)
if ginCtx == nil {
return
}
markAPIResponseTimestamp(ginCtx)
builder := &strings.Builder{}
builder.WriteString(fmt.Sprintf("Timestamp: %s\n", time.Now().Format(time.RFC3339Nano)))
builder.WriteString("Event: api.websocket.response\n")
builder.Write(data)
builder.WriteString("\n")
appendAPIWebsocketTimeline(ginCtx, []byte(builder.String()))
}
// RecordAPIWebsocketError stores an upstream websocket error event in Gin context.
func RecordAPIWebsocketError(ctx context.Context, cfg *config.Config, stage string, err error) {
if cfg == nil || !cfg.RequestLog || err == nil {
return
}
ginCtx := ginContextFrom(ctx)
if ginCtx == nil {
return
}
markAPIResponseTimestamp(ginCtx)
builder := &strings.Builder{}
builder.WriteString(fmt.Sprintf("Timestamp: %s\n", time.Now().Format(time.RFC3339Nano)))
builder.WriteString("Event: api.websocket.error\n")
if trimmed := strings.TrimSpace(stage); trimmed != "" {
builder.WriteString(fmt.Sprintf("Stage: %s\n", trimmed))
}
builder.WriteString(fmt.Sprintf("Error: %s\n", err.Error()))
appendAPIWebsocketTimeline(ginCtx, []byte(builder.String()))
}
func ginContextFrom(ctx context.Context) *gin.Context {
ginCtx, _ := ctx.Value("gin").(*gin.Context)
return ginCtx
@@ -259,6 +404,40 @@ func updateAggregatedResponse(ginCtx *gin.Context, attempts []*upstreamAttempt)
ginCtx.Set(apiResponseKey, []byte(builder.String()))
}
func appendAPIWebsocketTimeline(ginCtx *gin.Context, chunk []byte) {
if ginCtx == nil {
return
}
data := bytes.TrimSpace(chunk)
if len(data) == 0 {
return
}
if existing, exists := ginCtx.Get(apiWebsocketTimelineKey); exists {
if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
combined := make([]byte, 0, len(existingBytes)+len(data)+2)
combined = append(combined, existingBytes...)
if !bytes.HasSuffix(existingBytes, []byte("\n")) {
combined = append(combined, '\n')
}
combined = append(combined, '\n')
combined = append(combined, data...)
ginCtx.Set(apiWebsocketTimelineKey, combined)
return
}
}
ginCtx.Set(apiWebsocketTimelineKey, bytes.Clone(data))
}
func markAPIResponseTimestamp(ginCtx *gin.Context) {
if ginCtx == nil {
return
}
if _, exists := ginCtx.Get("API_RESPONSE_TIMESTAMP"); exists {
return
}
ginCtx.Set("API_RESPONSE_TIMESTAMP", time.Now())
}
func writeHeaders(builder *strings.Builder, headers http.Header) {
if builder == nil {
return

View File

@@ -0,0 +1,92 @@
package helps
import (
"crypto/sha256"
"encoding/hex"
"sync"
"time"
"github.com/google/uuid"
)
type sessionIDCacheEntry struct {
value string
expire time.Time
}
var (
sessionIDCache = make(map[string]sessionIDCacheEntry)
sessionIDCacheMu sync.RWMutex
sessionIDCacheCleanupOnce sync.Once
)
const (
sessionIDTTL = time.Hour
sessionIDCacheCleanupPeriod = 15 * time.Minute
)
func startSessionIDCacheCleanup() {
go func() {
ticker := time.NewTicker(sessionIDCacheCleanupPeriod)
defer ticker.Stop()
for range ticker.C {
purgeExpiredSessionIDs()
}
}()
}
func purgeExpiredSessionIDs() {
now := time.Now()
sessionIDCacheMu.Lock()
for key, entry := range sessionIDCache {
if !entry.expire.After(now) {
delete(sessionIDCache, key)
}
}
sessionIDCacheMu.Unlock()
}
func sessionIDCacheKey(apiKey string) string {
sum := sha256.Sum256([]byte(apiKey))
return hex.EncodeToString(sum[:])
}
// CachedSessionID returns a stable session UUID per apiKey, refreshing the TTL on each access.
func CachedSessionID(apiKey string) string {
if apiKey == "" {
return uuid.New().String()
}
sessionIDCacheCleanupOnce.Do(startSessionIDCacheCleanup)
key := sessionIDCacheKey(apiKey)
now := time.Now()
sessionIDCacheMu.RLock()
entry, ok := sessionIDCache[key]
valid := ok && entry.value != "" && entry.expire.After(now)
sessionIDCacheMu.RUnlock()
if valid {
sessionIDCacheMu.Lock()
entry = sessionIDCache[key]
if entry.value != "" && entry.expire.After(now) {
entry.expire = now.Add(sessionIDTTL)
sessionIDCache[key] = entry
sessionIDCacheMu.Unlock()
return entry.value
}
sessionIDCacheMu.Unlock()
}
newID := uuid.New().String()
sessionIDCacheMu.Lock()
entry, ok = sessionIDCache[key]
if !ok || entry.value == "" || !entry.expire.After(now) {
entry.value = newID
}
entry.expire = now.Add(sessionIDTTL)
sessionIDCache[key] = entry
sessionIDCacheMu.Unlock()
return entry.value
}

View File

@@ -0,0 +1,188 @@
package helps
import (
"net"
"net/http"
"strings"
"sync"
"time"
tls "github.com/refraction-networking/utls"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/proxyutil"
log "github.com/sirupsen/logrus"
"golang.org/x/net/http2"
"golang.org/x/net/proxy"
)
// utlsRoundTripper implements http.RoundTripper using utls with Chrome fingerprint
// to bypass Cloudflare's TLS fingerprinting on Anthropic domains.
type utlsRoundTripper struct {
mu sync.Mutex
connections map[string]*http2.ClientConn
pending map[string]*sync.Cond
dialer proxy.Dialer
}
func newUtlsRoundTripper(proxyURL string) *utlsRoundTripper {
var dialer proxy.Dialer = proxy.Direct
if proxyURL != "" {
proxyDialer, mode, errBuild := proxyutil.BuildDialer(proxyURL)
if errBuild != nil {
log.Errorf("utls: failed to configure proxy dialer for %q: %v", proxyURL, errBuild)
} else if mode != proxyutil.ModeInherit && proxyDialer != nil {
dialer = proxyDialer
}
}
return &utlsRoundTripper{
connections: make(map[string]*http2.ClientConn),
pending: make(map[string]*sync.Cond),
dialer: dialer,
}
}
func (t *utlsRoundTripper) getOrCreateConnection(host, addr string) (*http2.ClientConn, error) {
t.mu.Lock()
if h2Conn, ok := t.connections[host]; ok && h2Conn.CanTakeNewRequest() {
t.mu.Unlock()
return h2Conn, nil
}
if cond, ok := t.pending[host]; ok {
cond.Wait()
if h2Conn, ok := t.connections[host]; ok && h2Conn.CanTakeNewRequest() {
t.mu.Unlock()
return h2Conn, nil
}
}
cond := sync.NewCond(&t.mu)
t.pending[host] = cond
t.mu.Unlock()
h2Conn, err := t.createConnection(host, addr)
t.mu.Lock()
defer t.mu.Unlock()
delete(t.pending, host)
cond.Broadcast()
if err != nil {
return nil, err
}
t.connections[host] = h2Conn
return h2Conn, nil
}
func (t *utlsRoundTripper) createConnection(host, addr string) (*http2.ClientConn, error) {
conn, err := t.dialer.Dial("tcp", addr)
if err != nil {
return nil, err
}
tlsConfig := &tls.Config{ServerName: host}
tlsConn := tls.UClient(conn, tlsConfig, tls.HelloChrome_Auto)
if err := tlsConn.Handshake(); err != nil {
conn.Close()
return nil, err
}
tr := &http2.Transport{}
h2Conn, err := tr.NewClientConn(tlsConn)
if err != nil {
tlsConn.Close()
return nil, err
}
return h2Conn, nil
}
func (t *utlsRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
hostname := req.URL.Hostname()
port := req.URL.Port()
if port == "" {
port = "443"
}
addr := net.JoinHostPort(hostname, port)
h2Conn, err := t.getOrCreateConnection(hostname, addr)
if err != nil {
return nil, err
}
resp, err := h2Conn.RoundTrip(req)
if err != nil {
t.mu.Lock()
if cached, ok := t.connections[hostname]; ok && cached == h2Conn {
delete(t.connections, hostname)
}
t.mu.Unlock()
return nil, err
}
return resp, nil
}
// anthropicHosts contains the hosts that should use utls Chrome TLS fingerprint.
var anthropicHosts = map[string]struct{}{
"api.anthropic.com": {},
}
// fallbackRoundTripper uses utls for Anthropic HTTPS hosts and falls back to
// standard transport for all other requests (non-HTTPS or non-Anthropic hosts).
type fallbackRoundTripper struct {
utls *utlsRoundTripper
fallback http.RoundTripper
}
func (f *fallbackRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
if req.URL.Scheme == "https" {
if _, ok := anthropicHosts[strings.ToLower(req.URL.Hostname())]; ok {
return f.utls.RoundTrip(req)
}
}
return f.fallback.RoundTrip(req)
}
// NewUtlsHTTPClient creates an HTTP client using utls Chrome TLS fingerprint.
// Use this for Claude API requests to match real Claude Code's TLS behavior.
// Falls back to standard transport for non-HTTPS requests.
func NewUtlsHTTPClient(cfg *config.Config, auth *cliproxyauth.Auth, timeout time.Duration) *http.Client {
var proxyURL string
if auth != nil {
proxyURL = strings.TrimSpace(auth.ProxyURL)
}
if proxyURL == "" && cfg != nil {
proxyURL = strings.TrimSpace(cfg.ProxyURL)
}
utlsRT := newUtlsRoundTripper(proxyURL)
var standardTransport http.RoundTripper = &http.Transport{
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}).DialContext,
}
if proxyURL != "" {
if transport := buildProxyTransport(proxyURL); transport != nil {
standardTransport = transport
}
}
client := &http.Client{
Transport: &fallbackRoundTripper{
utls: utlsRT,
fallback: standardTransport,
},
}
if timeout > 0 {
client.Timeout = timeout
}
return client
}

View File

@@ -117,6 +117,11 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
return resp, err
}
applyIFlowHeaders(httpReq, apiKey, false)
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {
authID = auth.ID
@@ -225,6 +230,11 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
return nil, err
}
applyIFlowHeaders(httpReq, apiKey, true)
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {
authID = auth.ID

View File

@@ -17,6 +17,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -46,6 +47,11 @@ func (e *KimiExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth
if strings.TrimSpace(token) != "" {
req.Header.Set("Authorization", "Bearer "+token)
}
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(req, attrs)
return nil
}
@@ -114,6 +120,11 @@ func (e *KimiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
return resp, err
}
applyKimiHeadersWithAuth(httpReq, token, false, auth)
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {
authID = auth.ID
@@ -218,6 +229,11 @@ func (e *KimiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
return nil, err
}
applyKimiHeadersWithAuth(httpReq, token, true, auth)
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authID, authLabel, authType, authValue string
if auth != nil {
authID = auth.ID

View File

@@ -15,6 +15,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor/helps"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -29,6 +30,8 @@ const (
qwenRateLimitWindow = time.Minute // sliding window duration
)
var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
var qwenBeijingLoc = func() *time.Location {
loc, err := time.LoadLocation("Asia/Shanghai")
@@ -169,6 +172,36 @@ func timeUntilNextDay() time.Duration {
return tomorrow.Sub(now)
}
// ensureQwenSystemMessage prepends a default system message if none exists in "messages".
func ensureQwenSystemMessage(payload []byte) ([]byte, error) {
messages := gjson.GetBytes(payload, "messages")
if messages.Exists() && messages.IsArray() {
var buf bytes.Buffer
buf.WriteByte('[')
buf.Write(qwenDefaultSystemMessage)
for _, msg := range messages.Array() {
buf.WriteByte(',')
buf.WriteString(msg.Raw)
}
buf.WriteByte(']')
updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes())
if errSet != nil {
return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet)
}
return updated, nil
}
var buf bytes.Buffer
buf.WriteByte('[')
buf.Write(qwenDefaultSystemMessage)
buf.WriteByte(']')
updated, errSet := sjson.SetRawBytes(payload, "messages", buf.Bytes())
if errSet != nil {
return nil, fmt.Errorf("qwen executor: set default system message failed: %w", errSet)
}
return updated, nil
}
// QwenExecutor is a stateless executor for Qwen Code using OpenAI-compatible chat completions.
// If access token is unavailable, it falls back to legacy via ClientAdapter.
type QwenExecutor struct {
@@ -250,6 +283,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
body, err = ensureQwenSystemMessage(body)
if err != nil {
return resp, err
}
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -257,6 +294,11 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
return resp, err
}
applyQwenHeaders(httpReq, token, false)
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authLabel, authType, authValue string
if auth != nil {
authLabel = auth.Label
@@ -351,15 +393,19 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
return nil, err
}
toolsResult := gjson.GetBytes(body, "tools")
// toolsResult := gjson.GetBytes(body, "tools")
// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
// This will have no real consequences. It's just to scare Qwen3.
if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
}
// if (toolsResult.IsArray() && len(toolsResult.Array()) == 0) || !toolsResult.Exists() {
// body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`))
// }
body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
requestedModel := helps.PayloadRequestedModel(opts, req.Model)
body = helps.ApplyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated, requestedModel)
body, err = ensureQwenSystemMessage(body)
if err != nil {
return nil, err
}
url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
@@ -367,6 +413,11 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
return nil, err
}
applyQwenHeaders(httpReq, token, true)
var attrs map[string]string
if auth != nil {
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(httpReq, attrs)
var authLabel, authType, authValue string
if auth != nil {
authLabel = auth.Label

View File

@@ -446,6 +446,7 @@ func (s *GitTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
if email, ok := metadata["email"].(string); ok && email != "" {
auth.Attributes["email"] = email
}
cliproxyauth.ApplyCustomHeadersFromMetadata(auth)
return auth, nil
}

View File

@@ -595,6 +595,7 @@ func (s *ObjectTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Aut
LastRefreshedAt: time.Time{},
NextRefreshAfter: time.Time{},
}
cliproxyauth.ApplyCustomHeadersFromMetadata(auth)
return auth, nil
}

View File

@@ -310,6 +310,7 @@ func (s *PostgresStore) List(ctx context.Context) ([]*cliproxyauth.Auth, error)
LastRefreshedAt: time.Time{},
NextRefreshAfter: time.Time{},
}
cliproxyauth.ApplyCustomHeadersFromMetadata(auth)
auths = append(auths, auth)
}
if err = rows.Err(); err != nil {

View File

@@ -6,7 +6,7 @@
package claude
import (
"bytes"
"fmt"
"strings"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
@@ -31,8 +31,6 @@ const geminiClaudeThoughtSignature = "skip_thought_signature_validator"
// - []byte: The transformed request in Gemini CLI format.
func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
rawJSON := inputRawJSON
rawJSON = bytes.Replace(rawJSON, []byte(`"url":{"type":"string","format":"uri",`), []byte(`"url":{"type":"string",`), -1)
// Build output Gemini CLI request JSON
out := []byte(`{"contents":[]}`)
out, _ = sjson.SetBytes(out, "model", modelName)
@@ -146,13 +144,37 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
})
}
// strip trailing model turn with unanswered function calls —
// Gemini returns empty responses when the last turn is a model
// functionCall with no corresponding user functionResponse.
contents := gjson.GetBytes(out, "contents")
if contents.Exists() && contents.IsArray() {
arr := contents.Array()
if len(arr) > 0 {
last := arr[len(arr)-1]
if last.Get("role").String() == "model" {
hasFC := false
last.Get("parts").ForEach(func(_, part gjson.Result) bool {
if part.Get("functionCall").Exists() {
hasFC = true
return false
}
return true
})
if hasFC {
out, _ = sjson.DeleteBytes(out, fmt.Sprintf("contents.%d", len(arr)-1))
}
}
}
}
// tools
if toolsResult := gjson.GetBytes(rawJSON, "tools"); toolsResult.IsArray() {
hasTools := false
toolsResult.ForEach(func(_, toolResult gjson.Result) bool {
inputSchemaResult := toolResult.Get("input_schema")
if inputSchemaResult.Exists() && inputSchemaResult.IsObject() {
inputSchema := inputSchemaResult.Raw
inputSchema := util.CleanJSONSchemaForGemini(inputSchemaResult.Raw)
tool := []byte(toolResult.Raw)
var err error
tool, err = sjson.DeleteBytes(tool, "input_schema")
@@ -168,6 +190,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
tool, _ = sjson.DeleteBytes(tool, "type")
tool, _ = sjson.DeleteBytes(tool, "cache_control")
tool, _ = sjson.DeleteBytes(tool, "defer_loading")
tool, _ = sjson.DeleteBytes(tool, "eager_input_streaming")
tool, _ = sjson.SetBytes(tool, "name", util.SanitizeFunctionName(gjson.GetBytes(tool, "name").String()))
if gjson.ValidBytes(tool) && gjson.ParseBytes(tool).IsObject() {
if !hasTools {

View File

@@ -157,6 +157,7 @@ func synthesizeFileAuths(ctx *SynthesisContext, fullPath string, data []byte) []
}
}
}
coreauth.ApplyCustomHeadersFromMetadata(a)
ApplyAuthExcludedModelsMeta(a, cfg, perAccountExcluded, "oauth")
// For codex auth files, extract plan_type from the JWT id_token.
if provider == "codex" {
@@ -233,6 +234,11 @@ func SynthesizeGeminiVirtualAuths(primary *coreauth.Auth, metadata map[string]an
if noteVal, hasNote := primary.Attributes["note"]; hasNote && noteVal != "" {
attrs["note"] = noteVal
}
for k, v := range primary.Attributes {
if strings.HasPrefix(k, "header:") && strings.TrimSpace(v) != "" {
attrs[k] = v
}
}
metadataCopy := map[string]any{
"email": email,
"project_id": projectID,

View File

@@ -69,10 +69,14 @@ func TestFileSynthesizer_Synthesize_ValidAuthFile(t *testing.T) {
// Create a valid auth file
authData := map[string]any{
"type": "claude",
"email": "test@example.com",
"proxy_url": "http://proxy.local",
"prefix": "test-prefix",
"type": "claude",
"email": "test@example.com",
"proxy_url": "http://proxy.local",
"prefix": "test-prefix",
"headers": map[string]string{
" X-Test ": " value ",
"X-Empty": " ",
},
"disable_cooling": true,
"request_retry": 2,
}
@@ -110,6 +114,12 @@ func TestFileSynthesizer_Synthesize_ValidAuthFile(t *testing.T) {
if auths[0].ProxyURL != "http://proxy.local" {
t.Errorf("expected proxy_url http://proxy.local, got %s", auths[0].ProxyURL)
}
if got := auths[0].Attributes["header:X-Test"]; got != "value" {
t.Errorf("expected header:X-Test value, got %q", got)
}
if _, ok := auths[0].Attributes["header:X-Empty"]; ok {
t.Errorf("expected header:X-Empty to be absent, got %q", auths[0].Attributes["header:X-Empty"])
}
if v, ok := auths[0].Metadata["disable_cooling"].(bool); !ok || !v {
t.Errorf("expected disable_cooling true, got %v", auths[0].Metadata["disable_cooling"])
}
@@ -450,8 +460,9 @@ func TestSynthesizeGeminiVirtualAuths_MultiProject(t *testing.T) {
Prefix: "test-prefix",
ProxyURL: "http://proxy.local",
Attributes: map[string]string{
"source": "test-source",
"path": "/path/to/auth",
"source": "test-source",
"path": "/path/to/auth",
"header:X-Tra": "value",
},
}
metadata := map[string]any{
@@ -506,6 +517,9 @@ func TestSynthesizeGeminiVirtualAuths_MultiProject(t *testing.T) {
if v.Attributes["runtime_only"] != "true" {
t.Error("expected runtime_only=true")
}
if got := v.Attributes["header:X-Tra"]; got != "value" {
t.Errorf("expected virtual %d header:X-Tra %q, got %q", i, "value", got)
}
if v.Attributes["gemini_virtual_parent"] != "primary-id" {
t.Errorf("expected gemini_virtual_parent=primary-id, got %s", v.Attributes["gemini_virtual_parent"])
}

View File

@@ -9,6 +9,7 @@ import (
"context"
"fmt"
"io"
"net"
"net/http"
"strings"
"time"
@@ -49,7 +50,23 @@ func (h *GeminiCLIAPIHandler) Models() []map[string]any {
// CLIHandler handles CLI-specific requests for Gemini API operations.
// It restricts access to localhost only and routes requests to appropriate internal handlers.
func (h *GeminiCLIAPIHandler) CLIHandler(c *gin.Context) {
if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") {
if h.Cfg == nil || !h.Cfg.EnableGeminiCLIEndpoint {
c.JSON(http.StatusForbidden, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "Gemini CLI endpoint is disabled",
Type: "forbidden",
},
})
return
}
requestHost := c.Request.Host
requestHostname := requestHost
if hostname, _, errSplitHostPort := net.SplitHostPort(requestHost); errSplitHostPort == nil {
requestHostname = hostname
}
if !strings.HasPrefix(c.Request.RemoteAddr, "127.0.0.1:") || requestHostname != "127.0.0.1" {
c.JSON(http.StatusForbidden, handlers.ErrorResponse{
Error: handlers.ErrorDetail{
Message: "CLI reply only allow local access",

View File

@@ -136,6 +136,8 @@ type authAwareStreamExecutor struct {
type invalidJSONStreamExecutor struct{}
type splitResponsesEventStreamExecutor struct{}
func (e *invalidJSONStreamExecutor) Identifier() string { return "codex" }
func (e *invalidJSONStreamExecutor) Execute(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
@@ -165,6 +167,36 @@ func (e *invalidJSONStreamExecutor) HttpRequest(ctx context.Context, auth *corea
}
}
func (e *splitResponsesEventStreamExecutor) Identifier() string { return "split-sse" }
func (e *splitResponsesEventStreamExecutor) Execute(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "Execute not implemented"}
}
func (e *splitResponsesEventStreamExecutor) ExecuteStream(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (*coreexecutor.StreamResult, error) {
ch := make(chan coreexecutor.StreamChunk, 2)
ch <- coreexecutor.StreamChunk{Payload: []byte("event: response.completed")}
ch <- coreexecutor.StreamChunk{Payload: []byte("data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp-1\",\"output\":[]}}")}
close(ch)
return &coreexecutor.StreamResult{Chunks: ch}, nil
}
func (e *splitResponsesEventStreamExecutor) Refresh(ctx context.Context, auth *coreauth.Auth) (*coreauth.Auth, error) {
return auth, nil
}
func (e *splitResponsesEventStreamExecutor) CountTokens(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "CountTokens not implemented"}
}
func (e *splitResponsesEventStreamExecutor) HttpRequest(ctx context.Context, auth *coreauth.Auth, req *http.Request) (*http.Response, error) {
return nil, &coreauth.Error{
Code: "not_implemented",
Message: "HttpRequest not implemented",
HTTPStatus: http.StatusNotImplemented,
}
}
func (e *authAwareStreamExecutor) Identifier() string { return "codex" }
func (e *authAwareStreamExecutor) Execute(context.Context, *coreauth.Auth, coreexecutor.Request, coreexecutor.Options) (coreexecutor.Response, error) {
@@ -607,3 +639,52 @@ func TestExecuteStreamWithAuthManager_ValidatesOpenAIResponsesStreamDataJSON(t *
t.Fatalf("expected terminal error")
}
}
func TestExecuteStreamWithAuthManager_AllowsSplitOpenAIResponsesSSEEventLines(t *testing.T) {
executor := &splitResponsesEventStreamExecutor{}
manager := coreauth.NewManager(nil, nil, nil)
manager.RegisterExecutor(executor)
auth1 := &coreauth.Auth{
ID: "auth1",
Provider: "split-sse",
Status: coreauth.StatusActive,
Metadata: map[string]any{"email": "test1@example.com"},
}
if _, err := manager.Register(context.Background(), auth1); err != nil {
t.Fatalf("manager.Register(auth1): %v", err)
}
registry.GetGlobalRegistry().RegisterClient(auth1.ID, auth1.Provider, []*registry.ModelInfo{{ID: "test-model"}})
t.Cleanup(func() {
registry.GetGlobalRegistry().UnregisterClient(auth1.ID)
})
handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager)
dataChan, _, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai-response", "test-model", []byte(`{"model":"test-model"}`), "")
if dataChan == nil || errChan == nil {
t.Fatalf("expected non-nil channels")
}
var got []string
for chunk := range dataChan {
got = append(got, string(chunk))
}
for msg := range errChan {
if msg != nil {
t.Fatalf("unexpected error: %+v", msg)
}
}
if len(got) != 2 {
t.Fatalf("expected 2 forwarded chunks, got %d: %#v", len(got), got)
}
if got[0] != "event: response.completed" {
t.Fatalf("unexpected first chunk: %q", got[0])
}
expectedData := "data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp-1\",\"output\":[]}}"
if got[1] != expectedData {
t.Fatalf("unexpected second chunk.\nGot: %q\nWant: %q", got[1], expectedData)
}
}

View File

@@ -5,6 +5,18 @@ import (
"strings"
)
// gatewayHeaderPrefixes lists header name prefixes injected by known AI gateway
// proxies. Claude Code's client-side telemetry detects these and reports the
// gateway type, so we strip them from upstream responses to avoid detection.
var gatewayHeaderPrefixes = []string{
"x-litellm-",
"helicone-",
"x-portkey-",
"cf-aig-",
"x-kong-",
"x-bt-",
}
// hopByHopHeaders lists RFC 7230 Section 6.1 hop-by-hop headers that MUST NOT
// be forwarded by proxies, plus security-sensitive headers that should not leak.
var hopByHopHeaders = map[string]struct{}{
@@ -40,6 +52,19 @@ func FilterUpstreamHeaders(src http.Header) http.Header {
if _, scoped := connectionScoped[canonicalKey]; scoped {
continue
}
// Strip headers injected by known AI gateway proxies to avoid
// Claude Code client-side gateway detection.
lowerKey := strings.ToLower(key)
gatewayMatch := false
for _, prefix := range gatewayHeaderPrefixes {
if strings.HasPrefix(lowerKey, prefix) {
gatewayMatch = true
break
}
}
if gatewayMatch {
continue
}
dst[key] = values
}
if len(dst) == 0 {

View File

@@ -9,6 +9,7 @@ package openai
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
@@ -30,11 +31,13 @@ func writeResponsesSSEChunk(w io.Writer, chunk []byte) {
if _, err := w.Write(chunk); err != nil {
return
}
if bytes.HasSuffix(chunk, []byte("\n\n")) {
if bytes.HasSuffix(chunk, []byte("\n\n")) || bytes.HasSuffix(chunk, []byte("\r\n\r\n")) {
return
}
suffix := []byte("\n\n")
if bytes.HasSuffix(chunk, []byte("\n")) {
if bytes.HasSuffix(chunk, []byte("\r\n")) {
suffix = []byte("\r\n")
} else if bytes.HasSuffix(chunk, []byte("\n")) {
suffix = []byte("\n")
}
if _, err := w.Write(suffix); err != nil {
@@ -42,6 +45,156 @@ func writeResponsesSSEChunk(w io.Writer, chunk []byte) {
}
}
type responsesSSEFramer struct {
pending []byte
}
func (f *responsesSSEFramer) WriteChunk(w io.Writer, chunk []byte) {
if len(chunk) == 0 {
return
}
if responsesSSENeedsLineBreak(f.pending, chunk) {
f.pending = append(f.pending, '\n')
}
f.pending = append(f.pending, chunk...)
for {
frameLen := responsesSSEFrameLen(f.pending)
if frameLen == 0 {
break
}
writeResponsesSSEChunk(w, f.pending[:frameLen])
copy(f.pending, f.pending[frameLen:])
f.pending = f.pending[:len(f.pending)-frameLen]
}
if len(bytes.TrimSpace(f.pending)) == 0 {
f.pending = f.pending[:0]
return
}
if len(f.pending) == 0 || !responsesSSECanEmitWithoutDelimiter(f.pending) {
return
}
writeResponsesSSEChunk(w, f.pending)
f.pending = f.pending[:0]
}
func (f *responsesSSEFramer) Flush(w io.Writer) {
if len(f.pending) == 0 {
return
}
if len(bytes.TrimSpace(f.pending)) == 0 {
f.pending = f.pending[:0]
return
}
if !responsesSSECanEmitWithoutDelimiter(f.pending) {
f.pending = f.pending[:0]
return
}
writeResponsesSSEChunk(w, f.pending)
f.pending = f.pending[:0]
}
func responsesSSEFrameLen(chunk []byte) int {
if len(chunk) == 0 {
return 0
}
lf := bytes.Index(chunk, []byte("\n\n"))
crlf := bytes.Index(chunk, []byte("\r\n\r\n"))
switch {
case lf < 0:
if crlf < 0 {
return 0
}
return crlf + 4
case crlf < 0:
return lf + 2
case lf < crlf:
return lf + 2
default:
return crlf + 4
}
}
func responsesSSENeedsMoreData(chunk []byte) bool {
trimmed := bytes.TrimSpace(chunk)
if len(trimmed) == 0 {
return false
}
return responsesSSEHasField(trimmed, []byte("event:")) && !responsesSSEHasField(trimmed, []byte("data:"))
}
func responsesSSEHasField(chunk []byte, prefix []byte) bool {
s := chunk
for len(s) > 0 {
line := s
if i := bytes.IndexByte(s, '\n'); i >= 0 {
line = s[:i]
s = s[i+1:]
} else {
s = nil
}
line = bytes.TrimSpace(line)
if bytes.HasPrefix(line, prefix) {
return true
}
}
return false
}
func responsesSSECanEmitWithoutDelimiter(chunk []byte) bool {
trimmed := bytes.TrimSpace(chunk)
if len(trimmed) == 0 || responsesSSENeedsMoreData(trimmed) || !responsesSSEHasField(trimmed, []byte("data:")) {
return false
}
return responsesSSEDataLinesValid(trimmed)
}
func responsesSSEDataLinesValid(chunk []byte) bool {
s := chunk
for len(s) > 0 {
line := s
if i := bytes.IndexByte(s, '\n'); i >= 0 {
line = s[:i]
s = s[i+1:]
} else {
s = nil
}
line = bytes.TrimSpace(line)
if len(line) == 0 || !bytes.HasPrefix(line, []byte("data:")) {
continue
}
data := bytes.TrimSpace(line[len("data:"):])
if len(data) == 0 || bytes.Equal(data, []byte("[DONE]")) {
continue
}
if !json.Valid(data) {
return false
}
}
return true
}
func responsesSSENeedsLineBreak(pending, chunk []byte) bool {
if len(pending) == 0 || len(chunk) == 0 {
return false
}
if bytes.HasSuffix(pending, []byte("\n")) || bytes.HasSuffix(pending, []byte("\r")) {
return false
}
if chunk[0] == '\n' || chunk[0] == '\r' {
return false
}
trimmed := bytes.TrimLeft(chunk, " \t")
if len(trimmed) == 0 {
return false
}
for _, prefix := range [][]byte{[]byte("data:"), []byte("event:"), []byte("id:"), []byte("retry:"), []byte(":")} {
if bytes.HasPrefix(trimmed, prefix) {
return true
}
}
return false
}
// OpenAIResponsesAPIHandler contains the handlers for OpenAIResponses API endpoints.
// It holds a pool of clients to interact with the backend service.
type OpenAIResponsesAPIHandler struct {
@@ -254,6 +407,7 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ
c.Header("Connection", "keep-alive")
c.Header("Access-Control-Allow-Origin", "*")
}
framer := &responsesSSEFramer{}
// Peek at the first chunk
for {
@@ -291,11 +445,11 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ
handlers.WriteUpstreamHeaders(c.Writer.Header(), upstreamHeaders)
// Write first chunk logic (matching forwardResponsesStream)
writeResponsesSSEChunk(c.Writer, chunk)
framer.WriteChunk(c.Writer, chunk)
flusher.Flush()
// Continue
h.forwardResponsesStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
h.forwardResponsesStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan, framer)
return
}
}
@@ -413,12 +567,16 @@ func (h *OpenAIResponsesAPIHandler) forwardChatAsResponsesStream(c *gin.Context,
})
}
func (h *OpenAIResponsesAPIHandler) forwardResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
func (h *OpenAIResponsesAPIHandler) forwardResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage, framer *responsesSSEFramer) {
if framer == nil {
framer = &responsesSSEFramer{}
}
h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
WriteChunk: func(chunk []byte) {
writeResponsesSSEChunk(c.Writer, chunk)
framer.WriteChunk(c.Writer, chunk)
},
WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
framer.Flush(c.Writer)
if errMsg == nil {
return
}
@@ -434,6 +592,7 @@ func (h *OpenAIResponsesAPIHandler) forwardResponsesStream(c *gin.Context, flush
_, _ = fmt.Fprintf(c.Writer, "\nevent: error\ndata: %s\n\n", string(chunk))
},
WriteDone: func() {
framer.Flush(c.Writer)
_, _ = c.Writer.Write([]byte("\n"))
},
})

View File

@@ -32,7 +32,7 @@ func TestForwardResponsesStreamTerminalErrorUsesResponsesErrorChunk(t *testing.T
errs <- &interfaces.ErrorMessage{StatusCode: http.StatusInternalServerError, Error: errors.New("unexpected EOF")}
close(errs)
h.forwardResponsesStream(c, flusher, func(error) {}, data, errs)
h.forwardResponsesStream(c, flusher, func(error) {}, data, errs, nil)
body := recorder.Body.String()
if !strings.Contains(body, `"type":"error"`) {
t.Fatalf("expected responses error chunk, got: %q", body)

View File

@@ -12,7 +12,9 @@ import (
sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
)
func TestForwardResponsesStreamSeparatesDataOnlySSEChunks(t *testing.T) {
func newResponsesStreamTestHandler(t *testing.T) (*OpenAIResponsesAPIHandler, *httptest.ResponseRecorder, *gin.Context, http.Flusher) {
t.Helper()
gin.SetMode(gin.TestMode)
base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, nil)
h := NewOpenAIResponsesAPIHandler(base)
@@ -26,6 +28,12 @@ func TestForwardResponsesStreamSeparatesDataOnlySSEChunks(t *testing.T) {
t.Fatalf("expected gin writer to implement http.Flusher")
}
return h, recorder, c, flusher
}
func TestForwardResponsesStreamSeparatesDataOnlySSEChunks(t *testing.T) {
h, recorder, c, flusher := newResponsesStreamTestHandler(t)
data := make(chan []byte, 2)
errs := make(chan *interfaces.ErrorMessage)
data <- []byte("data: {\"type\":\"response.output_item.done\",\"item\":{\"type\":\"function_call\",\"arguments\":\"{}\"}}")
@@ -33,7 +41,7 @@ func TestForwardResponsesStreamSeparatesDataOnlySSEChunks(t *testing.T) {
close(data)
close(errs)
h.forwardResponsesStream(c, flusher, func(error) {}, data, errs)
h.forwardResponsesStream(c, flusher, func(error) {}, data, errs, nil)
body := recorder.Body.String()
parts := strings.Split(strings.TrimSpace(body), "\n\n")
if len(parts) != 2 {
@@ -50,3 +58,85 @@ func TestForwardResponsesStreamSeparatesDataOnlySSEChunks(t *testing.T) {
t.Errorf("unexpected second event.\nGot: %q\nWant: %q", parts[1], expectedPart2)
}
}
func TestForwardResponsesStreamReassemblesSplitSSEEventChunks(t *testing.T) {
h, recorder, c, flusher := newResponsesStreamTestHandler(t)
data := make(chan []byte, 3)
errs := make(chan *interfaces.ErrorMessage)
data <- []byte("event: response.created")
data <- []byte("data: {\"type\":\"response.created\",\"response\":{\"id\":\"resp-1\"}}")
data <- []byte("\n")
close(data)
close(errs)
h.forwardResponsesStream(c, flusher, func(error) {}, data, errs, nil)
got := strings.TrimSuffix(recorder.Body.String(), "\n")
want := "event: response.created\ndata: {\"type\":\"response.created\",\"response\":{\"id\":\"resp-1\"}}\n\n"
if got != want {
t.Fatalf("unexpected split-event framing.\nGot: %q\nWant: %q", got, want)
}
}
func TestForwardResponsesStreamPreservesValidFullSSEEventChunks(t *testing.T) {
h, recorder, c, flusher := newResponsesStreamTestHandler(t)
data := make(chan []byte, 1)
errs := make(chan *interfaces.ErrorMessage)
chunk := []byte("event: response.created\ndata: {\"type\":\"response.created\",\"response\":{\"id\":\"resp-1\"}}\n\n")
data <- chunk
close(data)
close(errs)
h.forwardResponsesStream(c, flusher, func(error) {}, data, errs, nil)
got := strings.TrimSuffix(recorder.Body.String(), "\n")
if got != string(chunk) {
t.Fatalf("unexpected full-event framing.\nGot: %q\nWant: %q", got, string(chunk))
}
}
func TestForwardResponsesStreamBuffersSplitDataPayloadChunks(t *testing.T) {
h, recorder, c, flusher := newResponsesStreamTestHandler(t)
data := make(chan []byte, 2)
errs := make(chan *interfaces.ErrorMessage)
data <- []byte("data: {\"type\":\"response.created\"")
data <- []byte(",\"response\":{\"id\":\"resp-1\"}}")
close(data)
close(errs)
h.forwardResponsesStream(c, flusher, func(error) {}, data, errs, nil)
got := recorder.Body.String()
want := "data: {\"type\":\"response.created\",\"response\":{\"id\":\"resp-1\"}}\n\n\n"
if got != want {
t.Fatalf("unexpected split-data framing.\nGot: %q\nWant: %q", got, want)
}
}
func TestResponsesSSENeedsLineBreakSkipsChunksThatAlreadyStartWithNewline(t *testing.T) {
if responsesSSENeedsLineBreak([]byte("event: response.created"), []byte("\n")) {
t.Fatal("expected no injected newline before newline-only chunk")
}
if responsesSSENeedsLineBreak([]byte("event: response.created"), []byte("\r\n")) {
t.Fatal("expected no injected newline before CRLF chunk")
}
}
func TestForwardResponsesStreamDropsIncompleteTrailingDataChunkOnFlush(t *testing.T) {
h, recorder, c, flusher := newResponsesStreamTestHandler(t)
data := make(chan []byte, 1)
errs := make(chan *interfaces.ErrorMessage)
data <- []byte("data: {\"type\":\"response.created\"")
close(data)
close(errs)
h.forwardResponsesStream(c, flusher, func(error) {}, data, errs, nil)
if got := recorder.Body.String(); got != "\n" {
t.Fatalf("expected incomplete trailing data to be dropped on flush.\nGot: %q", got)
}
}

View File

@@ -32,7 +32,7 @@ const (
wsEventTypeCompleted = "response.completed"
wsDoneMarker = "[DONE]"
wsTurnStateHeader = "x-codex-turn-state"
wsRequestBodyKey = "REQUEST_BODY_OVERRIDE"
wsTimelineBodyKey = "WEBSOCKET_TIMELINE_OVERRIDE"
)
var responsesWebsocketUpgrader = websocket.Upgrader{
@@ -57,10 +57,11 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) {
clientIP := websocketClientAddress(c)
log.Infof("responses websocket: client connected id=%s remote=%s", passthroughSessionID, clientIP)
var wsTerminateErr error
var wsBodyLog strings.Builder
var wsTimelineLog strings.Builder
defer func() {
releaseResponsesWebsocketToolCaches(downstreamSessionKey)
if wsTerminateErr != nil {
appendWebsocketTimelineDisconnect(&wsTimelineLog, wsTerminateErr, time.Now())
// log.Infof("responses websocket: session closing id=%s reason=%v", passthroughSessionID, wsTerminateErr)
} else {
log.Infof("responses websocket: session closing id=%s", passthroughSessionID)
@@ -69,7 +70,7 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) {
h.AuthManager.CloseExecutionSession(passthroughSessionID)
log.Infof("responses websocket: upstream execution session closed id=%s", passthroughSessionID)
}
setWebsocketRequestBody(c, wsBodyLog.String())
setWebsocketTimelineBody(c, wsTimelineLog.String())
if errClose := conn.Close(); errClose != nil {
log.Warnf("responses websocket: close connection error: %v", errClose)
}
@@ -83,7 +84,6 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) {
msgType, payload, errReadMessage := conn.ReadMessage()
if errReadMessage != nil {
wsTerminateErr = errReadMessage
appendWebsocketEvent(&wsBodyLog, "disconnect", []byte(errReadMessage.Error()))
if websocket.IsCloseError(errReadMessage, websocket.CloseNormalClosure, websocket.CloseGoingAway, websocket.CloseNoStatusReceived) {
log.Infof("responses websocket: client disconnected id=%s error=%v", passthroughSessionID, errReadMessage)
} else {
@@ -101,7 +101,7 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) {
// websocketPayloadEventType(payload),
// websocketPayloadPreview(payload),
// )
appendWebsocketEvent(&wsBodyLog, "request", payload)
appendWebsocketTimelineEvent(&wsTimelineLog, "request", payload, time.Now())
allowIncrementalInputWithPreviousResponseID := false
if pinnedAuthID != "" && h != nil && h.AuthManager != nil {
@@ -128,8 +128,7 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) {
if errMsg != nil {
h.LoggingAPIResponseError(context.WithValue(context.Background(), "gin", c), errMsg)
markAPIResponseTimestamp(c)
errorPayload, errWrite := writeResponsesWebsocketError(conn, errMsg)
appendWebsocketEvent(&wsBodyLog, "response", errorPayload)
errorPayload, errWrite := writeResponsesWebsocketError(conn, &wsTimelineLog, errMsg)
log.Infof(
"responses websocket: downstream_out id=%s type=%d event=%s payload=%s",
passthroughSessionID,
@@ -157,9 +156,8 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) {
}
lastRequest = updatedLastRequest
lastResponseOutput = []byte("[]")
if errWrite := writeResponsesWebsocketSyntheticPrewarm(c, conn, requestJSON, &wsBodyLog, passthroughSessionID); errWrite != nil {
if errWrite := writeResponsesWebsocketSyntheticPrewarm(c, conn, requestJSON, &wsTimelineLog, passthroughSessionID); errWrite != nil {
wsTerminateErr = errWrite
appendWebsocketEvent(&wsBodyLog, "disconnect", []byte(errWrite.Error()))
return
}
continue
@@ -192,10 +190,9 @@ func (h *OpenAIResponsesAPIHandler) ResponsesWebsocket(c *gin.Context) {
}
dataChan, _, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, requestJSON, "")
completedOutput, errForward := h.forwardResponsesWebsocket(c, conn, cliCancel, dataChan, errChan, &wsBodyLog, passthroughSessionID)
completedOutput, errForward := h.forwardResponsesWebsocket(c, conn, cliCancel, dataChan, errChan, &wsTimelineLog, passthroughSessionID)
if errForward != nil {
wsTerminateErr = errForward
appendWebsocketEvent(&wsBodyLog, "disconnect", []byte(errForward.Error()))
log.Warnf("responses websocket: forward failed id=%s error=%v", passthroughSessionID, errForward)
return
}
@@ -382,7 +379,7 @@ func shouldReplaceWebsocketTranscript(rawJSON []byte, nextInput gjson.Result) bo
for _, item := range nextInput.Array() {
switch strings.TrimSpace(item.Get("type").String()) {
case "function_call":
case "function_call", "custom_tool_call":
return true
case "message":
role := strings.TrimSpace(item.Get("role").String())
@@ -434,7 +431,7 @@ func dedupeFunctionCallsByCallID(rawArray string) (string, error) {
continue
}
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
if itemType == "function_call" {
if isResponsesToolCallType(itemType) {
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
if callID != "" {
if _, ok := seenCallIDs[callID]; ok {
@@ -597,7 +594,7 @@ func writeResponsesWebsocketSyntheticPrewarm(
c *gin.Context,
conn *websocket.Conn,
requestJSON []byte,
wsBodyLog *strings.Builder,
wsTimelineLog *strings.Builder,
sessionID string,
) error {
payloads, errPayloads := syntheticResponsesWebsocketPrewarmPayloads(requestJSON)
@@ -606,7 +603,6 @@ func writeResponsesWebsocketSyntheticPrewarm(
}
for i := 0; i < len(payloads); i++ {
markAPIResponseTimestamp(c)
appendWebsocketEvent(wsBodyLog, "response", payloads[i])
// log.Infof(
// "responses websocket: downstream_out id=%s type=%d event=%s payload=%s",
// sessionID,
@@ -614,7 +610,7 @@ func writeResponsesWebsocketSyntheticPrewarm(
// websocketPayloadEventType(payloads[i]),
// websocketPayloadPreview(payloads[i]),
// )
if errWrite := conn.WriteMessage(websocket.TextMessage, payloads[i]); errWrite != nil {
if errWrite := writeResponsesWebsocketPayload(conn, wsTimelineLog, payloads[i], time.Now()); errWrite != nil {
log.Warnf(
"responses websocket: downstream_out write failed id=%s event=%s error=%v",
sessionID,
@@ -713,7 +709,7 @@ func (h *OpenAIResponsesAPIHandler) forwardResponsesWebsocket(
cancel handlers.APIHandlerCancelFunc,
data <-chan []byte,
errs <-chan *interfaces.ErrorMessage,
wsBodyLog *strings.Builder,
wsTimelineLog *strings.Builder,
sessionID string,
) ([]byte, error) {
completed := false
@@ -736,8 +732,7 @@ func (h *OpenAIResponsesAPIHandler) forwardResponsesWebsocket(
if errMsg != nil {
h.LoggingAPIResponseError(context.WithValue(context.Background(), "gin", c), errMsg)
markAPIResponseTimestamp(c)
errorPayload, errWrite := writeResponsesWebsocketError(conn, errMsg)
appendWebsocketEvent(wsBodyLog, "response", errorPayload)
errorPayload, errWrite := writeResponsesWebsocketError(conn, wsTimelineLog, errMsg)
log.Infof(
"responses websocket: downstream_out id=%s type=%d event=%s payload=%s",
sessionID,
@@ -771,8 +766,7 @@ func (h *OpenAIResponsesAPIHandler) forwardResponsesWebsocket(
}
h.LoggingAPIResponseError(context.WithValue(context.Background(), "gin", c), errMsg)
markAPIResponseTimestamp(c)
errorPayload, errWrite := writeResponsesWebsocketError(conn, errMsg)
appendWebsocketEvent(wsBodyLog, "response", errorPayload)
errorPayload, errWrite := writeResponsesWebsocketError(conn, wsTimelineLog, errMsg)
log.Infof(
"responses websocket: downstream_out id=%s type=%d event=%s payload=%s",
sessionID,
@@ -806,7 +800,6 @@ func (h *OpenAIResponsesAPIHandler) forwardResponsesWebsocket(
completedOutput = responseCompletedOutputFromPayload(payloads[i])
}
markAPIResponseTimestamp(c)
appendWebsocketEvent(wsBodyLog, "response", payloads[i])
// log.Infof(
// "responses websocket: downstream_out id=%s type=%d event=%s payload=%s",
// sessionID,
@@ -814,7 +807,7 @@ func (h *OpenAIResponsesAPIHandler) forwardResponsesWebsocket(
// websocketPayloadEventType(payloads[i]),
// websocketPayloadPreview(payloads[i]),
// )
if errWrite := conn.WriteMessage(websocket.TextMessage, payloads[i]); errWrite != nil {
if errWrite := writeResponsesWebsocketPayload(conn, wsTimelineLog, payloads[i], time.Now()); errWrite != nil {
log.Warnf(
"responses websocket: downstream_out write failed id=%s event=%s error=%v",
sessionID,
@@ -870,7 +863,7 @@ func websocketJSONPayloadsFromChunk(chunk []byte) [][]byte {
return payloads
}
func writeResponsesWebsocketError(conn *websocket.Conn, errMsg *interfaces.ErrorMessage) ([]byte, error) {
func writeResponsesWebsocketError(conn *websocket.Conn, wsTimelineLog *strings.Builder, errMsg *interfaces.ErrorMessage) ([]byte, error) {
status := http.StatusInternalServerError
errText := http.StatusText(status)
if errMsg != nil {
@@ -940,7 +933,7 @@ func writeResponsesWebsocketError(conn *websocket.Conn, errMsg *interfaces.Error
}
}
return payload, conn.WriteMessage(websocket.TextMessage, payload)
return payload, writeResponsesWebsocketPayload(conn, wsTimelineLog, payload, time.Now())
}
func appendWebsocketEvent(builder *strings.Builder, eventType string, payload []byte) {
@@ -979,7 +972,11 @@ func websocketPayloadPreview(payload []byte) string {
return previewText
}
func setWebsocketRequestBody(c *gin.Context, body string) {
func setWebsocketTimelineBody(c *gin.Context, body string) {
setWebsocketBody(c, wsTimelineBodyKey, body)
}
func setWebsocketBody(c *gin.Context, key string, body string) {
if c == nil {
return
}
@@ -987,7 +984,40 @@ func setWebsocketRequestBody(c *gin.Context, body string) {
if trimmedBody == "" {
return
}
c.Set(wsRequestBodyKey, []byte(trimmedBody))
c.Set(key, []byte(trimmedBody))
}
func writeResponsesWebsocketPayload(conn *websocket.Conn, wsTimelineLog *strings.Builder, payload []byte, timestamp time.Time) error {
appendWebsocketTimelineEvent(wsTimelineLog, "response", payload, timestamp)
return conn.WriteMessage(websocket.TextMessage, payload)
}
func appendWebsocketTimelineDisconnect(builder *strings.Builder, err error, timestamp time.Time) {
if err == nil {
return
}
appendWebsocketTimelineEvent(builder, "disconnect", []byte(err.Error()), timestamp)
}
func appendWebsocketTimelineEvent(builder *strings.Builder, eventType string, payload []byte, timestamp time.Time) {
if builder == nil {
return
}
trimmedPayload := bytes.TrimSpace(payload)
if len(trimmedPayload) == 0 {
return
}
if builder.Len() > 0 {
builder.WriteString("\n")
}
builder.WriteString("Timestamp: ")
builder.WriteString(timestamp.Format(time.RFC3339Nano))
builder.WriteString("\n")
builder.WriteString("Event: websocket.")
builder.WriteString(eventType)
builder.WriteString("\n")
builder.Write(trimmedPayload)
builder.WriteString("\n")
}
func markAPIResponseTimestamp(c *gin.Context) {

View File

@@ -392,27 +392,45 @@ func TestAppendWebsocketEvent(t *testing.T) {
}
}
func TestSetWebsocketRequestBody(t *testing.T) {
func TestAppendWebsocketTimelineEvent(t *testing.T) {
var builder strings.Builder
ts := time.Date(2026, time.April, 1, 12, 34, 56, 789000000, time.UTC)
appendWebsocketTimelineEvent(&builder, "request", []byte(" {\"type\":\"response.create\"}\n"), ts)
got := builder.String()
if !strings.Contains(got, "Timestamp: 2026-04-01T12:34:56.789Z") {
t.Fatalf("timeline timestamp not found: %s", got)
}
if !strings.Contains(got, "Event: websocket.request") {
t.Fatalf("timeline event not found: %s", got)
}
if !strings.Contains(got, "{\"type\":\"response.create\"}") {
t.Fatalf("timeline payload not found: %s", got)
}
}
func TestSetWebsocketTimelineBody(t *testing.T) {
gin.SetMode(gin.TestMode)
recorder := httptest.NewRecorder()
c, _ := gin.CreateTestContext(recorder)
setWebsocketRequestBody(c, " \n ")
if _, exists := c.Get(wsRequestBodyKey); exists {
t.Fatalf("request body key should not be set for empty body")
setWebsocketTimelineBody(c, " \n ")
if _, exists := c.Get(wsTimelineBodyKey); exists {
t.Fatalf("timeline body key should not be set for empty body")
}
setWebsocketRequestBody(c, "event body")
value, exists := c.Get(wsRequestBodyKey)
setWebsocketTimelineBody(c, "timeline body")
value, exists := c.Get(wsTimelineBodyKey)
if !exists {
t.Fatalf("request body key not set")
t.Fatalf("timeline body key not set")
}
bodyBytes, ok := value.([]byte)
if !ok {
t.Fatalf("request body key type mismatch")
t.Fatalf("timeline body key type mismatch")
}
if string(bodyBytes) != "event body" {
t.Fatalf("request body = %q, want %q", string(bodyBytes), "event body")
if string(bodyBytes) != "timeline body" {
t.Fatalf("timeline body = %q, want %q", string(bodyBytes), "timeline body")
}
}
@@ -502,6 +520,92 @@ func TestRepairResponsesWebsocketToolCallsDropsOrphanOutputWhenCallMissing(t *te
}
}
func TestRepairResponsesWebsocketToolCallsInsertsCachedCustomToolOutput(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
cacheWarm := []byte(`{"previous_response_id":"resp-1","input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"}]}`)
warmed := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, cacheWarm)
if gjson.GetBytes(warmed, "input.0.call_id").String() != "call-1" {
t.Fatalf("expected warmup output to remain")
}
raw := []byte(`{"input":[{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"},{"type":"message","id":"msg-1"}]}`)
repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw)
input := gjson.GetBytes(repaired, "input").Array()
if len(input) != 3 {
t.Fatalf("repaired input len = %d, want 3", len(input))
}
if input[0].Get("type").String() != "custom_tool_call" || input[0].Get("call_id").String() != "call-1" {
t.Fatalf("unexpected first item: %s", input[0].Raw)
}
if input[1].Get("type").String() != "custom_tool_call_output" || input[1].Get("call_id").String() != "call-1" {
t.Fatalf("missing inserted output: %s", input[1].Raw)
}
if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" {
t.Fatalf("unexpected trailing item: %s", input[2].Raw)
}
}
func TestRepairResponsesWebsocketToolCallsDropsOrphanCustomToolCall(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
raw := []byte(`{"input":[{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"},{"type":"message","id":"msg-1"}]}`)
repaired := repairResponsesWebsocketToolCallsWithCache(cache, sessionKey, raw)
input := gjson.GetBytes(repaired, "input").Array()
if len(input) != 1 {
t.Fatalf("repaired input len = %d, want 1", len(input))
}
if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" {
t.Fatalf("unexpected remaining item: %s", input[0].Raw)
}
}
func TestRepairResponsesWebsocketToolCallsInsertsCachedCustomToolCallForOrphanOutput(t *testing.T) {
outputCache := newWebsocketToolOutputCache(time.Minute, 10)
callCache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
callCache.record(sessionKey, "call-1", []byte(`{"type":"custom_tool_call","call_id":"call-1","name":"apply_patch"}`))
raw := []byte(`{"input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`)
repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw)
input := gjson.GetBytes(repaired, "input").Array()
if len(input) != 3 {
t.Fatalf("repaired input len = %d, want 3", len(input))
}
if input[0].Get("type").String() != "custom_tool_call" || input[0].Get("call_id").String() != "call-1" {
t.Fatalf("missing inserted call: %s", input[0].Raw)
}
if input[1].Get("type").String() != "custom_tool_call_output" || input[1].Get("call_id").String() != "call-1" {
t.Fatalf("unexpected output item: %s", input[1].Raw)
}
if input[2].Get("type").String() != "message" || input[2].Get("id").String() != "msg-1" {
t.Fatalf("unexpected trailing item: %s", input[2].Raw)
}
}
func TestRepairResponsesWebsocketToolCallsDropsOrphanCustomToolOutputWhenCallMissing(t *testing.T) {
outputCache := newWebsocketToolOutputCache(time.Minute, 10)
callCache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
raw := []byte(`{"input":[{"type":"custom_tool_call_output","call_id":"call-1","output":"ok"},{"type":"message","id":"msg-1"}]}`)
repaired := repairResponsesWebsocketToolCallsWithCaches(outputCache, callCache, sessionKey, raw)
input := gjson.GetBytes(repaired, "input").Array()
if len(input) != 1 {
t.Fatalf("repaired input len = %d, want 1", len(input))
}
if input[0].Get("type").String() != "message" || input[0].Get("id").String() != "msg-1" {
t.Fatalf("unexpected remaining item: %s", input[0].Raw)
}
}
func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
@@ -518,6 +622,38 @@ func TestRecordResponsesWebsocketToolCallsFromPayloadWithCache(t *testing.T) {
}
}
func TestRecordResponsesWebsocketCustomToolCallsFromCompletedPayloadWithCache(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
payload := []byte(`{"type":"response.completed","response":{"id":"resp-1","output":[{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch","input":"*** Begin Patch"}]}}`)
recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload)
cached, ok := cache.get(sessionKey, "call-1")
if !ok {
t.Fatalf("expected cached custom tool call")
}
if gjson.GetBytes(cached, "type").String() != "custom_tool_call" || gjson.GetBytes(cached, "call_id").String() != "call-1" {
t.Fatalf("unexpected cached custom tool call: %s", cached)
}
}
func TestRecordResponsesWebsocketCustomToolCallsFromOutputItemDoneWithCache(t *testing.T) {
cache := newWebsocketToolOutputCache(time.Minute, 10)
sessionKey := "session-1"
payload := []byte(`{"type":"response.output_item.done","item":{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch","input":"*** Begin Patch"}}`)
recordResponsesWebsocketToolCallsFromPayloadWithCache(cache, sessionKey, payload)
cached, ok := cache.get(sessionKey, "call-1")
if !ok {
t.Fatalf("expected cached custom tool call")
}
if gjson.GetBytes(cached, "type").String() != "custom_tool_call" || gjson.GetBytes(cached, "call_id").String() != "call-1" {
t.Fatalf("unexpected cached custom tool call: %s", cached)
}
}
func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) {
gin.SetMode(gin.TestMode)
@@ -544,14 +680,14 @@ func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) {
close(data)
close(errCh)
var bodyLog strings.Builder
var timelineLog strings.Builder
completedOutput, err := (*OpenAIResponsesAPIHandler)(nil).forwardResponsesWebsocket(
ctx,
conn,
func(...interface{}) {},
data,
errCh,
&bodyLog,
&timelineLog,
"session-1",
)
if err != nil {
@@ -562,6 +698,10 @@ func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) {
serverErrCh <- errors.New("completed output not captured")
return
}
if !strings.Contains(timelineLog.String(), "Event: websocket.response") {
serverErrCh <- errors.New("websocket timeline did not capture downstream response")
return
}
serverErrCh <- nil
}))
defer server.Close()
@@ -594,6 +734,116 @@ func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) {
}
}
func TestForwardResponsesWebsocketLogsAttemptedResponseOnWriteFailure(t *testing.T) {
gin.SetMode(gin.TestMode)
serverErrCh := make(chan error, 1)
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
conn, err := responsesWebsocketUpgrader.Upgrade(w, r, nil)
if err != nil {
serverErrCh <- err
return
}
ctx, _ := gin.CreateTestContext(httptest.NewRecorder())
ctx.Request = r
data := make(chan []byte, 1)
errCh := make(chan *interfaces.ErrorMessage)
data <- []byte("data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp-1\",\"output\":[{\"type\":\"message\",\"id\":\"out-1\"}]}}\n\n")
close(data)
close(errCh)
var timelineLog strings.Builder
if errClose := conn.Close(); errClose != nil {
serverErrCh <- errClose
return
}
_, err = (*OpenAIResponsesAPIHandler)(nil).forwardResponsesWebsocket(
ctx,
conn,
func(...interface{}) {},
data,
errCh,
&timelineLog,
"session-1",
)
if err == nil {
serverErrCh <- errors.New("expected websocket write failure")
return
}
if !strings.Contains(timelineLog.String(), "Event: websocket.response") {
serverErrCh <- errors.New("websocket timeline did not capture attempted downstream response")
return
}
if !strings.Contains(timelineLog.String(), "\"type\":\"response.completed\"") {
serverErrCh <- errors.New("websocket timeline did not retain attempted payload")
return
}
serverErrCh <- nil
}))
defer server.Close()
wsURL := "ws" + strings.TrimPrefix(server.URL, "http")
conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
if err != nil {
t.Fatalf("dial websocket: %v", err)
}
defer func() {
_ = conn.Close()
}()
if errServer := <-serverErrCh; errServer != nil {
t.Fatalf("server error: %v", errServer)
}
}
func TestResponsesWebsocketTimelineRecordsDisconnectEvent(t *testing.T) {
gin.SetMode(gin.TestMode)
manager := coreauth.NewManager(nil, nil, nil)
base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager)
h := NewOpenAIResponsesAPIHandler(base)
timelineCh := make(chan string, 1)
router := gin.New()
router.GET("/v1/responses/ws", func(c *gin.Context) {
h.ResponsesWebsocket(c)
timeline := ""
if value, exists := c.Get(wsTimelineBodyKey); exists {
if body, ok := value.([]byte); ok {
timeline = string(body)
}
}
timelineCh <- timeline
})
server := httptest.NewServer(router)
defer server.Close()
wsURL := "ws" + strings.TrimPrefix(server.URL, "http") + "/v1/responses/ws"
conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
if err != nil {
t.Fatalf("dial websocket: %v", err)
}
closePayload := websocket.FormatCloseMessage(websocket.CloseGoingAway, "client closing")
if err = conn.WriteControl(websocket.CloseMessage, closePayload, time.Now().Add(time.Second)); err != nil {
t.Fatalf("write close control: %v", err)
}
_ = conn.Close()
select {
case timeline := <-timelineCh:
if !strings.Contains(timeline, "Event: websocket.disconnect") {
t.Fatalf("websocket timeline missing disconnect event: %s", timeline)
}
case <-time.After(5 * time.Second):
t.Fatal("timed out waiting for websocket timeline")
}
}
func TestWebsocketUpstreamSupportsIncrementalInputForModel(t *testing.T) {
manager := coreauth.NewManager(nil, nil, nil)
auth := &coreauth.Auth{
@@ -891,6 +1141,161 @@ func TestNormalizeResponsesWebsocketRequestDropsDuplicateFunctionCallsByCallID(t
}
}
func TestNormalizeResponsesWebsocketRequestTreatsCustomToolTranscriptReplacementAsReset(t *testing.T) {
lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"message","id":"msg-1"},{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-1","call_id":"call-1"},{"type":"message","id":"assistant-1","role":"assistant"}]}`)
lastResponseOutput := []byte(`[
{"type":"message","id":"assistant-1","role":"assistant"}
]`)
raw := []byte(`{"type":"response.create","input":[{"type":"custom_tool_call","id":"ctc-compact","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-compact","call_id":"call-1"},{"type":"message","id":"msg-2"}]}`)
normalized, next, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput)
if errMsg != nil {
t.Fatalf("unexpected error: %v", errMsg.Error)
}
if gjson.GetBytes(normalized, "previous_response_id").Exists() {
t.Fatalf("previous_response_id must not exist in transcript replacement mode")
}
items := gjson.GetBytes(normalized, "input").Array()
if len(items) != 3 {
t.Fatalf("replacement input len = %d, want 3: %s", len(items), normalized)
}
if items[0].Get("id").String() != "ctc-compact" ||
items[1].Get("id").String() != "tool-out-compact" ||
items[2].Get("id").String() != "msg-2" {
t.Fatalf("replacement transcript was not preserved as-is: %s", normalized)
}
if !bytes.Equal(next, normalized) {
t.Fatalf("next request snapshot should match replacement request")
}
}
func TestNormalizeResponsesWebsocketRequestDropsDuplicateCustomToolCallsByCallID(t *testing.T) {
lastRequest := []byte(`{"model":"test-model","stream":true,"input":[{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-1","call_id":"call-1"}]}`)
lastResponseOutput := []byte(`[
{"type":"custom_tool_call","id":"ctc-1","call_id":"call-1","name":"apply_patch"}
]`)
raw := []byte(`{"type":"response.create","input":[{"type":"message","id":"msg-2"}]}`)
normalized, _, errMsg := normalizeResponsesWebsocketRequest(raw, lastRequest, lastResponseOutput)
if errMsg != nil {
t.Fatalf("unexpected error: %v", errMsg.Error)
}
items := gjson.GetBytes(normalized, "input").Array()
if len(items) != 3 {
t.Fatalf("merged input len = %d, want 3: %s", len(items), normalized)
}
if items[0].Get("id").String() != "ctc-1" ||
items[1].Get("id").String() != "tool-out-1" ||
items[2].Get("id").String() != "msg-2" {
t.Fatalf("unexpected merged input order: %s", normalized)
}
}
func TestResponsesWebsocketCompactionResetsTurnStateOnCustomToolTranscriptReplacement(t *testing.T) {
gin.SetMode(gin.TestMode)
executor := &websocketCompactionCaptureExecutor{}
manager := coreauth.NewManager(nil, nil, nil)
manager.RegisterExecutor(executor)
auth := &coreauth.Auth{ID: "auth-sse", Provider: executor.Identifier(), Status: coreauth.StatusActive}
if _, err := manager.Register(context.Background(), auth); err != nil {
t.Fatalf("Register auth: %v", err)
}
registry.GetGlobalRegistry().RegisterClient(auth.ID, auth.Provider, []*registry.ModelInfo{{ID: "test-model"}})
t.Cleanup(func() {
registry.GetGlobalRegistry().UnregisterClient(auth.ID)
})
base := handlers.NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, manager)
h := NewOpenAIResponsesAPIHandler(base)
router := gin.New()
router.GET("/v1/responses/ws", h.ResponsesWebsocket)
router.POST("/v1/responses/compact", h.Compact)
server := httptest.NewServer(router)
defer server.Close()
wsURL := "ws" + strings.TrimPrefix(server.URL, "http") + "/v1/responses/ws"
conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
if err != nil {
t.Fatalf("dial websocket: %v", err)
}
defer func() {
if errClose := conn.Close(); errClose != nil {
t.Fatalf("close websocket: %v", errClose)
}
}()
requests := []string{
`{"type":"response.create","model":"test-model","input":[{"type":"message","id":"msg-1"}]}`,
`{"type":"response.create","input":[{"type":"custom_tool_call_output","call_id":"call-1","id":"tool-out-1"}]}`,
}
for i := range requests {
if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(requests[i])); errWrite != nil {
t.Fatalf("write websocket message %d: %v", i+1, errWrite)
}
_, payload, errReadMessage := conn.ReadMessage()
if errReadMessage != nil {
t.Fatalf("read websocket message %d: %v", i+1, errReadMessage)
}
if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted {
t.Fatalf("message %d payload type = %s, want %s", i+1, got, wsEventTypeCompleted)
}
}
compactResp, errPost := server.Client().Post(
server.URL+"/v1/responses/compact",
"application/json",
strings.NewReader(`{"model":"test-model","input":[{"type":"message","id":"summary-1"}]}`),
)
if errPost != nil {
t.Fatalf("compact request failed: %v", errPost)
}
if errClose := compactResp.Body.Close(); errClose != nil {
t.Fatalf("close compact response body: %v", errClose)
}
if compactResp.StatusCode != http.StatusOK {
t.Fatalf("compact status = %d, want %d", compactResp.StatusCode, http.StatusOK)
}
postCompact := `{"type":"response.create","input":[{"type":"custom_tool_call","id":"ctc-compact","call_id":"call-1","name":"apply_patch"},{"type":"custom_tool_call_output","id":"tool-out-compact","call_id":"call-1"},{"type":"message","id":"msg-2"}]}`
if errWrite := conn.WriteMessage(websocket.TextMessage, []byte(postCompact)); errWrite != nil {
t.Fatalf("write post-compact websocket message: %v", errWrite)
}
_, payload, errReadMessage := conn.ReadMessage()
if errReadMessage != nil {
t.Fatalf("read post-compact websocket message: %v", errReadMessage)
}
if got := gjson.GetBytes(payload, "type").String(); got != wsEventTypeCompleted {
t.Fatalf("post-compact payload type = %s, want %s", got, wsEventTypeCompleted)
}
executor.mu.Lock()
defer executor.mu.Unlock()
if executor.compactPayload == nil {
t.Fatalf("compact payload was not captured")
}
if len(executor.streamPayloads) != 3 {
t.Fatalf("stream payload count = %d, want 3", len(executor.streamPayloads))
}
merged := executor.streamPayloads[2]
items := gjson.GetBytes(merged, "input").Array()
if len(items) != 3 {
t.Fatalf("merged input len = %d, want 3: %s", len(items), merged)
}
if items[0].Get("id").String() != "ctc-compact" ||
items[1].Get("id").String() != "tool-out-compact" ||
items[2].Get("id").String() != "msg-2" {
t.Fatalf("unexpected post-compact input order: %s", merged)
}
if items[0].Get("call_id").String() != "call-1" {
t.Fatalf("post-compact custom tool call id = %s, want call-1", items[0].Get("call_id").String())
}
}
func TestResponsesWebsocketCompactionResetsTurnStateOnTranscriptReplacement(t *testing.T) {
gin.SetMode(gin.TestMode)

View File

@@ -266,15 +266,15 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
continue
}
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
switch itemType {
case "function_call_output":
switch {
case isResponsesToolCallOutputType(itemType):
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
if callID == "" {
continue
}
outputPresent[callID] = struct{}{}
outputCache.record(sessionKey, callID, item)
case "function_call":
case isResponsesToolCallType(itemType):
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
if callID == "" {
continue
@@ -293,7 +293,7 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
continue
}
itemType := strings.TrimSpace(gjson.GetBytes(item, "type").String())
if itemType == "function_call_output" {
if isResponsesToolCallOutputType(itemType) {
callID := strings.TrimSpace(gjson.GetBytes(item, "call_id").String())
if callID == "" {
// Upstream rejects tool outputs without a call_id; drop it.
@@ -325,7 +325,7 @@ func repairResponsesToolCallsArray(outputCache, callCache *websocketToolOutputCa
// Drop orphaned function_call_output items; upstream rejects transcripts with missing calls.
continue
}
if itemType != "function_call" {
if !isResponsesToolCallType(itemType) {
filtered = append(filtered, item)
continue
}
@@ -376,7 +376,7 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
return
}
for _, item := range output.Array() {
if strings.TrimSpace(item.Get("type").String()) != "function_call" {
if !isResponsesToolCallType(item.Get("type").String()) {
continue
}
callID := strings.TrimSpace(item.Get("call_id").String())
@@ -390,7 +390,7 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
if !item.Exists() || !item.IsObject() {
return
}
if strings.TrimSpace(item.Get("type").String()) != "function_call" {
if !isResponsesToolCallType(item.Get("type").String()) {
return
}
callID := strings.TrimSpace(item.Get("call_id").String())
@@ -400,3 +400,21 @@ func recordResponsesWebsocketToolCallsFromPayloadWithCache(cache *websocketToolO
cache.record(sessionKey, callID, json.RawMessage(item.Raw))
}
}
func isResponsesToolCallType(itemType string) bool {
switch strings.TrimSpace(itemType) {
case "function_call", "custom_tool_call":
return true
default:
return false
}
}
func isResponsesToolCallOutputType(itemType string) bool {
switch strings.TrimSpace(itemType) {
case "function_call_output", "custom_tool_call_output":
return true
default:
return false
}
}

View File

@@ -263,6 +263,7 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
if email, ok := metadata["email"].(string); ok && email != "" {
auth.Attributes["email"] = email
}
cliproxyauth.ApplyCustomHeadersFromMetadata(auth)
return auth, nil
}

View File

@@ -0,0 +1,68 @@
package auth
import "strings"
func ExtractCustomHeadersFromMetadata(metadata map[string]any) map[string]string {
if len(metadata) == 0 {
return nil
}
raw, ok := metadata["headers"]
if !ok || raw == nil {
return nil
}
out := make(map[string]string)
switch headers := raw.(type) {
case map[string]string:
for key, value := range headers {
name := strings.TrimSpace(key)
if name == "" {
continue
}
val := strings.TrimSpace(value)
if val == "" {
continue
}
out[name] = val
}
case map[string]any:
for key, value := range headers {
name := strings.TrimSpace(key)
if name == "" {
continue
}
rawVal, ok := value.(string)
if !ok {
continue
}
val := strings.TrimSpace(rawVal)
if val == "" {
continue
}
out[name] = val
}
default:
return nil
}
if len(out) == 0 {
return nil
}
return out
}
func ApplyCustomHeadersFromMetadata(auth *Auth) {
if auth == nil || len(auth.Metadata) == 0 {
return
}
headers := ExtractCustomHeadersFromMetadata(auth.Metadata)
if len(headers) == 0 {
return
}
if auth.Attributes == nil {
auth.Attributes = make(map[string]string)
}
for name, value := range headers {
auth.Attributes["header:"+name] = value
}
}

View File

@@ -0,0 +1,50 @@
package auth
import (
"reflect"
"testing"
)
func TestExtractCustomHeadersFromMetadata(t *testing.T) {
meta := map[string]any{
"headers": map[string]any{
" X-Test ": " value ",
"": "ignored",
"X-Empty": " ",
"X-Num": float64(1),
},
}
got := ExtractCustomHeadersFromMetadata(meta)
want := map[string]string{"X-Test": "value"}
if !reflect.DeepEqual(got, want) {
t.Fatalf("ExtractCustomHeadersFromMetadata() = %#v, want %#v", got, want)
}
}
func TestApplyCustomHeadersFromMetadata(t *testing.T) {
auth := &Auth{
Metadata: map[string]any{
"headers": map[string]string{
"X-Test": "new",
"X-Empty": " ",
},
},
Attributes: map[string]string{
"header:X-Test": "old",
"keep": "1",
},
}
ApplyCustomHeadersFromMetadata(auth)
if got := auth.Attributes["header:X-Test"]; got != "new" {
t.Fatalf("header:X-Test = %q, want %q", got, "new")
}
if _, ok := auth.Attributes["header:X-Empty"]; ok {
t.Fatalf("expected header:X-Empty to be absent, got %#v", auth.Attributes["header:X-Empty"])
}
if got := auth.Attributes["keep"]; got != "1" {
t.Fatalf("keep = %q, want %q", got, "1")
}
}