Compare commits

...

52 Commits

Author SHA1 Message Date
Luis Pater
0c05131aeb Merge branch 'router-for-me:main' into main 2026-03-07 09:08:28 +08:00
Luis Pater
5ebc58fab4 refactor(executor): remove legacy connCreateSent logic and standardize response.create usage for all websocket events
- Simplified connection logic by removing `connCreateSent` and related state handling.
- Updated `buildCodexWebsocketRequestBody` to always use `response.create`.
- Added unit tests to validate `response.create` behavior and beta header preservation.
- Dropped unsupported `response.append` and outdated `response.done` event types.
2026-03-07 09:07:23 +08:00
Luis Pater
2b609dd891 Merge pull request #1912 from FradSer/main
feat(registry): add gemini 3.1 flash lite preview
2026-03-07 05:41:31 +08:00
Frad LEE
a8cbc68c3e feat(registry): add gemini 3.1 flash lite preview
- Add model to GetGeminiModels()
- Add model to GetGeminiVertexModels()
- Add model to GetGeminiCLIModels()
- Add model to GetAIStudioModels()
- Add to AntigravityModelConfig with thinking levels
- Update gemini-3-flash-preview description

Registers the new lightweight Gemini model across all provider
endpoints for cost-effective high-volume usage scenarios.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-06 20:52:28 +08:00
Luis Pater
89c428216e Merge branch 'router-for-me:main' into main 2026-03-06 11:09:31 +08:00
Luis Pater
2695a99623 fix(translator): conditionally remove service_tier from OpenAI response processing 2026-03-06 11:07:22 +08:00
Luis Pater
ad5253bd2b Merge branch 'router-for-me:main' into main 2026-03-06 04:15:55 +08:00
Luis Pater
9397f7049f fix(registry): simplify GPT 5.4 model description in static data 2026-03-06 02:32:56 +08:00
Luis Pater
a14d19b92c Merge branch 'router-for-me:main' into main 2026-03-06 02:25:19 +08:00
Luis Pater
8ae0c05ea6 Merge pull request #416 from ladeng07/main
feat(github-copilot): add /responses support for gpt-4o and gpt-4.1
2026-03-06 02:25:10 +08:00
Luis Pater
8822f20d17 feat(registry): add GPT 5.4 model definition to static data 2026-03-06 02:23:53 +08:00
Luis Pater
f0e5a5a367 test(watcher): add unit test for server update timer cancellation and immediate reload logic
- Add `TestTriggerServerUpdateCancelsPendingTimerOnImmediate` to verify proper handling of server update debounce and timer cancellation.
- Fix logic in `triggerServerUpdate` to prevent duplicate timers and ensure proper cleanup of pending state.
2026-03-05 23:48:50 +08:00
Luis Pater
f6dfea9357 Merge pull request #1874 from constansino/fix/watcher-auth-event-storm-debounce
fix(watcher): 合并 auth 事件风暴下的回调触发,降低高 CPU
2026-03-05 23:29:56 +08:00
Luis Pater
cc8dc7f62c Merge branch 'main' into dev 2026-03-05 23:13:21 +08:00
Luis Pater
a3846ea513 Merge pull request #1870 from sususu98/fix/remove-instructions-restore
cleanup(translator): remove leftover instructions restore in codex responses
2026-03-05 23:12:31 +08:00
Luis Pater
8d44be858e Merge pull request #1834 from DragonFSKY/fix/sse-streaming-accept-encoding
fix(claude): extend gzip fix to SSE success path and header-absent compression (#1763)
2026-03-05 22:57:27 +08:00
Luis Pater
0e6bb076e9 fix(translator): comment out service_tier removal from OpenAI response processing 2026-03-05 22:49:38 +08:00
Luis Pater
ac135fc7cb Fixed: #1815
**test(executor): add unit tests for prompt cache key generation in OpenAI `cacheHelper`**
2026-03-05 22:49:23 +08:00
Luis Pater
4e1d09809d Fixed: #1741
fix(translator): handle tool name mappings and improve tool call handling in OpenAI and Claude integrations
2026-03-05 22:24:50 +08:00
LMark
9e855f8100 feat(github-copilot): add /responses support for gpt-4o and gpt-4.1 2026-03-05 21:20:21 +08:00
Luis Pater
25680a8259 revert .gitignore 2026-03-05 20:14:08 +08:00
Luis Pater
13c93e8cfd Merge pull request #414 from CheesesNguyen/fix/remove-soft-limit-and-tool-compression
fix: remove SOFT_LIMIT_REACHED logic, tool compression, and fix bugs
2026-03-05 20:12:50 +08:00
Luis Pater
88aa1b9fd1 Merge pull request #408 from xy-host/feat/dynamic-copilot-models
feat: dynamic model fetching for GitHub Copilot
2026-03-05 20:10:40 +08:00
Luis Pater
352cb98ff0 Merge branch 'router-for-me:main' into main 2026-03-05 20:08:19 +08:00
constansino
ac95e92829 fix(watcher): guard debounced callback after Stop 2026-03-05 19:25:57 +08:00
constansino
8526c2da25 fix(watcher): debounce auth event callback storms 2026-03-05 19:12:57 +08:00
sususu98
68a6cabf8b style: blank unused params in codex responses translator 2026-03-05 16:42:48 +08:00
sususu98
ac0e387da1 cleanup(translator): remove leftover instructions restore in codex responses
The instructions restore logic was originally needed when the proxy
injected custom instructions (per-model system prompts) into requests.
Since ac802a46 removed the injection system, the proxy no longer
modifies instructions before forwarding. The upstream response's
instructions field now matches the client's original value, making
the restore a no-op.

Also removes unused sjson import.

Closes router-for-me/CLIProxyAPI#1868
2026-03-05 16:34:55 +08:00
CheesesNguyen
7fe1d102cb fix: don't treat empty input as truncation for tools without required fields
Tools like TaskList, TaskGet have no required parameters, so empty input
is valid. Previously, the truncation detector flagged all empty inputs as
truncated, causing these tools to be skipped and breaking the tool loop.

Now only flag empty input as truncation when the tool has required fields
defined in RequiredFieldsByTool.
2026-03-05 14:43:45 +07:00
Luis Pater
5850492a93 Fixed: #1548
test(translator): add unit tests for fallback logic in `ConvertCodexResponseToOpenAI` model assignment
2026-03-05 12:11:54 +08:00
Luis Pater
fdbd4041ca Fixed: #1531
fix(gemini): add `deprecated` to unsupported schema keywords

Add `deprecated` to the list of unsupported schema metadata fields in Gemini and update tests to verify its removal.
2026-03-05 11:48:15 +08:00
Luis Pater
ebef1fae2a Merge pull request #1511 from stondy0103/fix/responses-nullable-type-array
fix(translator): fix nullable type arrays breaking Gemini/Antigravity API
2026-03-05 11:30:09 +08:00
CheesesNguyen
c51851689b fix: remove SOFT_LIMIT_REACHED logic, tool compression, and fix bugs
- Remove SOFT_LIMIT_REACHED marker injection in response path
- Remove SOFT_LIMIT_REACHED detection logic in request path
- Remove SOFT_LIMIT_REACHED streaming logic in executor
- Remove tool_compression.go and related constants
- Fix truncation_detector: string(rune(len)) producing Unicode char instead of decimal string
- Fix WebSearchToolUseId being overwritten by non-web-search tools
- Fix duplicate kiro entry in model_definitions.go comment
- Add build output to .gitignore
2026-03-05 10:05:39 +07:00
DragonFSKY
419bf784ab fix(claude): prevent compressed SSE streams and add magic-byte decompression fallback
- Set Accept-Encoding: identity for SSE streams; upstream must not compress
  line-delimited SSE bodies that bufio.Scanner reads directly
- Re-enforce identity after ApplyCustomHeadersFromAttrs to prevent auth
  attribute injection from re-enabling compression on the stream path
- Add peekableBody type wrapping bufio.Reader for non-consuming magic-byte
  inspection of the first 4 bytes without affecting downstream readers
- Detect gzip (0x1f 0x8b) and zstd (0x28 0xb5 0x2f 0xfd) by magic bytes
  when Content-Encoding header is absent, covering misbehaving upstreams
- Remove if-Content-Encoding guard on all three error paths (Execute,
  ExecuteStream, CountTokens); unconditionally delegate to decodeResponseBody
  so magic-byte detection applies consistently to all response paths
- Add 10 tests covering stream identity enforcement, compressed success bodies,
  magic-byte detection without headers, error path decoding, and
  auth attribute override prevention
2026-03-05 06:38:38 +08:00
Luis Pater
4bbeb92e9a Fixed: #1135
**test(translator): add tests for `tool_choice` handling in Claude request conversions**
2026-03-04 22:28:26 +08:00
Luis Pater
b436dad8bc Merge pull request #1822 from sususu98/fix/strip-defer-loading
fix(translator): strip defer_loading from Claude tool declarations in Codex and Gemini translators
2026-03-04 20:49:48 +08:00
Luis Pater
6ae15d6c44 Merge pull request #1816 from sususu98/fix/antigravity-adaptive-effort
fix(antigravity): pass through adaptive thinking effort level instead of always mapping to high
2026-03-04 20:48:38 +08:00
Luis Pater
0468bde0d6 Merge branch 'dev' into fix/antigravity-adaptive-effort 2026-03-04 20:48:26 +08:00
Luis Pater
1d7329e797 Merge pull request #1825 from router-for-me/vertex
feat(config): support excluded vertex models in config
2026-03-04 20:44:41 +08:00
hkfires
48ffc4dee7 feat(config): support excluded vertex models in config 2026-03-04 18:47:42 +08:00
Luis Pater
7ebd8f0c44 Merge branch 'router-for-me:main' into main 2026-03-04 18:30:45 +08:00
Luis Pater
b680c146c1 chore(docs): update sponsor image links in README files 2026-03-04 18:29:23 +08:00
yx-bot7
7d6660d181 fix: address PR review feedback
- Fix SSRF: validate API endpoint host against allowlist before use
- Limit /models response body to 2MB to prevent memory exhaustion (DoS)
- Use MakeAuthenticatedRequest for consistent headers across API calls
- Trim trailing slash on API endpoint to prevent double-slash URLs
- Use ListModelsWithGitHubToken to simplify token exchange + listing
- Deduplicate model IDs to prevent incorrect registry reference counting
- Remove dead capabilities enrichment code block
- Remove unused ModelExtra field with misleading json:"-" tag
- Extract magic numbers to named constants (defaultCopilotContextLength)
- Remove redundant hyphenID == id check (already filtered by Contains)
- Use defer cancel() for context timeout in service.go
2026-03-04 15:43:51 +08:00
yx-bot7
d8e3d4e2b6 feat: dynamic model fetching for GitHub Copilot
- Add ListModels/ListModelsWithGitHubToken to CopilotAuth for querying
  the /models endpoint at api.githubcopilot.com
- Add FetchGitHubCopilotModels in executor with static fallback on failure
- Update service.go to use dynamic fetching (15s timeout) instead of
  hardcoded GetGitHubCopilotModels()
- Add GitHubCopilotAliasesFromModels for auto-generating dot-to-hyphen
  model aliases from dynamic model lists
2026-03-04 14:29:28 +08:00
sususu98
d26ad8224d fix(translator): strip defer_loading from Claude tool declarations in Codex and Gemini translators
Claude's Tool Search feature (advanced-tool-use-2025-11-20 beta) adds
defer_loading field to tool definitions. When proxying Claude requests
to Codex or Gemini, this unknown field causes 400 errors upstream.

Strip defer_loading (and cache_control where missing) in all three
Claude-to-upstream translation paths:
- codex/claude: defer_loading + cache_control
- gemini-cli/claude: defer_loading
- gemini/claude: defer_loading

Fixes #1725, Fixes #1375
2026-03-04 14:21:30 +08:00
hkfires
5c84d69d42 feat(translator): map output_config.effort to adaptive thinking level in antigravity 2026-03-04 13:11:07 +08:00
sususu98
527e4b7f26 fix(antigravity): pass through adaptive thinking effort level instead of always mapping to high 2026-03-04 10:12:45 +08:00
Luis Pater
b48485b42b Fixed: #822
**fix(auth): normalize ID casing on Windows to prevent duplicate entries due to case-insensitive paths**
2026-03-04 02:31:20 +08:00
Luis Pater
79009bb3d4 Fixed: #797
**test(auth): add test for preserving ModelStates during auth updates**
2026-03-04 02:06:24 +08:00
Luis Pater
26fc611f86 Merge pull request #403 from Ton-Git/main
github copilot - update x-initiator header rules
2026-03-03 21:59:02 +08:00
stefanet
4e99525279 github copilot - update x-initiator header rules 2026-03-02 16:41:29 +01:00
Finn Phillips
2615f489d6 fix(translator): remove broken type uppercasing in OpenAI Responses-to-Gemini translator
The `ConvertOpenAIResponsesRequestToGemini` function had code that attempted
to uppercase JSON Schema type values (e.g. "string" -> "STRING") for Gemini
compatibility. This broke nullable types because when `type` is a JSON array
like `["string", "null"]`:

1. `gjson.Result.String()` returns the raw JSON text `["string","null"]`
2. `strings.ToUpper()` produces `["STRING","NULL"]`
3. `sjson.Set()` stores it as a JSON **string** `"[\"STRING\",\"NULL\"]"`
   instead of a JSON array
4. The downstream `CleanJSONSchemaForGemini()` / `flattenTypeArrays()`
   cannot detect it (since `IsArray()` returns false on a string)
5. Gemini/Antigravity API rejects it with:
   `400 Invalid value at '...type' (Type), "["STRING","NULL"]"`

This was confirmed and tested with Droid Factory (Antigravity) Gemini models
where Claude Code sends tool schemas with nullable parameters.

The fix removes the uppercasing logic entirely and passes the raw schema
through to `parametersJsonSchema`. This is safe because:
- Antigravity executor already runs `CleanJSONSchemaForGemini()` which
  properly handles type arrays, nullable fields, and all schema cleanup
- Gemini/Vertex executors use `parametersJsonSchema` which accepts raw
  JSON Schema directly (no uppercasing needed)
- The uppercasing code also only iterated top-level properties, missing
  nested schemas entirely

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-10 09:29:09 +07:00
54 changed files with 1778 additions and 585 deletions

View File

@@ -10,7 +10,7 @@ The Plus release stays in lockstep with the mainline features.
## Differences from the Mainline
[![z.ai](https://assets.router-for.me/english-5.png)](https://z.ai/subscribe?ic=8JVLJQFSKB)
[![z.ai](https://assets.router-for.me/english-5-0.jpg)](https://z.ai/subscribe?ic=8JVLJQFSKB)
## New Features (Plus Enhanced)

View File

@@ -10,7 +10,7 @@
## 与主线版本差异
[![bigmodel.cn](https://assets.router-for.me/chinese-5.png)](https://www.bigmodel.cn/claude-code?ic=RRVJPB5SII)
[![bigmodel.cn](https://assets.router-for.me/chinese-5-0.jpg)](https://www.bigmodel.cn/claude-code?ic=RRVJPB5SII)
## 新增功能 (Plus 增强版)

View File

@@ -233,6 +233,9 @@ nonstream-keepalive-interval: 0
# alias: "vertex-flash" # client-visible alias
# - name: "gemini-2.5-pro"
# alias: "vertex-pro"
# excluded-models: # optional: models to exclude from listing
# - "imagen-3.0-generate-002"
# - "imagen-*"
# Amp Integration
# ampcode:

View File

@@ -16,6 +16,7 @@ import (
"net/url"
"os"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
@@ -698,17 +699,20 @@ func (h *Handler) authIDForPath(path string) string {
if path == "" {
return ""
}
if h == nil || h.cfg == nil {
return path
id := path
if h != nil && h.cfg != nil {
authDir := strings.TrimSpace(h.cfg.AuthDir)
if authDir != "" {
if rel, errRel := filepath.Rel(authDir, path); errRel == nil && rel != "" {
id = rel
}
}
}
authDir := strings.TrimSpace(h.cfg.AuthDir)
if authDir == "" {
return path
// On Windows, normalize ID casing to avoid duplicate auth entries caused by case-insensitive paths.
if runtime.GOOS == "windows" {
id = strings.ToLower(id)
}
if rel, err := filepath.Rel(authDir, path); err == nil && rel != "" {
return rel
}
return path
return id
}
func (h *Handler) registerAuthFromFile(ctx context.Context, path string, data []byte) error {

View File

@@ -516,12 +516,13 @@ func (h *Handler) PutVertexCompatKeys(c *gin.Context) {
}
func (h *Handler) PatchVertexCompatKey(c *gin.Context) {
type vertexCompatPatch struct {
APIKey *string `json:"api-key"`
Prefix *string `json:"prefix"`
BaseURL *string `json:"base-url"`
ProxyURL *string `json:"proxy-url"`
Headers *map[string]string `json:"headers"`
Models *[]config.VertexCompatModel `json:"models"`
APIKey *string `json:"api-key"`
Prefix *string `json:"prefix"`
BaseURL *string `json:"base-url"`
ProxyURL *string `json:"proxy-url"`
Headers *map[string]string `json:"headers"`
Models *[]config.VertexCompatModel `json:"models"`
ExcludedModels *[]string `json:"excluded-models"`
}
var body struct {
Index *int `json:"index"`
@@ -585,6 +586,9 @@ func (h *Handler) PatchVertexCompatKey(c *gin.Context) {
if body.Value.Models != nil {
entry.Models = append([]config.VertexCompatModel(nil), (*body.Value.Models)...)
}
if body.Value.ExcludedModels != nil {
entry.ExcludedModels = config.NormalizeExcludedModels(*body.Value.ExcludedModels)
}
normalizeVertexCompatKey(&entry)
h.cfg.VertexCompatAPIKey[targetIndex] = entry
h.cfg.SanitizeVertexCompatKeys()
@@ -1029,6 +1033,7 @@ func normalizeVertexCompatKey(entry *config.VertexCompatKey) {
entry.BaseURL = strings.TrimSpace(entry.BaseURL)
entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
entry.Headers = config.NormalizeHeaders(entry.Headers)
entry.ExcludedModels = config.NormalizeExcludedModels(entry.ExcludedModels)
if len(entry.Models) == 0 {
return
}

View File

@@ -8,6 +8,8 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -222,6 +224,97 @@ func (c *CopilotAuth) MakeAuthenticatedRequest(ctx context.Context, method, url
return req, nil
}
// CopilotModelEntry represents a single model entry returned by the Copilot /models API.
// Each field is decoded from the JSON key named in its struct tag. Fields tagged
// omitempty are left out when the value is re-encoded and empty.
type CopilotModelEntry struct {
// ID is decoded from the "id" key.
ID string `json:"id"`
// Object is decoded from the "object" key.
Object string `json:"object"`
// Created is decoded from the "created" key.
// NOTE(review): presumably a Unix timestamp in seconds — confirm against the API.
Created int64 `json:"created"`
// OwnedBy is decoded from the "owned_by" key.
OwnedBy string `json:"owned_by"`
// Name is decoded from the optional "name" key.
Name string `json:"name,omitempty"`
// Version is decoded from the optional "version" key.
Version string `json:"version,omitempty"`
// Capabilities holds the optional "capabilities" object as an untyped map;
// its schema is not defined in this file.
Capabilities map[string]any `json:"capabilities,omitempty"`
}
// CopilotModelsResponse represents the response from the Copilot /models endpoint.
// ListModels unmarshals the response body into this type and returns Data.
type CopilotModelsResponse struct {
// Data is the list of model entries decoded from the "data" key.
Data []CopilotModelEntry `json:"data"`
// Object is decoded from the top-level "object" key.
Object string `json:"object"`
}
// maxModelsResponseSize is the maximum allowed response size from the /models endpoint (2 MB).
// The value is 2 * 1024 * 1024 bytes. ListModels passes it to io.LimitReader,
// so at most this many bytes of the response body are read into memory.
const maxModelsResponseSize = 2 * 1024 * 1024
// allowedCopilotAPIHosts is the set of hosts that are considered safe for Copilot API requests.
// ListModels looks up the Host field of the parsed endpoint URL in this map.
// That field includes any explicit port, so an endpoint such as "host:443"
// does not match and the default endpoint is used instead.
var allowedCopilotAPIHosts = map[string]bool{
"api.githubcopilot.com": true,
"api.individual.githubcopilot.com": true,
"api.business.githubcopilot.com": true,
"copilot-proxy.githubusercontent.com": true,
}
// ListModels fetches the list of available models from the Copilot API.
// It requires a valid Copilot API token (not the GitHub access token).
//
// The request is sent to apiToken.Endpoints.API when that value parses as an
// https URL whose host is present in allowedCopilotAPIHosts. Otherwise the
// default copilotAPIEndpoint is used and a warning is logged.
//
// An error is returned when the token is missing, the request cannot be built
// or sent, the response status is not a success, the body is larger than
// maxModelsResponseSize, or the body cannot be parsed as JSON.
func (c *CopilotAuth) ListModels(ctx context.Context, apiToken *CopilotAPIToken) ([]CopilotModelEntry, error) {
	if apiToken == nil || apiToken.Token == "" {
		return nil, fmt.Errorf("copilot: api token is required for listing models")
	}

	// Build models URL, validating the endpoint host to prevent SSRF.
	// Trailing slashes are trimmed so the joined URL never contains "//models".
	modelsURL := copilotAPIEndpoint + "/models"
	if ep := strings.TrimRight(apiToken.Endpoints.API, "/"); ep != "" {
		parsed, err := url.Parse(ep)
		if err == nil && parsed.Scheme == "https" && allowedCopilotAPIHosts[parsed.Host] {
			modelsURL = ep + "/models"
		} else {
			log.Warnf("copilot: ignoring untrusted API endpoint %q, using default", ep)
		}
	}

	req, err := c.MakeAuthenticatedRequest(ctx, http.MethodGet, modelsURL, nil, apiToken)
	if err != nil {
		return nil, fmt.Errorf("copilot: failed to create models request: %w", err)
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("copilot: models request failed: %w", err)
	}
	defer func() {
		if errClose := resp.Body.Close(); errClose != nil {
			log.Errorf("copilot list models: close body error: %v", errClose)
		}
	}()

	// Limit response body to prevent memory exhaustion. One extra byte is
	// requested so that a body larger than the limit can be told apart from one
	// that fits exactly; io.LimitReader itself truncates without any error.
	limitedReader := io.LimitReader(resp.Body, maxModelsResponseSize+1)
	bodyBytes, err := io.ReadAll(limitedReader)
	if err != nil {
		return nil, fmt.Errorf("copilot: failed to read models response: %w", err)
	}
	oversized := len(bodyBytes) > maxModelsResponseSize
	if oversized {
		bodyBytes = bodyBytes[:maxModelsResponseSize]
	}

	// Report upstream failures first so the status code is never masked by the
	// size check below.
	if !isHTTPSuccess(resp.StatusCode) {
		return nil, fmt.Errorf("copilot: list models failed with status %d: %s", resp.StatusCode, string(bodyBytes))
	}

	// A truncated body would otherwise surface as a misleading JSON parse error.
	if oversized {
		return nil, fmt.Errorf("copilot: models response exceeds %d bytes", maxModelsResponseSize)
	}

	var modelsResp CopilotModelsResponse
	if err = json.Unmarshal(bodyBytes, &modelsResp); err != nil {
		return nil, fmt.Errorf("copilot: failed to parse models response: %w", err)
	}

	return modelsResp.Data, nil
}
// ListModelsWithGitHubToken lists the available Copilot models starting from a
// GitHub access token. It first exchanges the token for a Copilot API token via
// GetCopilotAPIToken and then delegates to ListModels. A failed exchange is
// returned wrapped; errors from ListModels are returned unchanged.
func (c *CopilotAuth) ListModelsWithGitHubToken(ctx context.Context, githubAccessToken string) ([]CopilotModelEntry, error) {
	copilotToken, errToken := c.GetCopilotAPIToken(ctx, githubAccessToken)
	if errToken != nil {
		return nil, fmt.Errorf("copilot: failed to get API token for model listing: %w", errToken)
	}

	return c.ListModels(ctx, copilotToken)
}
// buildChatCompletionURL builds the URL for chat completions API.
func buildChatCompletionURL() string {
return copilotAPIEndpoint + "/chat/completions"

View File

@@ -1,5 +1,7 @@
package config
import "strings"
// defaultKiroAliases returns default oauth-model-alias entries for Kiro.
// These aliases expose standard Claude IDs for Kiro-prefixed upstream models.
func defaultKiroAliases() []OAuthModelAlias {
@@ -35,3 +37,25 @@ func defaultGitHubCopilotAliases() []OAuthModelAlias {
{Name: "claude-sonnet-4.6", Alias: "claude-sonnet-4-6", Fork: true},
}
}
// GitHubCopilotAliasesFromModels derives oauth-model-alias entries from a list of
// model IDs fetched dynamically from the Copilot API.
//
// Every ID containing a dot yields one alias whose Alias is the ID with each
// "." replaced by "-" (e.g. "claude-opus-4.6" → "claude-opus-4-6") and whose
// Fork flag is set. IDs without a dot are ignored, and repeated IDs produce a
// single alias. Input order is preserved; the result is nil when no ID
// contains a dot.
func GitHubCopilotAliasesFromModels(modelIDs []string) []OAuthModelAlias {
	var (
		result  []OAuthModelAlias
		emitted = map[string]struct{}{}
	)

	for _, modelID := range modelIDs {
		if strings.Contains(modelID, ".") {
			dashed := strings.ReplaceAll(modelID, ".", "-")
			pair := modelID + "→" + dashed
			// Only the first occurrence of a given ID/alias pair is emitted.
			if _, dup := emitted[pair]; !dup {
				emitted[pair] = struct{}{}
				result = append(result, OAuthModelAlias{Name: modelID, Alias: dashed, Fork: true})
			}
		}
	}

	return result
}

View File

@@ -34,6 +34,9 @@ type VertexCompatKey struct {
// Models defines the model configurations including aliases for routing.
Models []VertexCompatModel `yaml:"models,omitempty" json:"models,omitempty"`
// ExcludedModels lists model IDs that should be excluded for this provider.
ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"`
}
func (k VertexCompatKey) GetAPIKey() string { return k.APIKey }
@@ -74,6 +77,7 @@ func (cfg *Config) SanitizeVertexCompatKeys() {
}
entry.ProxyURL = strings.TrimSpace(entry.ProxyURL)
entry.Headers = NormalizeHeaders(entry.Headers)
entry.ExcludedModels = NormalizeExcludedModels(entry.ExcludedModels)
// Sanitize models: remove entries without valid alias
sanitizedModels := make([]VertexCompatModel, 0, len(entry.Models))

View File

@@ -23,7 +23,6 @@ import (
// - kiro
// - kilo
// - github-copilot
// - kiro
// - amazonq
// - antigravity (returns static overrides only)
func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo {
@@ -152,6 +151,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
Description: "OpenAI GPT-4.1 via GitHub Copilot",
ContextLength: 128000,
MaxCompletionTokens: 16384,
SupportedEndpoints: []string{"/chat/completions", "/responses"},
},
}
@@ -166,6 +166,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
Description: entry.Description,
ContextLength: 128000,
MaxCompletionTokens: 16384,
SupportedEndpoints: []string{"/chat/completions", "/responses"},
})
}

View File

@@ -220,12 +220,27 @@ func GetGeminiModels() []*ModelInfo {
Name: "models/gemini-3-flash-preview",
Version: "3.0",
DisplayName: "Gemini 3 Flash Preview",
Description: "Gemini 3 Flash Preview",
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
},
{
ID: "gemini-3.1-flash-lite-preview",
Object: "model",
Created: 1776288000,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3.1-flash-lite-preview",
Version: "3.1",
DisplayName: "Gemini 3.1 Flash Lite Preview",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}},
},
{
ID: "gemini-3-pro-image-preview",
Object: "model",
@@ -336,6 +351,21 @@ func GetGeminiVertexModels() []*ModelInfo {
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
},
{
ID: "gemini-3.1-flash-lite-preview",
Object: "model",
Created: 1776288000,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3.1-flash-lite-preview",
Version: "3.1",
DisplayName: "Gemini 3.1 Flash Lite Preview",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}},
},
{
ID: "gemini-3-pro-image-preview",
Object: "model",
@@ -508,6 +538,21 @@ func GetGeminiCLIModels() []*ModelInfo {
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
},
{
ID: "gemini-3.1-flash-lite-preview",
Object: "model",
Created: 1776288000,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3.1-flash-lite-preview",
Version: "3.1",
DisplayName: "Gemini 3.1 Flash Lite Preview",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}},
},
}
}
@@ -604,6 +649,21 @@ func GetAIStudioModels() []*ModelInfo {
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-3.1-flash-lite-preview",
Object: "model",
Created: 1776288000,
OwnedBy: "google",
Type: "gemini",
Name: "models/gemini-3.1-flash-lite-preview",
Version: "3.1",
DisplayName: "Gemini 3.1 Flash Lite Preview",
Description: "Our smallest and most cost effective model, built for at scale usage.",
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}},
},
{
ID: "gemini-pro-latest",
Object: "model",
@@ -839,6 +899,20 @@ func GetOpenAIModels() []*ModelInfo {
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
},
{
ID: "gpt-5.4",
Object: "model",
Created: 1772668800,
OwnedBy: "openai",
Type: "openai",
Version: "gpt-5.4",
DisplayName: "GPT 5.4",
Description: "Stable version of GPT 5.4",
ContextLength: 1_050_000,
MaxCompletionTokens: 128000,
SupportedParameters: []string{"tools"},
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
},
}
}
@@ -966,6 +1040,7 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
"gemini-3.1-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
"gemini-3.1-pro-low": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
"gemini-3.1-flash-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}},
"gemini-3.1-flash-lite-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}},
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
"claude-opus-4-6-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
"claude-sonnet-4-6": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},

View File

@@ -59,6 +59,7 @@ func buildRequestBodyFromPayload(t *testing.T, modelName string) map[string]any
"properties": {
"mode": {
"type": "string",
"deprecated": true,
"enum": ["a", "b"],
"enumTitles": ["A", "B"]
}
@@ -156,4 +157,7 @@ func assertSchemaSanitizedAndPropertyPreserved(t *testing.T, params map[string]a
if _, ok := mode["enumTitles"]; ok {
t.Fatalf("enumTitles should be removed from nested schema")
}
if _, ok := mode["deprecated"]; ok {
t.Fatalf("deprecated should be removed from nested schema")
}
}

View File

@@ -187,17 +187,15 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
}
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
// Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API).
errBody := httpResp.Body
if ce := httpResp.Header.Get("Content-Encoding"); ce != "" {
var decErr error
errBody, decErr = decodeResponseBody(httpResp.Body, ce)
if decErr != nil {
recordAPIResponseError(ctx, e.cfg, decErr)
msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr)
logWithRequestID(ctx).Warn(msg)
return resp, statusErr{code: httpResp.StatusCode, msg: msg}
}
// Decompress error responses — pass the Content-Encoding value (may be empty)
// and let decodeResponseBody handle both header-declared and magic-byte-detected
// compression. This keeps error-path behaviour consistent with the success path.
errBody, decErr := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding"))
if decErr != nil {
recordAPIResponseError(ctx, e.cfg, decErr)
msg := fmt.Sprintf("failed to decode error response body: %v", decErr)
logWithRequestID(ctx).Warn(msg)
return resp, statusErr{code: httpResp.StatusCode, msg: msg}
}
b, readErr := io.ReadAll(errBody)
if readErr != nil {
@@ -352,17 +350,15 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
}
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
// Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API).
errBody := httpResp.Body
if ce := httpResp.Header.Get("Content-Encoding"); ce != "" {
var decErr error
errBody, decErr = decodeResponseBody(httpResp.Body, ce)
if decErr != nil {
recordAPIResponseError(ctx, e.cfg, decErr)
msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr)
logWithRequestID(ctx).Warn(msg)
return nil, statusErr{code: httpResp.StatusCode, msg: msg}
}
// Decompress error responses — pass the Content-Encoding value (may be empty)
// and let decodeResponseBody handle both header-declared and magic-byte-detected
// compression. This keeps error-path behaviour consistent with the success path.
errBody, decErr := decodeResponseBody(httpResp.Body, httpResp.Header.Get("Content-Encoding"))
if decErr != nil {
recordAPIResponseError(ctx, e.cfg, decErr)
msg := fmt.Sprintf("failed to decode error response body: %v", decErr)
logWithRequestID(ctx).Warn(msg)
return nil, statusErr{code: httpResp.StatusCode, msg: msg}
}
b, readErr := io.ReadAll(errBody)
if readErr != nil {
@@ -521,17 +517,15 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
}
recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone())
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
// Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API).
errBody := resp.Body
if ce := resp.Header.Get("Content-Encoding"); ce != "" {
var decErr error
errBody, decErr = decodeResponseBody(resp.Body, ce)
if decErr != nil {
recordAPIResponseError(ctx, e.cfg, decErr)
msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr)
logWithRequestID(ctx).Warn(msg)
return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: msg}
}
// Decompress error responses — pass the Content-Encoding value (may be empty)
// and let decodeResponseBody handle both header-declared and magic-byte-detected
// compression. This keeps error-path behaviour consistent with the success path.
errBody, decErr := decodeResponseBody(resp.Body, resp.Header.Get("Content-Encoding"))
if decErr != nil {
recordAPIResponseError(ctx, e.cfg, decErr)
msg := fmt.Sprintf("failed to decode error response body: %v", decErr)
logWithRequestID(ctx).Warn(msg)
return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: msg}
}
b, readErr := io.ReadAll(errBody)
if readErr != nil {
@@ -662,12 +656,61 @@ func (c *compositeReadCloser) Close() error {
return firstErr
}
// peekableBody pairs a buffered reader with the closer of the stream it wraps.
// The embedded *bufio.Reader supplies Read and Peek, so leading bytes can be
// examined up front and are still delivered by later reads.
type peekableBody struct {
	*bufio.Reader
	closer io.Closer
}

// Close closes the wrapped stream and returns whatever error it reports.
func (body *peekableBody) Close() error {
	err := body.closer.Close()
	return err
}
func decodeResponseBody(body io.ReadCloser, contentEncoding string) (io.ReadCloser, error) {
if body == nil {
return nil, fmt.Errorf("response body is nil")
}
if contentEncoding == "" {
return body, nil
// No Content-Encoding header. Attempt best-effort magic-byte detection to
// handle misbehaving upstreams that compress without setting the header.
// Only gzip (1f 8b) and zstd (28 b5 2f fd) have reliable magic sequences;
// br and deflate have none and are left as-is.
// The bufio wrapper preserves unread bytes so callers always see the full
// stream regardless of whether decompression was applied.
pb := &peekableBody{Reader: bufio.NewReader(body), closer: body}
magic, peekErr := pb.Peek(4)
if peekErr == nil || (peekErr == io.EOF && len(magic) >= 2) {
switch {
case len(magic) >= 2 && magic[0] == 0x1f && magic[1] == 0x8b:
gzipReader, gzErr := gzip.NewReader(pb)
if gzErr != nil {
_ = pb.Close()
return nil, fmt.Errorf("magic-byte gzip: failed to create reader: %w", gzErr)
}
return &compositeReadCloser{
Reader: gzipReader,
closers: []func() error{
gzipReader.Close,
pb.Close,
},
}, nil
case len(magic) >= 4 && magic[0] == 0x28 && magic[1] == 0xb5 && magic[2] == 0x2f && magic[3] == 0xfd:
decoder, zdErr := zstd.NewReader(pb)
if zdErr != nil {
_ = pb.Close()
return nil, fmt.Errorf("magic-byte zstd: failed to create reader: %w", zdErr)
}
return &compositeReadCloser{
Reader: decoder,
closers: []func() error{
func() error { decoder.Close(); return nil },
pb.Close,
},
}, nil
}
}
return pb, nil
}
encodings := strings.Split(contentEncoding, ",")
for _, raw := range encodings {
@@ -844,11 +887,15 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
r.Header.Set("User-Agent", hdrDefault(hd.UserAgent, "claude-cli/2.1.63 (external, cli)"))
}
r.Header.Set("Connection", "keep-alive")
r.Header.Set("Accept-Encoding", "gzip, deflate, br, zstd")
if stream {
r.Header.Set("Accept", "text/event-stream")
// SSE streams must not be compressed: the downstream scanner reads
// line-delimited text and cannot parse compressed bytes. Using
// "identity" tells the upstream to send an uncompressed stream.
r.Header.Set("Accept-Encoding", "identity")
} else {
r.Header.Set("Accept", "application/json")
r.Header.Set("Accept-Encoding", "gzip, deflate, br, zstd")
}
// Keep OS/Arch mapping dynamic (not configurable).
// They intentionally continue to derive from runtime.GOOS/runtime.GOARCH.
@@ -857,6 +904,12 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
attrs = auth.Attributes
}
util.ApplyCustomHeadersFromAttrs(r, attrs)
// Re-enforce Accept-Encoding: identity after ApplyCustomHeadersFromAttrs, which
// may override it with a user-configured value. Compressed SSE breaks the line
// scanner regardless of user preference, so this is non-negotiable for streams.
if stream {
r.Header.Set("Accept-Encoding", "identity")
}
}
func claudeCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) {

View File

@@ -2,6 +2,7 @@ package executor
import (
"bytes"
"compress/gzip"
"context"
"io"
"net/http"
@@ -9,6 +10,7 @@ import (
"strings"
"testing"
"github.com/klauspost/compress/zstd"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -583,3 +585,385 @@ func testClaudeExecutorInvalidCompressedErrorBody(
t.Fatalf("expected status code 400, got: %v", err)
}
}
// TestClaudeExecutor_ExecuteStream_SetsIdentityAcceptEncoding checks the headers a
// streaming request sends upstream: Accept-Encoding must be "identity" and Accept
// must be "text/event-stream", so the SSE body cannot come back compressed.
func TestClaudeExecutor_ExecuteStream_SetsIdentityAcceptEncoding(t *testing.T) {
	var seenEncoding, seenAccept string
	handler := func(w http.ResponseWriter, r *http.Request) {
		// Capture the negotiated headers, then answer with a minimal SSE stream.
		seenEncoding = r.Header.Get("Accept-Encoding")
		seenAccept = r.Header.Get("Accept")
		w.Header().Set("Content-Type", "text/event-stream")
		_, _ = w.Write([]byte("data: {\"type\":\"message_stop\"}\n\n"))
	}
	upstream := httptest.NewServer(http.HandlerFunc(handler))
	defer upstream.Close()

	claude := NewClaudeExecutor(&config.Config{})
	credentials := &cliproxyauth.Auth{Attributes: map[string]string{
		"api_key":  "key-123",
		"base_url": upstream.URL,
	}}
	request := cliproxyexecutor.Request{
		Model:   "claude-3-5-sonnet-20241022",
		Payload: []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`),
	}
	options := cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}

	stream, err := claude.ExecuteStream(context.Background(), credentials, request, options)
	if err != nil {
		t.Fatalf("ExecuteStream error: %v", err)
	}
	// Consume every chunk; any chunk-level error fails the test.
	for chunk := range stream.Chunks {
		if chunk.Err != nil {
			t.Fatalf("unexpected chunk error: %v", chunk.Err)
		}
	}

	if seenEncoding != "identity" {
		t.Errorf("Accept-Encoding = %q, want %q", seenEncoding, "identity")
	}
	if seenAccept != "text/event-stream" {
		t.Errorf("Accept = %q, want %q", seenAccept, "text/event-stream")
	}
}
// TestClaudeExecutor_Execute_SetsCompressedAcceptEncoding checks the headers a
// non-streaming request sends upstream: the full compressed Accept-Encoding list
// and Accept: application/json.
func TestClaudeExecutor_Execute_SetsCompressedAcceptEncoding(t *testing.T) {
	var seenEncoding, seenAccept string
	handler := func(w http.ResponseWriter, r *http.Request) {
		// Capture the request headers and reply with a small valid message.
		seenEncoding = r.Header.Get("Accept-Encoding")
		seenAccept = r.Header.Get("Accept")
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"id":"msg_1","type":"message","model":"claude-3-5-sonnet-20241022","role":"assistant","content":[{"type":"text","text":"hi"}],"usage":{"input_tokens":1,"output_tokens":1}}`))
	}
	upstream := httptest.NewServer(http.HandlerFunc(handler))
	defer upstream.Close()

	claude := NewClaudeExecutor(&config.Config{})
	credentials := &cliproxyauth.Auth{Attributes: map[string]string{
		"api_key":  "key-123",
		"base_url": upstream.URL,
	}}
	request := cliproxyexecutor.Request{
		Model:   "claude-3-5-sonnet-20241022",
		Payload: []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`),
	}
	options := cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}

	if _, err := claude.Execute(context.Background(), credentials, request, options); err != nil {
		t.Fatalf("Execute error: %v", err)
	}

	if seenEncoding != "gzip, deflate, br, zstd" {
		t.Errorf("Accept-Encoding = %q, want %q", seenEncoding, "gzip, deflate, br, zstd")
	}
	if seenAccept != "application/json" {
		t.Errorf("Accept = %q, want %q", seenAccept, "application/json")
	}
}
// TestClaudeExecutor_ExecuteStream_GzipSuccessBodyDecoded serves a successful SSE
// response whose body is gzip-compressed and labelled Content-Encoding: gzip, and
// expects the streamed chunks to contain the decompressed event text.
func TestClaudeExecutor_ExecuteStream_GzipSuccessBodyDecoded(t *testing.T) {
	// Build the gzip payload once, outside the handler.
	var packed bytes.Buffer
	zw := gzip.NewWriter(&packed)
	_, _ = zw.Write([]byte("data: {\"type\":\"message_stop\"}\n"))
	_ = zw.Close()
	gzipBody := packed.Bytes()

	handler := func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/event-stream")
		w.Header().Set("Content-Encoding", "gzip")
		_, _ = w.Write(gzipBody)
	}
	upstream := httptest.NewServer(http.HandlerFunc(handler))
	defer upstream.Close()

	claude := NewClaudeExecutor(&config.Config{})
	credentials := &cliproxyauth.Auth{Attributes: map[string]string{
		"api_key":  "key-123",
		"base_url": upstream.URL,
	}}
	request := cliproxyexecutor.Request{
		Model:   "claude-3-5-sonnet-20241022",
		Payload: []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`),
	}
	options := cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}

	stream, err := claude.ExecuteStream(context.Background(), credentials, request, options)
	if err != nil {
		t.Fatalf("ExecuteStream error: %v", err)
	}

	// Concatenate all chunk payloads so the assertions see the whole stream.
	var received strings.Builder
	for chunk := range stream.Chunks {
		if chunk.Err != nil {
			t.Fatalf("chunk error: %v", chunk.Err)
		}
		received.Write(chunk.Payload)
	}

	if received.Len() == 0 {
		t.Fatal("expected at least one chunk from gzip-encoded SSE body, got none (body was not decompressed)")
	}
	if text := received.String(); !strings.Contains(text, "message_stop") {
		t.Errorf("expected SSE content in chunks, got: %q", text)
	}
}
// TestDecodeResponseBody_MagicByteGzipNoHeader feeds decodeResponseBody a gzip
// stream with an empty Content-Encoding value and expects the original plaintext
// back, i.e. the compression is recognised from the leading bytes alone.
func TestDecodeResponseBody_MagicByteGzipNoHeader(t *testing.T) {
	const plaintext = "data: {\"type\":\"message_stop\"}\n"

	var packed bytes.Buffer
	zw := gzip.NewWriter(&packed)
	_, _ = zw.Write([]byte(plaintext))
	_ = zw.Close()

	decoded, err := decodeResponseBody(io.NopCloser(&packed), "")
	if err != nil {
		t.Fatalf("decodeResponseBody error: %v", err)
	}
	defer decoded.Close()

	raw, err := io.ReadAll(decoded)
	if err != nil {
		t.Fatalf("ReadAll error: %v", err)
	}
	if got := string(raw); got != plaintext {
		t.Errorf("decoded = %q, want %q", got, plaintext)
	}
}
// TestDecodeResponseBody_PlainTextNoHeader feeds decodeResponseBody uncompressed
// text with an empty Content-Encoding value and expects it back unchanged.
func TestDecodeResponseBody_PlainTextNoHeader(t *testing.T) {
	const plaintext = "data: {\"type\":\"message_stop\"}\n"

	source := io.NopCloser(strings.NewReader(plaintext))
	decoded, err := decodeResponseBody(source, "")
	if err != nil {
		t.Fatalf("decodeResponseBody error: %v", err)
	}
	defer decoded.Close()

	raw, err := io.ReadAll(decoded)
	if err != nil {
		t.Fatalf("ReadAll error: %v", err)
	}
	if got := string(raw); got != plaintext {
		t.Errorf("decoded = %q, want %q", got, plaintext)
	}
}
// TestClaudeExecutor_ExecuteStream_GzipNoContentEncodingHeader exercises the whole
// streaming path against an upstream that gzip-compresses the SSE body but omits
// the Content-Encoding header. The chunks delivered to the caller must still
// contain the decompressed event text.
func TestClaudeExecutor_ExecuteStream_GzipNoContentEncodingHeader(t *testing.T) {
	var packed bytes.Buffer
	zw := gzip.NewWriter(&packed)
	_, _ = zw.Write([]byte("data: {\"type\":\"message_stop\"}\n"))
	_ = zw.Close()
	gzipBody := packed.Bytes()

	handler := func(w http.ResponseWriter, r *http.Request) {
		// Content-Encoding is deliberately left unset to mimic a misbehaving upstream.
		w.Header().Set("Content-Type", "text/event-stream")
		_, _ = w.Write(gzipBody)
	}
	upstream := httptest.NewServer(http.HandlerFunc(handler))
	defer upstream.Close()

	claude := NewClaudeExecutor(&config.Config{})
	credentials := &cliproxyauth.Auth{Attributes: map[string]string{
		"api_key":  "key-123",
		"base_url": upstream.URL,
	}}
	request := cliproxyexecutor.Request{
		Model:   "claude-3-5-sonnet-20241022",
		Payload: []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`),
	}
	options := cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}

	stream, err := claude.ExecuteStream(context.Background(), credentials, request, options)
	if err != nil {
		t.Fatalf("ExecuteStream error: %v", err)
	}

	var received strings.Builder
	for chunk := range stream.Chunks {
		if chunk.Err != nil {
			t.Fatalf("chunk error: %v", chunk.Err)
		}
		received.Write(chunk.Payload)
	}

	if received.Len() == 0 {
		t.Fatal("expected chunks from gzip body without Content-Encoding header, got none (magic-byte sniff failed)")
	}
	if text := received.String(); !strings.Contains(text, "message_stop") {
		t.Errorf("unexpected chunk content: %q", text)
	}
}
// TestClaudeExecutor_ExecuteStream_AcceptEncodingOverrideCannotBypassIdentity sets
// a custom Accept-Encoding through auth.Attributes ("header:Accept-Encoding") and
// expects the streaming request to reach the upstream with "identity" anyway.
func TestClaudeExecutor_ExecuteStream_AcceptEncodingOverrideCannotBypassIdentity(t *testing.T) {
	var seenEncoding string
	handler := func(w http.ResponseWriter, r *http.Request) {
		seenEncoding = r.Header.Get("Accept-Encoding")
		w.Header().Set("Content-Type", "text/event-stream")
		_, _ = w.Write([]byte("data: {\"type\":\"message_stop\"}\n\n"))
	}
	upstream := httptest.NewServer(http.HandlerFunc(handler))
	defer upstream.Close()

	claude := NewClaudeExecutor(&config.Config{})
	// The "header:" attribute is the custom-header override being tested.
	credentials := &cliproxyauth.Auth{Attributes: map[string]string{
		"api_key":                "key-123",
		"base_url":               upstream.URL,
		"header:Accept-Encoding": "gzip, deflate, br, zstd",
	}}
	request := cliproxyexecutor.Request{
		Model:   "claude-3-5-sonnet-20241022",
		Payload: []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`),
	}
	options := cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}

	stream, err := claude.ExecuteStream(context.Background(), credentials, request, options)
	if err != nil {
		t.Fatalf("ExecuteStream error: %v", err)
	}
	for chunk := range stream.Chunks {
		if chunk.Err != nil {
			t.Fatalf("unexpected chunk error: %v", chunk.Err)
		}
	}

	if seenEncoding != "identity" {
		t.Errorf("Accept-Encoding = %q; stream path must enforce identity regardless of auth.Attributes override", seenEncoding)
	}
}
// TestDecodeResponseBody_MagicByteZstdNoHeader feeds decodeResponseBody a zstd
// stream with an empty Content-Encoding value and expects the original plaintext
// back, i.e. the zstd magic bytes (28 b5 2f fd) alone trigger decompression.
func TestDecodeResponseBody_MagicByteZstdNoHeader(t *testing.T) {
	const plaintext = "data: {\"type\":\"message_stop\"}\n"

	var packed bytes.Buffer
	zw, err := zstd.NewWriter(&packed)
	if err != nil {
		t.Fatalf("zstd.NewWriter: %v", err)
	}
	_, _ = zw.Write([]byte(plaintext))
	_ = zw.Close()

	decoded, err := decodeResponseBody(io.NopCloser(&packed), "")
	if err != nil {
		t.Fatalf("decodeResponseBody error: %v", err)
	}
	defer decoded.Close()

	raw, err := io.ReadAll(decoded)
	if err != nil {
		t.Fatalf("ReadAll error: %v", err)
	}
	if got := string(raw); got != plaintext {
		t.Errorf("decoded = %q, want %q", got, plaintext)
	}
}
// TestClaudeExecutor_Execute_GzipErrorBodyNoContentEncodingHeader returns a 400
// whose JSON error body is gzip-compressed but carries no Content-Encoding header,
// and expects Execute's error to contain the decompressed message text.
func TestClaudeExecutor_Execute_GzipErrorBodyNoContentEncodingHeader(t *testing.T) {
	const errJSON = `{"type":"error","error":{"type":"invalid_request_error","message":"test error"}}`

	var packed bytes.Buffer
	zw := gzip.NewWriter(&packed)
	_, _ = zw.Write([]byte(errJSON))
	_ = zw.Close()
	gzipBody := packed.Bytes()

	handler := func(w http.ResponseWriter, r *http.Request) {
		// Content-Encoding is deliberately left unset to mimic a misbehaving upstream.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write(gzipBody)
	}
	upstream := httptest.NewServer(http.HandlerFunc(handler))
	defer upstream.Close()

	claude := NewClaudeExecutor(&config.Config{})
	credentials := &cliproxyauth.Auth{Attributes: map[string]string{
		"api_key":  "key-123",
		"base_url": upstream.URL,
	}}
	request := cliproxyexecutor.Request{
		Model:   "claude-3-5-sonnet-20241022",
		Payload: []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`),
	}
	options := cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}

	_, err := claude.Execute(context.Background(), credentials, request, options)
	if err == nil {
		t.Fatal("expected an error for 400 response, got nil")
	}
	if text := err.Error(); !strings.Contains(text, "test error") {
		t.Errorf("error message should contain decompressed JSON, got: %q", text)
	}
}
// TestClaudeExecutor_ExecuteStream_GzipErrorBodyNoContentEncodingHeader is the
// streaming counterpart: a 400 with a gzip-compressed JSON body and no
// Content-Encoding header must surface as an error containing the decompressed
// message text.
func TestClaudeExecutor_ExecuteStream_GzipErrorBodyNoContentEncodingHeader(t *testing.T) {
	const errJSON = `{"type":"error","error":{"type":"invalid_request_error","message":"stream test error"}}`

	var packed bytes.Buffer
	zw := gzip.NewWriter(&packed)
	_, _ = zw.Write([]byte(errJSON))
	_ = zw.Close()
	gzipBody := packed.Bytes()

	handler := func(w http.ResponseWriter, r *http.Request) {
		// Content-Encoding is deliberately left unset to mimic a misbehaving upstream.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write(gzipBody)
	}
	upstream := httptest.NewServer(http.HandlerFunc(handler))
	defer upstream.Close()

	claude := NewClaudeExecutor(&config.Config{})
	credentials := &cliproxyauth.Auth{Attributes: map[string]string{
		"api_key":  "key-123",
		"base_url": upstream.URL,
	}}
	request := cliproxyexecutor.Request{
		Model:   "claude-3-5-sonnet-20241022",
		Payload: []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`),
	}
	options := cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")}

	_, err := claude.ExecuteStream(context.Background(), credentials, request, options)
	if err == nil {
		t.Fatal("expected an error for 400 response, got nil")
	}
	if text := err.Error(); !strings.Contains(text, "stream test error") {
		t.Errorf("error message should contain decompressed JSON, got: %q", text)
	}
}

View File

@@ -616,6 +616,10 @@ func (e *CodexExecutor) cacheHelper(ctx context.Context, from sdktranslator.Form
if promptCacheKey.Exists() {
cache.ID = promptCacheKey.String()
}
} else if from == "openai" {
if apiKey := strings.TrimSpace(apiKeyFromContext(ctx)); apiKey != "" {
cache.ID = uuid.NewSHA1(uuid.NameSpaceOID, []byte("cli-proxy-api:codex:prompt-cache:"+apiKey)).String()
}
}
if cache.ID != "" {

View File

@@ -0,0 +1,64 @@
package executor
import (
"context"
"io"
"net/http/httptest"
"testing"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
"github.com/tidwall/gjson"
)
// TestCodexExecutorCacheHelper_OpenAIChatCompletions_StablePromptCacheKeyFromAPIKey
// calls cacheHelper twice for an "openai"-format request carrying the same API key
// in the gin context. It expects the same SHA-1 UUID derived from that key as
// prompt_cache_key both times, and as the Conversation_id / Session_id headers on
// the first request.
func TestCodexExecutorCacheHelper_OpenAIChatCompletions_StablePromptCacheKeyFromAPIKey(t *testing.T) {
	// The key under test is a deterministic UUID derived from the caller's API key.
	expectedKey := uuid.NewSHA1(uuid.NameSpaceOID, []byte("cli-proxy-api:codex:prompt-cache:test-api-key")).String()

	// The API key is supplied through a gin context stored under the "gin" key.
	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())
	ginCtx.Set("apiKey", "test-api-key")
	ctx := context.WithValue(context.Background(), "gin", ginCtx)

	codex := &CodexExecutor{}
	source := sdktranslator.FromString("openai")
	endpoint := "https://example.com/responses"
	translated := []byte(`{"model":"gpt-5.3-codex","stream":true}`)
	request := cliproxyexecutor.Request{
		Model:   "gpt-5.3-codex",
		Payload: []byte(`{"model":"gpt-5.3-codex"}`),
	}

	// First call: body field and both session headers must carry the key.
	first, err := codex.cacheHelper(ctx, source, endpoint, request, translated)
	if err != nil {
		t.Fatalf("cacheHelper error: %v", err)
	}
	firstBody, errRead := io.ReadAll(first.Body)
	if errRead != nil {
		t.Fatalf("read request body: %v", errRead)
	}
	if gotKey := gjson.GetBytes(firstBody, "prompt_cache_key").String(); gotKey != expectedKey {
		t.Fatalf("prompt_cache_key = %q, want %q", gotKey, expectedKey)
	}
	if gotConversation := first.Header.Get("Conversation_id"); gotConversation != expectedKey {
		t.Fatalf("Conversation_id = %q, want %q", gotConversation, expectedKey)
	}
	if gotSession := first.Header.Get("Session_id"); gotSession != expectedKey {
		t.Fatalf("Session_id = %q, want %q", gotSession, expectedKey)
	}

	// Second call with identical inputs: the key must not change.
	second, err := codex.cacheHelper(ctx, source, endpoint, request, translated)
	if err != nil {
		t.Fatalf("cacheHelper error (second call): %v", err)
	}
	secondBody, errRead2 := io.ReadAll(second.Body)
	if errRead2 != nil {
		t.Fatalf("read request body (second call): %v", errRead2)
	}
	if gotKey2 := gjson.GetBytes(secondBody, "prompt_cache_key").String(); gotKey2 != expectedKey {
		t.Fatalf("prompt_cache_key (second call) = %q, want %q", gotKey2, expectedKey)
	}
}

View File

@@ -31,7 +31,7 @@ import (
)
const (
codexResponsesWebsocketBetaHeaderValue = "responses_websockets=2026-02-04"
codexResponsesWebsocketBetaHeaderValue = "responses_websockets=2026-02-06"
codexResponsesWebsocketIdleTimeout = 5 * time.Minute
codexResponsesWebsocketHandshakeTO = 30 * time.Second
)
@@ -57,11 +57,6 @@ type codexWebsocketSession struct {
wsURL string
authID string
// connCreateSent tracks whether a `response.create` message has been successfully sent
// on the current websocket connection. The upstream expects the first message on each
// connection to be `response.create`.
connCreateSent bool
writeMu sync.Mutex
activeMu sync.Mutex
@@ -212,13 +207,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
defer sess.reqMu.Unlock()
}
allowAppend := true
if sess != nil {
sess.connMu.Lock()
allowAppend = sess.connCreateSent
sess.connMu.Unlock()
}
wsReqBody := buildCodexWebsocketRequestBody(body, allowAppend)
wsReqBody := buildCodexWebsocketRequestBody(body)
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
URL: wsURL,
Method: "WEBSOCKET",
@@ -280,10 +269,7 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
// execution session.
connRetry, _, errDialRetry := e.ensureUpstreamConn(ctx, auth, sess, authID, wsURL, wsHeaders)
if errDialRetry == nil && connRetry != nil {
sess.connMu.Lock()
allowAppend = sess.connCreateSent
sess.connMu.Unlock()
wsReqBodyRetry := buildCodexWebsocketRequestBody(body, allowAppend)
wsReqBodyRetry := buildCodexWebsocketRequestBody(body)
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
URL: wsURL,
Method: "WEBSOCKET",
@@ -312,7 +298,6 @@ func (e *CodexWebsocketsExecutor) Execute(ctx context.Context, auth *cliproxyaut
return resp, errSend
}
}
markCodexWebsocketCreateSent(sess, conn, wsReqBody)
for {
if ctx != nil && ctx.Err() != nil {
@@ -403,26 +388,20 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
wsHeaders = applyCodexWebsocketHeaders(ctx, wsHeaders, auth, apiKey)
var authID, authLabel, authType, authValue string
if auth != nil {
authID = auth.ID
authLabel = auth.Label
authType, authValue = auth.AccountInfo()
}
authID = auth.ID
authLabel = auth.Label
authType, authValue = auth.AccountInfo()
executionSessionID := executionSessionIDFromOptions(opts)
var sess *codexWebsocketSession
if executionSessionID != "" {
sess = e.getOrCreateSession(executionSessionID)
sess.reqMu.Lock()
if sess != nil {
sess.reqMu.Lock()
}
}
allowAppend := true
if sess != nil {
sess.connMu.Lock()
allowAppend = sess.connCreateSent
sess.connMu.Unlock()
}
wsReqBody := buildCodexWebsocketRequestBody(body, allowAppend)
wsReqBody := buildCodexWebsocketRequestBody(body)
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
URL: wsURL,
Method: "WEBSOCKET",
@@ -483,10 +462,7 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
sess.reqMu.Unlock()
return nil, errDialRetry
}
sess.connMu.Lock()
allowAppend = sess.connCreateSent
sess.connMu.Unlock()
wsReqBodyRetry := buildCodexWebsocketRequestBody(body, allowAppend)
wsReqBodyRetry := buildCodexWebsocketRequestBody(body)
recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
URL: wsURL,
Method: "WEBSOCKET",
@@ -515,7 +491,6 @@ func (e *CodexWebsocketsExecutor) ExecuteStream(ctx context.Context, auth *clipr
return nil, errSend
}
}
markCodexWebsocketCreateSent(sess, conn, wsReqBody)
out := make(chan cliproxyexecutor.StreamChunk)
go func() {
@@ -657,31 +632,14 @@ func writeCodexWebsocketMessage(sess *codexWebsocketSession, conn *websocket.Con
return conn.WriteMessage(websocket.TextMessage, payload)
}
func buildCodexWebsocketRequestBody(body []byte, allowAppend bool) []byte {
func buildCodexWebsocketRequestBody(body []byte) []byte {
if len(body) == 0 {
return nil
}
// Codex CLI websocket v2 uses `response.create` with `previous_response_id` for incremental turns.
// The upstream ChatGPT Codex websocket currently rejects that with close 1008 (policy violation).
// Fall back to v1 `response.append` semantics on the same websocket connection to keep the session alive.
//
// NOTE: The upstream expects the first websocket event on each connection to be `response.create`,
// so we only use `response.append` after we have initialized the current connection.
if allowAppend {
if prev := strings.TrimSpace(gjson.GetBytes(body, "previous_response_id").String()); prev != "" {
inputNode := gjson.GetBytes(body, "input")
wsReqBody := []byte(`{}`)
wsReqBody, _ = sjson.SetBytes(wsReqBody, "type", "response.append")
if inputNode.Exists() && inputNode.IsArray() && strings.TrimSpace(inputNode.Raw) != "" {
wsReqBody, _ = sjson.SetRawBytes(wsReqBody, "input", []byte(inputNode.Raw))
return wsReqBody
}
wsReqBody, _ = sjson.SetRawBytes(wsReqBody, "input", []byte("[]"))
return wsReqBody
}
}
// Match codex-rs websocket v2 semantics: every request is `response.create`.
// Incremental follow-up turns continue on the same websocket using
// `previous_response_id` + incremental `input`, not `response.append`.
wsReqBody, errSet := sjson.SetBytes(bytes.Clone(body), "type", "response.create")
if errSet == nil && len(wsReqBody) > 0 {
return wsReqBody
@@ -725,21 +683,6 @@ func readCodexWebsocketMessage(ctx context.Context, sess *codexWebsocketSession,
}
}
// markCodexWebsocketCreateSent flags the session as having sent a
// `response.create` message on its current connection.
//
// It does nothing when sess or conn is nil, when payload is empty, when the
// payload's "type" field (whitespace-trimmed) is not "response.create", or when
// conn is not the connection currently stored on the session.
func markCodexWebsocketCreateSent(sess *codexWebsocketSession, conn *websocket.Conn, payload []byte) {
	switch {
	case sess == nil, conn == nil, len(payload) == 0:
		return
	}

	eventType := strings.TrimSpace(gjson.GetBytes(payload, "type").String())
	if eventType != "response.create" {
		return
	}

	sess.connMu.Lock()
	defer sess.connMu.Unlock()
	// Only mark the flag if the session still points at the connection we wrote to.
	if sess.conn == conn {
		sess.connCreateSent = true
	}
}
func newProxyAwareWebsocketDialer(cfg *config.Config, auth *cliproxyauth.Auth) *websocket.Dialer {
dialer := &websocket.Dialer{
Proxy: http.ProxyFromEnvironment,
@@ -1017,36 +960,6 @@ func closeHTTPResponseBody(resp *http.Response, logPrefix string) {
}
}
// closeOnContextDone starts a goroutine that closes conn once ctx is done.
//
// It returns a channel used to stop the watcher: when a receive on that channel
// succeeds (for example after the caller closes it) the goroutine exits without
// touching conn. If ctx or conn is nil no goroutine is started and the channel is
// returned as-is.
func closeOnContextDone(ctx context.Context, conn *websocket.Conn) chan struct{} {
	stop := make(chan struct{})
	if ctx != nil && conn != nil {
		go func() {
			select {
			case <-ctx.Done():
				// Close error is intentionally ignored.
				_ = conn.Close()
			case <-stop:
			}
		}()
	}
	return stop
}
// cancelReadOnContextDone starts a goroutine that sets conn's read deadline to the
// current time once ctx is done.
//
// It returns a channel used to stop the watcher: when a receive on that channel
// succeeds (for example after the caller closes it) the goroutine exits without
// touching conn. If ctx or conn is nil no goroutine is started and the channel is
// returned as-is.
func cancelReadOnContextDone(ctx context.Context, conn *websocket.Conn) chan struct{} {
	stop := make(chan struct{})
	if ctx != nil && conn != nil {
		go func() {
			select {
			case <-ctx.Done():
				// SetReadDeadline error is intentionally ignored.
				_ = conn.SetReadDeadline(time.Now())
			case <-stop:
			}
		}()
	}
	return stop
}
func executionSessionIDFromOptions(opts cliproxyexecutor.Options) string {
if len(opts.Metadata) == 0 {
return ""
@@ -1120,7 +1033,6 @@ func (e *CodexWebsocketsExecutor) ensureUpstreamConn(ctx context.Context, auth *
sess.conn = conn
sess.wsURL = wsURL
sess.authID = authID
sess.connCreateSent = false
sess.readerConn = conn
sess.connMu.Unlock()
@@ -1206,7 +1118,6 @@ func (e *CodexWebsocketsExecutor) invalidateUpstreamConn(sess *codexWebsocketSes
return
}
sess.conn = nil
sess.connCreateSent = false
if sess.readerConn == conn {
sess.readerConn = nil
}
@@ -1273,7 +1184,6 @@ func (e *CodexWebsocketsExecutor) closeExecutionSession(sess *codexWebsocketSess
authID := sess.authID
wsURL := sess.wsURL
sess.conn = nil
sess.connCreateSent = false
if sess.readerConn == conn {
sess.readerConn = nil
}

View File

@@ -0,0 +1,36 @@
package executor
import (
"context"
"net/http"
"testing"
"github.com/tidwall/gjson"
)
// TestBuildCodexWebsocketRequestBodyPreservesPreviousResponseID verifies that a
// body carrying previous_response_id is sent as `response.create` with the
// previous_response_id and input items left intact, and never as
// `response.append`.
func TestBuildCodexWebsocketRequestBodyPreservesPreviousResponseID(t *testing.T) {
	input := []byte(`{"model":"gpt-5-codex","previous_response_id":"resp-1","input":[{"type":"message","id":"msg-1"}]}`)
	built := buildCodexWebsocketRequestBody(input)

	eventType := gjson.GetBytes(built, "type").String()
	if eventType != "response.create" {
		t.Fatalf("type = %s, want response.create", eventType)
	}

	prevID := gjson.GetBytes(built, "previous_response_id").String()
	if prevID != "resp-1" {
		t.Fatalf("previous_response_id = %s, want resp-1", prevID)
	}

	if firstItemID := gjson.GetBytes(built, "input.0.id").String(); firstItemID != "msg-1" {
		t.Fatalf("input item id mismatch")
	}

	// Explicit guard against the legacy event type.
	if eventType == "response.append" {
		t.Fatalf("unexpected websocket request type: %s", eventType)
	}
}
// TestApplyCodexWebsocketHeadersDefaultsToCurrentResponsesBeta verifies that, with
// empty input headers, nil auth and no API key, applyCodexWebsocketHeaders sets
// OpenAI-Beta to codexResponsesWebsocketBetaHeaderValue.
func TestApplyCodexWebsocketHeadersDefaultsToCurrentResponsesBeta(t *testing.T) {
	applied := applyCodexWebsocketHeaders(context.Background(), http.Header{}, nil, "")
	beta := applied.Get("OpenAI-Beta")
	if beta == codexResponsesWebsocketBetaHeaderValue {
		return
	}
	t.Fatalf("OpenAI-Beta = %s, want %s", beta, codexResponsesWebsocketBetaHeaderValue)
}

View File

@@ -14,6 +14,7 @@ import (
"github.com/google/uuid"
copilotauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/copilot"
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -490,18 +491,46 @@ func (e *GitHubCopilotExecutor) applyHeaders(r *http.Request, apiToken string, b
r.Header.Set("X-Request-Id", uuid.NewString())
initiator := "user"
if len(body) > 0 {
if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
for _, msg := range messages.Array() {
role := msg.Get("role").String()
if role == "assistant" || role == "tool" {
initiator = "agent"
break
}
if role := detectLastConversationRole(body); role == "assistant" || role == "tool" {
initiator = "agent"
}
r.Header.Set("X-Initiator", initiator)
}
// detectLastConversationRole returns the role of the most recent conversation
// entry found in a JSON request body, or "" when none can be determined.
//
// Lookup order:
//  1. If "messages" is an array, it is scanned from the end and the first
//     non-empty "role" is returned.
//  2. Otherwise (or if no message has a role) an "input" array is scanned from
//     the end. An item's own "role" wins; items without one are classified by
//     "type": function_call / function_call_arguments map to "assistant", and
//     function_call_output / function_call_response / tool_result map to "tool".
//     Items that match neither are skipped.
func detectLastConversationRole(body []byte) string {
	if len(body) == 0 {
		return ""
	}

	if messages := gjson.GetBytes(body, "messages"); messages.Exists() && messages.IsArray() {
		arr := messages.Array()
		for i := len(arr) - 1; i >= 0; i-- {
			if role := arr[i].Get("role").String(); role != "" {
				return role
			}
		}
	}

	if inputs := gjson.GetBytes(body, "input"); inputs.Exists() && inputs.IsArray() {
		arr := inputs.Array()
		for i := len(arr) - 1; i >= 0; i-- {
			item := arr[i]

			// Most Responses input items carry a top-level role.
			if role := item.Get("role").String(); role != "" {
				return role
			}

			// Role-less items are classified by their item type.
			switch item.Get("type").String() {
			case "function_call", "function_call_arguments":
				return "assistant"
			case "function_call_output", "function_call_response", "tool_result":
				return "tool"
			}
		}
	}

	return ""
}
// detectVisionContent checks if the request body contains vision/image content.
@@ -1236,3 +1265,99 @@ func translateGitHubCopilotResponsesStreamToClaude(line []byte, param *any) []st
// isHTTPSuccess reports whether statusCode lies in the 2xx range
// (200 through 299 inclusive).
func isHTTPSuccess(statusCode int) bool {
	if statusCode < 200 {
		return false
	}
	return statusCode <= 299
}
const (
	// defaultCopilotContextLength is the context window assigned to Copilot
	// models that are not otherwise known (128,000).
	defaultCopilotContextLength = 128 * 1000
	// defaultCopilotMaxCompletionTokens is the maximum output token count
	// assigned to Copilot models that are not otherwise known (16,384).
	defaultCopilotMaxCompletionTokens = 16 * 1024
)
// FetchGitHubCopilotModels returns the models reported by the GitHub Copilot API
// for the given credentials.
//
// The GitHub access token is read from auth.Metadata["access_token"] and handed to
// the Copilot auth client, which lists the available models. Each returned entry is
// converted to a registry.ModelInfo; entries whose ID matches a statically
// registered model inherit that model's description, limits, endpoints and thinking
// configuration, while unknown IDs receive generic defaults.
//
// The static registry list is returned unchanged whenever auth is nil, the access
// token is missing, the listing call fails, or the API reports no models.
func FetchGitHubCopilotModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *config.Config) []*registry.ModelInfo {
	if auth == nil {
		log.Debug("github-copilot: auth is nil, using static models")
		return registry.GetGitHubCopilotModels()
	}

	githubToken := metaStringValue(auth.Metadata, "access_token")
	if githubToken == "" {
		log.Debug("github-copilot: no access_token in auth metadata, using static models")
		return registry.GetGitHubCopilotModels()
	}

	entries, err := copilotauth.NewCopilotAuth(cfg).ListModelsWithGitHubToken(ctx, githubToken)
	switch {
	case err != nil:
		log.Warnf("github-copilot: failed to fetch dynamic models: %v, using static models", err)
		return registry.GetGitHubCopilotModels()
	case len(entries) == 0:
		log.Debug("github-copilot: API returned no models, using static models")
		return registry.GetGitHubCopilotModels()
	}

	// Index the static definitions by ID so dynamic entries can be enriched with
	// known context lengths, thinking support, and so on.
	staticMap := make(map[string]*registry.ModelInfo)
	for _, known := range registry.GetGitHubCopilotModels() {
		staticMap[known.ID] = known
	}

	now := time.Now().Unix()
	seen := make(map[string]struct{}, len(entries))
	models := make([]*registry.ModelInfo, 0, len(entries))

	for _, entry := range entries {
		id := entry.ID
		if id == "" {
			continue
		}
		// Keep only the first occurrence of each ID; duplicates would lead to
		// incorrect reference counting.
		if _, dup := seen[id]; dup {
			continue
		}
		seen[id] = struct{}{}

		m := &registry.ModelInfo{
			ID:      id,
			Object:  "model",
			Created: now,
			OwnedBy: "github-copilot",
			Type:    "github-copilot",
		}
		// Prefer the creation timestamp reported by the API when it is set.
		if entry.Created > 0 {
			m.Created = entry.Created
		}
		// Fall back to the ID when the API gives no human-readable name.
		m.DisplayName = entry.Name
		if m.DisplayName == "" {
			m.DisplayName = id
		}

		static, known := staticMap[id]
		if !known {
			// Sensible defaults for models that the static list does not cover.
			m.Description = id + " via GitHub Copilot"
			m.ContextLength = defaultCopilotContextLength
			m.MaxCompletionTokens = defaultCopilotMaxCompletionTokens
			models = append(models, m)
			continue
		}

		// Merge known metadata from the static fallback list. The static display
		// name is used only when the name is still just the raw ID.
		if m.DisplayName == id && static.DisplayName != "" {
			m.DisplayName = static.DisplayName
		}
		m.Description = static.Description
		m.ContextLength = static.ContextLength
		m.MaxCompletionTokens = static.MaxCompletionTokens
		m.SupportedEndpoints = static.SupportedEndpoints
		m.Thinking = static.Thinking
		models = append(models, m)
	}

	log.Infof("github-copilot: fetched %d models from API", len(models))
	return models
}

View File

@@ -262,15 +262,15 @@ func TestApplyHeaders_XInitiator_UserOnly(t *testing.T) {
}
}
func TestApplyHeaders_XInitiator_AgentWithAssistantAndUserToolResult(t *testing.T) {
func TestApplyHeaders_XInitiator_UserWhenLastRoleIsUser(t *testing.T) {
t.Parallel()
e := &GitHubCopilotExecutor{}
req, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
// Claude Code typical flow: last message is user (tool result), but has assistant in history
// Last role governs the initiator decision.
body := []byte(`{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"I will read the file"},{"role":"user","content":"tool result here"}]}`)
e.applyHeaders(req, "token", body)
if got := req.Header.Get("X-Initiator"); got != "agent" {
t.Fatalf("X-Initiator = %q, want agent (assistant exists in messages)", got)
if got := req.Header.Get("X-Initiator"); got != "user" {
t.Fatalf("X-Initiator = %q, want user (last role is user)", got)
}
}
@@ -285,6 +285,39 @@ func TestApplyHeaders_XInitiator_AgentWithToolRole(t *testing.T) {
}
}
// TestApplyHeaders_XInitiator_InputArrayLastAssistantMessage verifies that an
// "input" array whose final item carries role "assistant" produces
// X-Initiator "agent".
func TestApplyHeaders_XInitiator_InputArrayLastAssistantMessage(t *testing.T) {
	t.Parallel()

	const payload = `{"input":[{"type":"message","role":"user","content":[{"type":"input_text","text":"Hi"}]},{"type":"message","role":"assistant","content":[{"type":"output_text","text":"Hello"}]}]}`

	// The method and URL are fixed and valid, so the error is not inspected.
	request, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
	executor := &GitHubCopilotExecutor{}
	executor.applyHeaders(request, "token", []byte(payload))

	initiator := request.Header.Get("X-Initiator")
	if initiator == "agent" {
		return
	}
	t.Fatalf("X-Initiator = %q, want agent (last role is assistant)", initiator)
}
// TestApplyHeaders_XInitiator_InputArrayLastUserMessage verifies that an "input"
// array ending in a user message produces X-Initiator "user", even though an
// assistant message appears earlier in the array.
func TestApplyHeaders_XInitiator_InputArrayLastUserMessage(t *testing.T) {
	t.Parallel()

	const payload = `{"input":[{"type":"message","role":"assistant","content":[{"type":"output_text","text":"I can help"}]},{"type":"message","role":"user","content":[{"type":"input_text","text":"Do X"}]}]}`

	// The method and URL are fixed and valid, so the error is not inspected.
	request, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
	executor := &GitHubCopilotExecutor{}
	executor.applyHeaders(request, "token", []byte(payload))

	initiator := request.Header.Get("X-Initiator")
	if initiator == "user" {
		return
	}
	t.Fatalf("X-Initiator = %q, want user (last role is user)", initiator)
}
// TestApplyHeaders_XInitiator_InputArrayLastFunctionCallOutput verifies that an
// "input" array ending in a role-less function_call_output item produces
// X-Initiator "agent".
func TestApplyHeaders_XInitiator_InputArrayLastFunctionCallOutput(t *testing.T) {
	t.Parallel()

	const payload = `{"input":[{"type":"message","role":"user","content":[{"type":"input_text","text":"Use tool"}]},{"type":"function_call","call_id":"c1","name":"Read","arguments":"{}"},{"type":"function_call_output","call_id":"c1","output":"ok"}]}`

	// The method and URL are fixed and valid, so the error is not inspected.
	request, _ := http.NewRequest(http.MethodPost, "https://example.com", nil)
	executor := &GitHubCopilotExecutor{}
	executor.applyHeaders(request, "token", []byte(payload))

	initiator := request.Header.Get("X-Initiator")
	if initiator == "agent" {
		return
	}
	t.Fatalf("X-Initiator = %q, want agent (last item maps to tool role)", initiator)
}
// --- Tests for x-github-api-version header (Problem M) ---
func TestApplyHeaders_GitHubAPIVersion(t *testing.T) {

View File

@@ -2458,7 +2458,6 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
reader := bufio.NewReaderSize(body, 20*1024*1024) // 20MB buffer to match other providers
var totalUsage usage.Detail
var hasToolUses bool // Track if any tool uses were emitted
var hasTruncatedTools bool // Track if any tool uses were truncated
var upstreamStopReason string // Track stop_reason from upstream events
// Tool use state tracking for input buffering and deduplication
@@ -3286,59 +3285,9 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
// Emit completed tool uses
for _, tu := range completedToolUses {
// Check if this tool was truncated - emit with SOFT_LIMIT_REACHED marker
// Skip truncated tools - don't emit fake marker tool_use
if tu.IsTruncated {
hasTruncatedTools = true
log.Infof("kiro: streamToChannel emitting truncated tool with SOFT_LIMIT_REACHED: %s (ID: %s)", tu.Name, tu.ToolUseID)
// Close text block if open
if isTextBlockOpen && contentBlockIndex >= 0 {
blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex)
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
for _, chunk := range sseData {
if chunk != "" {
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
isTextBlockOpen = false
}
contentBlockIndex++
// Emit tool_use with SOFT_LIMIT_REACHED marker input
blockStart := kiroclaude.BuildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", tu.ToolUseID, tu.Name)
sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
for _, chunk := range sseData {
if chunk != "" {
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
// Build SOFT_LIMIT_REACHED marker input
markerInput := map[string]interface{}{
"_status": "SOFT_LIMIT_REACHED",
"_message": "Tool output was truncated. Split content into smaller chunks (max 300 lines). Due to potential model hallucination, you MUST re-fetch the current working directory and generate the correct file_path.",
}
markerJSON, _ := json.Marshal(markerInput)
inputDelta := kiroclaude.BuildClaudeInputJsonDeltaEvent(string(markerJSON), contentBlockIndex)
sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, inputDelta, &translatorParam)
for _, chunk := range sseData {
if chunk != "" {
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
// Close tool_use block
blockStop := kiroclaude.BuildClaudeContentBlockStopEvent(contentBlockIndex)
sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
for _, chunk := range sseData {
if chunk != "" {
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
}
}
hasToolUses = true // Keep this so stop_reason = tool_use
log.Warnf("kiro: streamToChannel skipping truncated tool: %s (ID: %s)", tu.Name, tu.ToolUseID)
continue
}
@@ -3640,12 +3589,7 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
}
// Determine stop reason: prefer upstream, then detect tool_use, default to end_turn
// SOFT_LIMIT_REACHED: Keep stop_reason = "tool_use" so Claude continues the loop
stopReason := upstreamStopReason
if hasTruncatedTools {
// Log that we're using SOFT_LIMIT_REACHED approach
log.Infof("kiro: streamToChannel using SOFT_LIMIT_REACHED - keeping stop_reason=tool_use for truncated tools")
}
if stopReason == "" {
if hasToolUses {
stopReason = "tool_use"

View File

@@ -431,6 +431,33 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
out, _ = sjson.SetRaw(out, "request.tools", toolsJSON)
}
// tool_choice
toolChoiceResult := gjson.GetBytes(rawJSON, "tool_choice")
if toolChoiceResult.Exists() {
toolChoiceType := ""
toolChoiceName := ""
if toolChoiceResult.IsObject() {
toolChoiceType = toolChoiceResult.Get("type").String()
toolChoiceName = toolChoiceResult.Get("name").String()
} else if toolChoiceResult.Type == gjson.String {
toolChoiceType = toolChoiceResult.String()
}
switch toolChoiceType {
case "auto":
out, _ = sjson.Set(out, "request.toolConfig.functionCallingConfig.mode", "AUTO")
case "none":
out, _ = sjson.Set(out, "request.toolConfig.functionCallingConfig.mode", "NONE")
case "any":
out, _ = sjson.Set(out, "request.toolConfig.functionCallingConfig.mode", "ANY")
case "tool":
out, _ = sjson.Set(out, "request.toolConfig.functionCallingConfig.mode", "ANY")
if toolChoiceName != "" {
out, _ = sjson.Set(out, "request.toolConfig.functionCallingConfig.allowedFunctionNames", []string{toolChoiceName})
}
}
}
// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
if t := gjson.GetBytes(rawJSON, "thinking"); enableThoughtTranslate && t.Exists() && t.IsObject() {
switch t.Get("type").String() {
@@ -441,9 +468,22 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
}
case "adaptive", "auto":
// Keep adaptive/auto as a high level sentinel; ApplyThinking resolves it
// to model-specific max capability.
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingLevel", "high")
// For adaptive thinking:
// - If output_config.effort is explicitly present, pass through as thinkingLevel.
// - Otherwise, treat it as "enabled with target-model maximum" and emit high.
// ApplyThinking handles clamping to target model's supported levels.
effort := ""
if v := gjson.GetBytes(rawJSON, "output_config.effort"); v.Exists() && v.Type == gjson.String {
effort = strings.ToLower(strings.TrimSpace(v.String()))
}
if effort != "" {
if effort == "max" {
effort = "high"
}
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingLevel", effort)
} else {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingLevel", "high")
}
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
}
}

View File

@@ -193,6 +193,42 @@ func TestConvertClaudeRequestToAntigravity_ToolDeclarations(t *testing.T) {
}
}
// TestConvertClaudeRequestToAntigravity_ToolChoice_SpecificTool verifies that a
// Claude tool_choice of {"type":"tool","name":...} is translated to function
// calling mode "ANY" restricted to exactly that function name.
func TestConvertClaudeRequestToAntigravity_ToolChoice_SpecificTool(t *testing.T) {
	const (
		modePath    = "request.toolConfig.functionCallingConfig.mode"
		allowedPath = "request.toolConfig.functionCallingConfig.allowedFunctionNames"
	)

	request := []byte(`{
"model": "gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": "hi"}
]
}
],
"tools": [
{
"name": "json",
"description": "A JSON tool",
"input_schema": {
"type": "object",
"properties": {}
}
}
],
"tool_choice": {"type": "tool", "name": "json"}
}`)

	converted := string(ConvertClaudeRequestToAntigravity("gemini-3-flash-preview", request, false))

	mode := gjson.Get(converted, modePath).String()
	if mode != "ANY" {
		t.Fatalf("Expected toolConfig.functionCallingConfig.mode 'ANY', got '%s'", mode)
	}

	names := gjson.Get(converted, allowedPath)
	entries := names.Array()
	if len(entries) == 1 && entries[0].String() == "json" {
		return
	}
	t.Fatalf("Expected allowedFunctionNames ['json'], got %s", names.Raw)
}
func TestConvertClaudeRequestToAntigravity_ToolUse(t *testing.T) {
inputJSON := []byte(`{
"model": "claude-3-5-sonnet-20240620",
@@ -1199,3 +1235,64 @@ func TestConvertClaudeRequestToAntigravity_ToolAndThinking_NoExistingSystem(t *t
t.Errorf("Interleaved thinking hint should be in created systemInstruction, got: %v", sysInstruction.Raw)
}
}
// TestConvertClaudeRequestToAntigravity_AdaptiveThinking_EffortLevels verifies
// that, for adaptive thinking, an explicit output_config.effort is passed
// through as thinkingLevel, with "max" mapped to "high", and that
// includeThoughts is enabled.
func TestConvertClaudeRequestToAntigravity_AdaptiveThinking_EffortLevels(t *testing.T) {
	type effortCase struct {
		name     string
		effort   string
		expected string
	}
	cases := []effortCase{
		{name: "low", effort: "low", expected: "low"},
		{name: "medium", effort: "medium", expected: "medium"},
		{name: "high", effort: "high", expected: "high"},
		{name: "max", effort: "max", expected: "high"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			request := []byte(`{
"model": "claude-opus-4-6-thinking",
"messages": [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}],
"thinking": {"type": "adaptive"},
"output_config": {"effort": "` + tc.effort + `"}
}`)

			converted := string(ConvertClaudeRequestToAntigravity("claude-opus-4-6-thinking", request, false))

			thinkingConfig := gjson.Get(converted, "request.generationConfig.thinkingConfig")
			if !thinkingConfig.Exists() {
				t.Fatal("thinkingConfig should exist for adaptive thinking")
			}
			if level := thinkingConfig.Get("thinkingLevel").String(); level != tc.expected {
				t.Errorf("Expected thinkingLevel %q, got %q", tc.expected, level)
			}
			if includes := thinkingConfig.Get("includeThoughts").Bool(); !includes {
				t.Error("includeThoughts should be true")
			}
		})
	}
}
// TestConvertClaudeRequestToAntigravity_AdaptiveThinking_NoEffort verifies that
// adaptive thinking without output_config.effort defaults thinkingLevel to
// "high" and enables includeThoughts.
func TestConvertClaudeRequestToAntigravity_AdaptiveThinking_NoEffort(t *testing.T) {
	request := []byte(`{
"model": "claude-opus-4-6-thinking",
"messages": [{"role": "user", "content": [{"type": "text", "text": "Hello"}]}],
"thinking": {"type": "adaptive"}
}`)

	converted := string(ConvertClaudeRequestToAntigravity("claude-opus-4-6-thinking", request, false))

	thinkingConfig := gjson.Get(converted, "request.generationConfig.thinkingConfig")
	if !thinkingConfig.Exists() {
		t.Fatal("thinkingConfig should exist for adaptive thinking without effort")
	}
	if level := thinkingConfig.Get("thinkingLevel").String(); level != "high" {
		t.Errorf("Expected default thinkingLevel \"high\", got %q", level)
	}
	if includes := thinkingConfig.Get("includeThoughts").Bool(); !includes {
		t.Error("includeThoughts should be true")
	}
}

View File

@@ -255,6 +255,8 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
tool, _ = sjson.SetRaw(tool, "parameters", normalizeToolParameters(toolResult.Get("input_schema").Raw))
tool, _ = sjson.Delete(tool, "input_schema")
tool, _ = sjson.Delete(tool, "parameters.$schema")
tool, _ = sjson.Delete(tool, "cache_control")
tool, _ = sjson.Delete(tool, "defer_loading")
tool, _ = sjson.Set(tool, "strict", false)
template, _ = sjson.SetRaw(template, "tools.-1", tool)
}

View File

@@ -74,8 +74,13 @@ func ConvertCodexResponseToOpenAI(_ context.Context, modelName string, originalR
}
// Extract and set the model version.
cachedModel := (*param).(*ConvertCliToOpenAIParams).Model
if modelResult := gjson.GetBytes(rawJSON, "model"); modelResult.Exists() {
template, _ = sjson.Set(template, "model", modelResult.String())
} else if cachedModel != "" {
template, _ = sjson.Set(template, "model", cachedModel)
} else if modelName != "" {
template, _ = sjson.Set(template, "model", modelName)
}
template, _ = sjson.Set(template, "created", (*param).(*ConvertCliToOpenAIParams).CreatedAt)

View File

@@ -0,0 +1,47 @@
package chat_completions
import (
"context"
"testing"
"github.com/tidwall/gjson"
)
// TestConvertCodexResponseToOpenAI_StreamSetsModelFromResponseCreated feeds a
// response.created event followed by a text delta through the same converter
// state. The created event must emit nothing, and the delta chunk must carry
// the expected model name.
func TestConvertCodexResponseToOpenAI_StreamSetsModelFromResponseCreated(t *testing.T) {
	const wantModel = "gpt-5.3-codex"

	var state any
	ctx := context.Background()

	createdEvent := []byte(`data: {"type":"response.created","response":{"id":"resp_123","created_at":1700000000,"model":"gpt-5.3-codex"}}`)
	chunks := ConvertCodexResponseToOpenAI(ctx, wantModel, nil, nil, createdEvent, &state)
	if n := len(chunks); n != 0 {
		t.Fatalf("expected no output for response.created, got %d chunks", n)
	}

	deltaEvent := []byte(`data: {"type":"response.output_text.delta","delta":"hello"}`)
	chunks = ConvertCodexResponseToOpenAI(ctx, wantModel, nil, nil, deltaEvent, &state)
	if n := len(chunks); n != 1 {
		t.Fatalf("expected 1 chunk, got %d", n)
	}

	if model := gjson.Get(chunks[0], "model").String(); model != wantModel {
		t.Fatalf("expected model %q, got %q", wantModel, model)
	}
}
// TestConvertCodexResponseToOpenAI_FirstChunkUsesRequestModelName verifies that
// when the very first event is a text delta with no model field, the emitted
// chunk carries the model name passed to the converter.
func TestConvertCodexResponseToOpenAI_FirstChunkUsesRequestModelName(t *testing.T) {
	const wantModel = "gpt-5.3-codex"

	var state any
	ctx := context.Background()

	deltaEvent := []byte(`data: {"type":"response.output_text.delta","delta":"hello"}`)
	chunks := ConvertCodexResponseToOpenAI(ctx, wantModel, nil, nil, deltaEvent, &state)
	if n := len(chunks); n != 1 {
		t.Fatalf("expected 1 chunk, got %d", n)
	}

	if model := gjson.Get(chunks[0], "model").String(); model != wantModel {
		t.Fatalf("expected model %q, got %q", wantModel, model)
	}
}

View File

@@ -25,7 +25,12 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte,
rawJSON, _ = sjson.DeleteBytes(rawJSON, "max_completion_tokens")
rawJSON, _ = sjson.DeleteBytes(rawJSON, "temperature")
rawJSON, _ = sjson.DeleteBytes(rawJSON, "top_p")
rawJSON, _ = sjson.DeleteBytes(rawJSON, "service_tier")
if v := gjson.GetBytes(rawJSON, "service_tier"); v.Exists() {
if v.String() != "priority" {
rawJSON, _ = sjson.DeleteBytes(rawJSON, "service_tier")
}
}
rawJSON, _ = sjson.DeleteBytes(rawJSON, "truncation")
rawJSON = applyResponsesCompactionCompatibility(rawJSON)

View File

@@ -6,24 +6,14 @@ import (
"fmt"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
// ConvertCodexResponseToOpenAIResponses converts OpenAI Chat Completions streaming chunks
// to OpenAI Responses SSE events (response.*).
func ConvertCodexResponseToOpenAIResponses(ctx context.Context, modelName string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) []string {
func ConvertCodexResponseToOpenAIResponses(_ context.Context, _ string, _, _, rawJSON []byte, _ *any) []string {
if bytes.HasPrefix(rawJSON, []byte("data:")) {
rawJSON = bytes.TrimSpace(rawJSON[5:])
if typeResult := gjson.GetBytes(rawJSON, "type"); typeResult.Exists() {
typeStr := typeResult.String()
if typeStr == "response.created" || typeStr == "response.in_progress" || typeStr == "response.completed" {
if gjson.GetBytes(rawJSON, "response.instructions").Exists() {
instructions := gjson.GetBytes(originalRequestRawJSON, "instructions").String()
rawJSON, _ = sjson.SetBytes(rawJSON, "response.instructions", instructions)
}
}
}
out := fmt.Sprintf("data: %s", string(rawJSON))
return []string{out}
}
@@ -32,17 +22,12 @@ func ConvertCodexResponseToOpenAIResponses(ctx context.Context, modelName string
// ConvertCodexResponseToOpenAIResponsesNonStream builds a single Responses JSON
// from a non-streaming OpenAI Chat Completions response.
func ConvertCodexResponseToOpenAIResponsesNonStream(_ context.Context, modelName string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
func ConvertCodexResponseToOpenAIResponsesNonStream(_ context.Context, _ string, _, _, rawJSON []byte, _ *any) string {
rootResult := gjson.ParseBytes(rawJSON)
// Verify this is a response.completed event
if rootResult.Get("type").String() != "response.completed" {
return ""
}
responseResult := rootResult.Get("response")
template := responseResult.Raw
if responseResult.Get("instructions").Exists() {
instructions := gjson.GetBytes(originalRequestRawJSON, "instructions").String()
template, _ = sjson.Set(template, "instructions", instructions)
}
return template
return responseResult.Raw
}

View File

@@ -156,6 +156,7 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
tool, _ = sjson.Delete(tool, "input_examples")
tool, _ = sjson.Delete(tool, "type")
tool, _ = sjson.Delete(tool, "cache_control")
tool, _ = sjson.Delete(tool, "defer_loading")
if gjson.Valid(tool) && gjson.Parse(tool).IsObject() {
if !hasTools {
out, _ = sjson.SetRaw(out, "request.tools", `[{"functionDeclarations":[]}]`)
@@ -171,6 +172,33 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
}
}
// tool_choice
toolChoiceResult := gjson.GetBytes(rawJSON, "tool_choice")
if toolChoiceResult.Exists() {
toolChoiceType := ""
toolChoiceName := ""
if toolChoiceResult.IsObject() {
toolChoiceType = toolChoiceResult.Get("type").String()
toolChoiceName = toolChoiceResult.Get("name").String()
} else if toolChoiceResult.Type == gjson.String {
toolChoiceType = toolChoiceResult.String()
}
switch toolChoiceType {
case "auto":
out, _ = sjson.Set(out, "request.toolConfig.functionCallingConfig.mode", "AUTO")
case "none":
out, _ = sjson.Set(out, "request.toolConfig.functionCallingConfig.mode", "NONE")
case "any":
out, _ = sjson.Set(out, "request.toolConfig.functionCallingConfig.mode", "ANY")
case "tool":
out, _ = sjson.Set(out, "request.toolConfig.functionCallingConfig.mode", "ANY")
if toolChoiceName != "" {
out, _ = sjson.Set(out, "request.toolConfig.functionCallingConfig.allowedFunctionNames", []string{toolChoiceName})
}
}
}
// Map Anthropic thinking -> Gemini CLI thinkingConfig when enabled
// Translator only does format conversion, ApplyThinking handles model capability validation.
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {

View File

@@ -0,0 +1,42 @@
package claude
import (
"testing"
"github.com/tidwall/gjson"
)
// TestConvertClaudeRequestToCLI_ToolChoice_SpecificTool verifies that a Claude
// tool_choice of {"type":"tool","name":...} is translated to function calling
// mode "ANY" restricted to exactly that function name.
func TestConvertClaudeRequestToCLI_ToolChoice_SpecificTool(t *testing.T) {
	const (
		modePath    = "request.toolConfig.functionCallingConfig.mode"
		allowedPath = "request.toolConfig.functionCallingConfig.allowedFunctionNames"
	)

	request := []byte(`{
"model": "gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": "hi"}
]
}
],
"tools": [
{
"name": "json",
"description": "A JSON tool",
"input_schema": {
"type": "object",
"properties": {}
}
}
],
"tool_choice": {"type": "tool", "name": "json"}
}`)

	converted := ConvertClaudeRequestToCLI("gemini-3-flash-preview", request, false)

	mode := gjson.GetBytes(converted, modePath).String()
	if mode != "ANY" {
		t.Fatalf("Expected request.toolConfig.functionCallingConfig.mode 'ANY', got '%s'", mode)
	}

	names := gjson.GetBytes(converted, allowedPath)
	entries := names.Array()
	if len(entries) == 1 && entries[0].String() == "json" {
		return
	}
	t.Fatalf("Expected allowedFunctionNames ['json'], got %s", names.Raw)
}

View File

@@ -85,6 +85,11 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
case "tool_use":
functionName := contentResult.Get("name").String()
if toolUseID := contentResult.Get("id").String(); toolUseID != "" {
if derived := toolNameFromClaudeToolUseID(toolUseID); derived != "" {
functionName = derived
}
}
functionArgs := contentResult.Get("input").String()
argsResult := gjson.Parse(functionArgs)
if argsResult.IsObject() && gjson.Valid(functionArgs) {
@@ -100,10 +105,9 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
if toolCallID == "" {
return true
}
funcName := toolCallID
toolCallIDs := strings.Split(toolCallID, "-")
if len(toolCallIDs) > 1 {
funcName = strings.Join(toolCallIDs[0:len(toolCallIDs)-1], "-")
funcName := toolNameFromClaudeToolUseID(toolCallID)
if funcName == "" {
funcName = toolCallID
}
responseData := contentResult.Get("content").Raw
part := `{"functionResponse":{"name":"","response":{"result":""}}}`
@@ -137,6 +141,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
tool, _ = sjson.Delete(tool, "input_examples")
tool, _ = sjson.Delete(tool, "type")
tool, _ = sjson.Delete(tool, "cache_control")
tool, _ = sjson.Delete(tool, "defer_loading")
if gjson.Valid(tool) && gjson.Parse(tool).IsObject() {
if !hasTools {
out, _ = sjson.SetRaw(out, "tools", `[{"functionDeclarations":[]}]`)
@@ -152,6 +157,33 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
}
}
// tool_choice
toolChoiceResult := gjson.GetBytes(rawJSON, "tool_choice")
if toolChoiceResult.Exists() {
toolChoiceType := ""
toolChoiceName := ""
if toolChoiceResult.IsObject() {
toolChoiceType = toolChoiceResult.Get("type").String()
toolChoiceName = toolChoiceResult.Get("name").String()
} else if toolChoiceResult.Type == gjson.String {
toolChoiceType = toolChoiceResult.String()
}
switch toolChoiceType {
case "auto":
out, _ = sjson.Set(out, "toolConfig.functionCallingConfig.mode", "AUTO")
case "none":
out, _ = sjson.Set(out, "toolConfig.functionCallingConfig.mode", "NONE")
case "any":
out, _ = sjson.Set(out, "toolConfig.functionCallingConfig.mode", "ANY")
case "tool":
out, _ = sjson.Set(out, "toolConfig.functionCallingConfig.mode", "ANY")
if toolChoiceName != "" {
out, _ = sjson.Set(out, "toolConfig.functionCallingConfig.allowedFunctionNames", []string{toolChoiceName})
}
}
}
// Map Anthropic thinking -> Gemini thinking config when enabled
// Translator only does format conversion, ApplyThinking handles model capability validation.
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
@@ -202,3 +234,11 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
return result
}
// toolNameFromClaudeToolUseID extracts the tool name embedded in a tool-use ID
// of the form "<name>-<suffix>". It returns everything before the last "-", so
// names that contain hyphens themselves are preserved. An ID with no "-" yields
// the empty string.
func toolNameFromClaudeToolUseID(toolUseID string) string {
	cut := strings.LastIndex(toolUseID, "-")
	if cut < 0 {
		return ""
	}
	return toolUseID[:cut]
}

View File

@@ -0,0 +1,42 @@
package claude
import (
"testing"
"github.com/tidwall/gjson"
)
// TestConvertClaudeRequestToGemini_ToolChoice_SpecificTool verifies that a
// Claude tool_choice of {"type":"tool","name":...} is translated to function
// calling mode "ANY" restricted to exactly that function name.
func TestConvertClaudeRequestToGemini_ToolChoice_SpecificTool(t *testing.T) {
	const (
		modePath    = "toolConfig.functionCallingConfig.mode"
		allowedPath = "toolConfig.functionCallingConfig.allowedFunctionNames"
	)

	request := []byte(`{
"model": "gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": "hi"}
]
}
],
"tools": [
{
"name": "json",
"description": "A JSON tool",
"input_schema": {
"type": "object",
"properties": {}
}
}
],
"tool_choice": {"type": "tool", "name": "json"}
}`)

	converted := ConvertClaudeRequestToGemini("gemini-3-flash-preview", request, false)

	mode := gjson.GetBytes(converted, modePath).String()
	if mode != "ANY" {
		t.Fatalf("Expected toolConfig.functionCallingConfig.mode 'ANY', got '%s'", mode)
	}

	names := gjson.GetBytes(converted, allowedPath)
	entries := names.Array()
	if len(entries) == 1 && entries[0].String() == "json" {
		return
	}
	t.Fatalf("Expected allowedFunctionNames ['json'], got %s", names.Raw)
}

View File

@@ -12,8 +12,8 @@ import (
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -25,6 +25,8 @@ type Params struct {
ResponseType int
ResponseIndex int
HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
ToolNameMap map[string]string
SawToolCall bool
}
// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
@@ -53,6 +55,8 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
HasFirstResponse: false,
ResponseType: 0,
ResponseIndex: 0,
ToolNameMap: util.ToolNameMapFromClaudeRequest(originalRequestRawJSON),
SawToolCall: false,
}
}
@@ -66,8 +70,6 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
return []string{}
}
// Track whether tools are being used in this response chunk
usedTool := false
output := ""
// Initialize the streaming session with a message_start event
@@ -175,12 +177,13 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
} else if functionCallResult.Exists() {
// Handle function/tool calls from the AI model
// This processes tool usage requests and formats them for Claude API compatibility
usedTool = true
fcName := functionCallResult.Get("name").String()
(*param).(*Params).SawToolCall = true
upstreamToolName := functionCallResult.Get("name").String()
clientToolName := util.MapToolName((*param).(*Params).ToolNameMap, upstreamToolName)
// FIX: Handle streaming split/delta where name might be empty in subsequent chunks.
// If we are already in tool use mode and name is empty, treat as continuation (delta).
if (*param).(*Params).ResponseType == 3 && fcName == "" {
if (*param).(*Params).ResponseType == 3 && upstreamToolName == "" {
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
output = output + "event: content_block_delta\n"
data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"input_json_delta","partial_json":""}}`, (*param).(*Params).ResponseIndex), "delta.partial_json", fcArgsResult.Raw)
@@ -221,8 +224,8 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
// Create the tool use block with unique ID and function details
data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, (*param).(*Params).ResponseIndex)
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
data, _ = sjson.Set(data, "content_block.name", fcName)
data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", upstreamToolName, atomic.AddUint64(&toolUseIDCounter, 1)))
data, _ = sjson.Set(data, "content_block.name", clientToolName)
output = output + fmt.Sprintf("data: %s\n\n\n", data)
if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
@@ -249,7 +252,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
output = output + `data: `
template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
if usedTool {
if (*param).(*Params).SawToolCall {
template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
} else if finish := gjson.GetBytes(rawJSON, "candidates.0.finishReason"); finish.Exists() && finish.String() == "MAX_TOKENS" {
template = `{"type":"message_delta","delta":{"stop_reason":"max_tokens","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
@@ -278,10 +281,10 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
// Returns:
// - string: A Claude-compatible JSON response.
func ConvertGeminiResponseToClaudeNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
_ = originalRequestRawJSON
_ = requestRawJSON
root := gjson.ParseBytes(rawJSON)
toolNameMap := util.ToolNameMapFromClaudeRequest(originalRequestRawJSON)
out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}`
out, _ = sjson.Set(out, "id", root.Get("responseId").String())
@@ -336,11 +339,12 @@ func ConvertGeminiResponseToClaudeNonStream(_ context.Context, _ string, origina
flushText()
hasToolCall = true
name := functionCall.Get("name").String()
upstreamToolName := functionCall.Get("name").String()
clientToolName := util.MapToolName(toolNameMap, upstreamToolName)
toolIDCounter++
toolBlock := `{"type":"tool_use","id":"","name":"","input":{}}`
toolBlock, _ = sjson.Set(toolBlock, "id", fmt.Sprintf("tool_%d", toolIDCounter))
toolBlock, _ = sjson.Set(toolBlock, "name", name)
toolBlock, _ = sjson.Set(toolBlock, "id", fmt.Sprintf("%s-%d", upstreamToolName, toolIDCounter))
toolBlock, _ = sjson.Set(toolBlock, "name", clientToolName)
inputRaw := "{}"
if args := functionCall.Get("args"); args.Exists() && gjson.Valid(args.Raw) && args.IsObject() {
inputRaw = args.Raw

View File

@@ -354,22 +354,7 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
funcDecl, _ = sjson.Set(funcDecl, "description", desc.String())
}
if params := tool.Get("parameters"); params.Exists() {
// Convert parameter types from OpenAI format to Gemini format
cleaned := params.Raw
// Convert type values to uppercase for Gemini
paramsResult := gjson.Parse(cleaned)
if properties := paramsResult.Get("properties"); properties.Exists() {
properties.ForEach(func(key, value gjson.Result) bool {
if propType := value.Get("type"); propType.Exists() {
upperType := strings.ToUpper(propType.String())
cleaned, _ = sjson.Set(cleaned, "properties."+key.String()+".type", upperType)
}
return true
})
}
// Set the overall type to OBJECT
cleaned, _ = sjson.Set(cleaned, "type", "OBJECT")
funcDecl, _ = sjson.SetRaw(funcDecl, "parametersJsonSchema", cleaned)
funcDecl, _ = sjson.SetRaw(funcDecl, "parametersJsonSchema", params.Raw)
}
geminiTools, _ = sjson.SetRaw(geminiTools, "0.functionDeclarations.-1", funcDecl)

View File

@@ -605,10 +605,6 @@ func convertClaudeToolsToKiro(tools gjson.Result) []KiroToolWrapper {
})
}
// Apply dynamic compression if total tools size exceeds threshold
// This prevents 500 errors when Claude Code sends too many tools
kiroTools = compressToolsIfNeeded(kiroTools)
return kiroTools
}
@@ -858,34 +854,7 @@ func BuildUserMessageStruct(msg gjson.Result, modelID, origin string) (KiroUserI
var textContents []KiroTextContent
// Check if this tool_result contains error from our SOFT_LIMIT_REACHED tool_use
// The client will return an error when trying to execute a tool with marker input
resultStr := resultContent.String()
isSoftLimitError := strings.Contains(resultStr, "SOFT_LIMIT_REACHED") ||
strings.Contains(resultStr, "_status") ||
strings.Contains(resultStr, "truncated") ||
strings.Contains(resultStr, "missing required") ||
strings.Contains(resultStr, "invalid input") ||
strings.Contains(resultStr, "Error writing file")
if isError && isSoftLimitError {
// Replace error content with SOFT_LIMIT_REACHED guidance
log.Infof("kiro: detected SOFT_LIMIT_REACHED in tool_result for %s, replacing with guidance", toolUseID)
softLimitMsg := `SOFT_LIMIT_REACHED
Your previous tool call was incomplete due to API output size limits.
The content was PARTIALLY transmitted but NOT executed.
REQUIRED ACTION:
1. Split your content into smaller chunks (max 300 lines per call)
2. For file writes: Create file with first chunk, then use append for remaining
3. Do NOT regenerate content you already attempted - continue from where you stopped
STATUS: This is NOT an error. Continue with smaller chunks.`
textContents = append(textContents, KiroTextContent{Text: softLimitMsg})
// Mark as SUCCESS so Claude doesn't treat it as a failure
isError = false
} else if resultContent.IsArray() {
if resultContent.IsArray() {
for _, item := range resultContent.Array() {
if item.Get("type").String() == "text" {
textContents = append(textContents, KiroTextContent{Text: item.Get("text").String()})

View File

@@ -55,39 +55,18 @@ func BuildClaudeResponse(content string, toolUses []KiroToolUse, model string, u
}
}
// Add tool_use blocks - emit truncated tools with SOFT_LIMIT_REACHED marker
hasTruncatedTools := false
// Add tool_use blocks - skip truncated tools and log warning
for _, toolUse := range toolUses {
if toolUse.IsTruncated && toolUse.TruncationInfo != nil {
// Emit tool_use with SOFT_LIMIT_REACHED marker input
hasTruncatedTools = true
log.Infof("kiro: buildClaudeResponse emitting truncated tool with SOFT_LIMIT_REACHED: %s (ID: %s)", toolUse.Name, toolUse.ToolUseID)
markerInput := map[string]interface{}{
"_status": "SOFT_LIMIT_REACHED",
"_message": "Tool output was truncated. Split content into smaller chunks (max 300 lines). Due to potential model hallucination, you MUST re-fetch the current working directory and generate the correct file_path.",
}
contentBlocks = append(contentBlocks, map[string]interface{}{
"type": "tool_use",
"id": toolUse.ToolUseID,
"name": toolUse.Name,
"input": markerInput,
})
} else {
// Normal tool use
contentBlocks = append(contentBlocks, map[string]interface{}{
"type": "tool_use",
"id": toolUse.ToolUseID,
"name": toolUse.Name,
"input": toolUse.Input,
})
log.Warnf("kiro: buildClaudeResponse skipping truncated tool: %s (ID: %s)", toolUse.Name, toolUse.ToolUseID)
continue
}
}
// Log if we used SOFT_LIMIT_REACHED
if hasTruncatedTools {
log.Infof("kiro: buildClaudeResponse using SOFT_LIMIT_REACHED - keeping stop_reason=tool_use")
contentBlocks = append(contentBlocks, map[string]interface{}{
"type": "tool_use",
"id": toolUse.ToolUseID,
"name": toolUse.Name,
"input": toolUse.Input,
})
}
// Ensure at least one content block (Claude API requires non-empty content)

View File

@@ -192,8 +192,8 @@ func AnalyzeBufferedStream(chunks [][]byte) BufferedStreamResult {
if idx, ok := event["index"].(float64); ok {
currentToolIndex = int(idx)
}
// Capture tool use ID for toolResults handshake
if id, ok := cb["id"].(string); ok {
// Capture tool use ID only for web_search toolResults handshake
if id, ok := cb["id"].(string); ok && (currentToolName == "web_search" || currentToolName == "remote_web_search") {
result.WebSearchToolUseId = id
}
toolInputBuilder.Reset()

View File

@@ -1,191 +0,0 @@
// Package claude provides tool compression functionality for Kiro translator.
// This file implements dynamic tool compression to reduce tool payload size
// when it exceeds the target threshold, preventing 500 errors from Kiro API.
package claude
import (
"encoding/json"
"unicode/utf8"
kirocommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/kiro/common"
log "github.com/sirupsen/logrus"
)
// calculateToolsSize reports how many bytes the tools list occupies once it
// is serialized with encoding/json.
// An empty list yields 0. A marshalling failure is logged as a warning and
// also yields 0.
func calculateToolsSize(tools []KiroToolWrapper) int {
	size := 0
	if len(tools) > 0 {
		encoded, errMarshal := json.Marshal(tools)
		if errMarshal != nil {
			log.Warnf("kiro: failed to marshal tools for size calculation: %v", errMarshal)
		} else {
			size = len(encoded)
		}
	}
	return size
}
// simplifyInputSchema returns a reduced copy of a JSON-schema-like value.
// Only the keys type, enum, required, properties, items, additionalProperties,
// anyOf, oneOf and allOf survive; every other key is dropped.
// Nested schemas under properties, items, additionalProperties and the
// anyOf/oneOf/allOf lists are reduced recursively.
// A nil schema, or any value that is not a map[string]interface{}, is
// returned unchanged.
func simplifyInputSchema(schema interface{}) interface{} {
	src, isMap := schema.(map[string]interface{})
	if !isMap {
		// Covers both the nil interface and scalar/array values.
		return schema
	}

	dst := make(map[string]interface{})

	// Fields copied verbatim.
	for _, key := range []string{"type", "enum", "required"} {
		if value, present := src[key]; present {
			dst[key] = value
		}
	}

	// Object properties: reduce each property schema.
	if props, isObject := src["properties"].(map[string]interface{}); isObject {
		reduced := make(map[string]interface{})
		for propName, propSchema := range props {
			reduced[propName] = simplifyInputSchema(propSchema)
		}
		dst["properties"] = reduced
	}

	// Single nested schemas: reduced whenever the key is present.
	for _, key := range []string{"items", "additionalProperties"} {
		if nested, present := src[key]; present {
			dst[key] = simplifyInputSchema(nested)
		}
	}

	// Schema combinators: reduce each alternative; non-list values are skipped.
	for _, key := range []string{"anyOf", "oneOf", "allOf"} {
		variants, isList := src[key].([]interface{})
		if !isList {
			continue
		}
		reduced := make([]interface{}, 0, len(variants))
		for _, variant := range variants {
			reduced = append(reduced, simplifyInputSchema(variant))
		}
		dst[key] = reduced
	}

	return dst
}
// compressToolDescription shortens description to roughly targetLength bytes.
// A targetLength below kirocommon.MinToolDescriptionLength is raised to that
// minimum. Descriptions that already fit are returned untouched; otherwise
// the text is cut on a UTF-8 rune boundary and "..." is appended.
func compressToolDescription(description string, targetLength int) string {
	limit := targetLength
	if limit < kirocommon.MinToolDescriptionLength {
		limit = kirocommon.MinToolDescriptionLength
	}
	if len(description) <= limit {
		return description
	}

	// Reserve three bytes for the ellipsis, then walk backwards until the cut
	// lands on the first byte of a rune.
	cut := limit - 3
	for ; cut > 0; cut-- {
		if utf8.RuneStart(description[cut]) {
			break
		}
	}
	if cut <= 0 {
		return description[:kirocommon.MinToolDescriptionLength]
	}
	return description[:cut] + "..."
}
// compressToolsIfNeeded shrinks the tools payload when its serialized size is
// above kirocommon.ToolCompressionTargetSize.
//
// The input slice is returned as-is when it is empty or already within the
// target. Otherwise a new slice is built and reduced in two stages:
//  1. every input schema is passed through simplifyInputSchema;
//  2. if that is not enough, every description is shortened by a common
//     ratio via compressToolDescription.
//
// The caller's slice elements are not modified.
func compressToolsIfNeeded(tools []KiroToolWrapper) []KiroToolWrapper {
	if len(tools) == 0 {
		return tools
	}

	budget := kirocommon.ToolCompressionTargetSize
	originalSize := calculateToolsSize(tools)
	if originalSize <= budget {
		log.Debugf("kiro: tools size %d bytes is within target %d bytes, no compression needed",
			originalSize, budget)
		return tools
	}
	log.Infof("kiro: tools size %d bytes exceeds target %d bytes, starting compression",
		originalSize, budget)

	// Stage 1: build fresh wrappers carrying a simplified schema.
	compressed := make([]KiroToolWrapper, 0, len(tools))
	for _, tool := range tools {
		spec := tool.ToolSpecification
		compressed = append(compressed, KiroToolWrapper{
			ToolSpecification: KiroToolSpecification{
				Name:        spec.Name,
				Description: spec.Description,
				InputSchema: KiroInputSchema{JSON: simplifyInputSchema(spec.InputSchema.JSON)},
			},
		})
	}

	schemaOnlySize := calculateToolsSize(compressed)
	log.Debugf("kiro: size after schema simplification: %d bytes (reduced by %d bytes)",
		schemaOnlySize, originalSize-schemaOnlySize)
	if schemaOnlySize <= budget {
		log.Infof("kiro: compression complete after schema simplification, final size: %d bytes",
			schemaOnlySize)
		return compressed
	}

	// Stage 2: shorten descriptions, treating them as the only remaining
	// source of savings.
	descBytes := 0
	for i := range compressed {
		descBytes += len(compressed[i].ToolSpecification.Description)
	}
	if descBytes > 0 {
		overshoot := float64(schemaOnlySize - budget)
		keepRatio := 1.0 - overshoot/float64(descBytes)
		switch {
		case keepRatio > 1.0:
			keepRatio = 1.0
		case keepRatio < 0:
			keepRatio = 0
		}
		for i := range compressed {
			spec := &compressed[i].ToolSpecification
			wanted := int(float64(len(spec.Description)) * keepRatio)
			spec.Description = compressToolDescription(spec.Description, wanted)
		}
	}

	finalSize := calculateToolsSize(compressed)
	log.Infof("kiro: compression complete, original: %d bytes, final: %d bytes (%.1f%% reduction)",
		originalSize, finalSize, float64(originalSize-finalSize)/float64(originalSize)*100)
	return compressed
}

View File

@@ -84,13 +84,18 @@ func DetectTruncation(toolName, toolUseID, rawInput string, parsedInput map[stri
ParsedFields: make(map[string]string),
}
// Scenario 1: Empty input buffer - no data received at all
// Scenario 1: Empty input buffer - only flag as truncation if tool has required fields
// Many tools (e.g. TaskList, TaskGet) have no required params, so empty input is valid
if strings.TrimSpace(rawInput) == "" {
info.IsTruncated = true
info.TruncationType = TruncationTypeEmptyInput
info.ErrorMessage = "Tool input was completely empty - API response may have been truncated before tool parameters were transmitted"
log.Warnf("kiro: truncation detected [%s] for tool %s (ID: %s): empty input buffer",
info.TruncationType, toolName, toolUseID)
if _, hasRequirements := RequiredFieldsByTool[toolName]; hasRequirements {
info.IsTruncated = true
info.TruncationType = TruncationTypeEmptyInput
info.ErrorMessage = "Tool input was completely empty - API response may have been truncated before tool parameters were transmitted"
log.Warnf("kiro: truncation detected [%s] for tool %s (ID: %s): empty input buffer",
info.TruncationType, toolName, toolUseID)
return info
}
log.Debugf("kiro: empty input for tool %s (ID: %s) - no required fields, treating as valid", toolName, toolUseID)
return info
}
@@ -342,7 +347,7 @@ func buildTruncationErrorMessage(toolName, truncationType string, parsedFields m
}
sb.WriteString(" Received ")
sb.WriteString(string(rune(len(rawInput))))
sb.WriteString(formatInt(len(rawInput)))
sb.WriteString(" bytes. Please retry with smaller content chunks.")
return sb.String()

View File

@@ -6,14 +6,6 @@ const (
// Kiro API limit is 10240 bytes, leave room for "..."
KiroMaxToolDescLen = 10237
// ToolCompressionTargetSize is the target total size for compressed tools (20KB).
// If tools exceed this size, compression will be applied.
ToolCompressionTargetSize = 20 * 1024 // 20KB
// MinToolDescriptionLength is the minimum description length after compression.
// Descriptions will not be shortened below this length.
MinToolDescriptionLength = 50
// ThinkingStartTag is the start tag for thinking blocks in responses.
ThinkingStartTag = "<thinking>"

View File

@@ -22,9 +22,11 @@ var (
// ConvertOpenAIResponseToAnthropicParams holds parameters for response conversion
type ConvertOpenAIResponseToAnthropicParams struct {
MessageID string
Model string
CreatedAt int64
MessageID string
Model string
CreatedAt int64
ToolNameMap map[string]string
SawToolCall bool
// Content accumulator for streaming
ContentAccumulator strings.Builder
// Tool calls accumulator for streaming
@@ -78,6 +80,8 @@ func ConvertOpenAIResponseToClaude(_ context.Context, _ string, originalRequestR
MessageID: "",
Model: "",
CreatedAt: 0,
ToolNameMap: nil,
SawToolCall: false,
ContentAccumulator: strings.Builder{},
ToolCallsAccumulator: nil,
TextContentBlockStarted: false,
@@ -97,6 +101,10 @@ func ConvertOpenAIResponseToClaude(_ context.Context, _ string, originalRequestR
}
rawJSON = bytes.TrimSpace(rawJSON[5:])
if (*param).(*ConvertOpenAIResponseToAnthropicParams).ToolNameMap == nil {
(*param).(*ConvertOpenAIResponseToAnthropicParams).ToolNameMap = util.ToolNameMapFromClaudeRequest(originalRequestRawJSON)
}
// Check if this is the [DONE] marker
rawStr := strings.TrimSpace(string(rawJSON))
if rawStr == "[DONE]" {
@@ -111,6 +119,16 @@ func ConvertOpenAIResponseToClaude(_ context.Context, _ string, originalRequestR
}
}
// effectiveOpenAIFinishReason returns the finish reason to report for the
// stream tracked by param: "tool_calls" if SawToolCall is set, otherwise the
// recorded FinishReason. A nil param yields the empty string.
func effectiveOpenAIFinishReason(param *ConvertOpenAIResponseToAnthropicParams) string {
	switch {
	case param == nil:
		return ""
	case param.SawToolCall:
		return "tool_calls"
	default:
		return param.FinishReason
	}
}
// convertOpenAIStreamingChunkToAnthropic converts OpenAI streaming chunk to Anthropic streaming events
func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAIResponseToAnthropicParams) []string {
root := gjson.ParseBytes(rawJSON)
@@ -197,6 +215,7 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
}
toolCalls.ForEach(func(_, toolCall gjson.Result) bool {
param.SawToolCall = true
index := int(toolCall.Get("index").Int())
blockIndex := param.toolContentBlockIndex(index)
@@ -215,7 +234,7 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
// Handle function name
if function := toolCall.Get("function"); function.Exists() {
if name := function.Get("name"); name.Exists() {
accumulator.Name = name.String()
accumulator.Name = util.MapToolName(param.ToolNameMap, name.String())
stopThinkingContentBlock(param, &results)
@@ -246,7 +265,11 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
// Handle finish_reason (but don't send message_delta/message_stop yet)
if finishReason := root.Get("choices.0.finish_reason"); finishReason.Exists() && finishReason.String() != "" {
reason := finishReason.String()
param.FinishReason = reason
if param.SawToolCall {
param.FinishReason = "tool_calls"
} else {
param.FinishReason = reason
}
// Send content_block_stop for thinking content if needed
if param.ThinkingContentBlockStarted {
@@ -294,7 +317,7 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
inputTokens, outputTokens, cachedTokens = extractOpenAIUsage(usage)
// Send message_delta with usage
messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason))
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(effectiveOpenAIFinishReason(param)))
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.input_tokens", inputTokens)
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "usage.output_tokens", outputTokens)
if cachedTokens > 0 {
@@ -348,7 +371,7 @@ func convertOpenAIDoneToAnthropic(param *ConvertOpenAIResponseToAnthropicParams)
// If we haven't sent message_delta yet (no usage info was received), send it now
if param.FinishReason != "" && !param.MessageDeltaSent {
messageDeltaJSON := `{"type":"message_delta","delta":{"stop_reason":"","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(param.FinishReason))
messageDeltaJSON, _ = sjson.Set(messageDeltaJSON, "delta.stop_reason", mapOpenAIFinishReasonToAnthropic(effectiveOpenAIFinishReason(param)))
results = append(results, "event: message_delta\ndata: "+messageDeltaJSON+"\n\n")
param.MessageDeltaSent = true
}
@@ -531,10 +554,10 @@ func stopTextContentBlock(param *ConvertOpenAIResponseToAnthropicParams, results
// Returns:
// - string: An Anthropic-compatible JSON response.
func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) string {
_ = originalRequestRawJSON
_ = requestRawJSON
root := gjson.ParseBytes(rawJSON)
toolNameMap := util.ToolNameMapFromClaudeRequest(originalRequestRawJSON)
out := `{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}`
out, _ = sjson.Set(out, "id", root.Get("id").String())
out, _ = sjson.Set(out, "model", root.Get("model").String())
@@ -590,7 +613,7 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
hasToolCall = true
toolUse := `{"type":"tool_use","id":"","name":"","input":{}}`
toolUse, _ = sjson.Set(toolUse, "id", tc.Get("id").String())
toolUse, _ = sjson.Set(toolUse, "name", tc.Get("function.name").String())
toolUse, _ = sjson.Set(toolUse, "name", util.MapToolName(toolNameMap, tc.Get("function.name").String()))
argsStr := util.FixJSON(tc.Get("function.arguments").String())
if argsStr != "" && gjson.Valid(argsStr) {
@@ -647,7 +670,7 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
hasToolCall = true
toolUseBlock := `{"type":"tool_use","id":"","name":"","input":{}}`
toolUseBlock, _ = sjson.Set(toolUseBlock, "id", toolCall.Get("id").String())
toolUseBlock, _ = sjson.Set(toolUseBlock, "name", toolCall.Get("function.name").String())
toolUseBlock, _ = sjson.Set(toolUseBlock, "name", util.MapToolName(toolNameMap, toolCall.Get("function.name").String()))
argsStr := util.FixJSON(toolCall.Get("function.arguments").String())
if argsStr != "" && gjson.Valid(argsStr) {

View File

@@ -430,7 +430,7 @@ func removeUnsupportedKeywords(jsonStr string) string {
keywords := append(unsupportedConstraints,
"$schema", "$defs", "definitions", "const", "$ref", "$id", "additionalProperties",
"propertyNames", "patternProperties", // Gemini doesn't support these schema keywords
"enumTitles", "prefill", // Claude/OpenCode schema metadata fields unsupported by Gemini
"enumTitles", "prefill", "deprecated", // Schema metadata fields unsupported by Gemini
)
deletePaths := make([]string, 0)

View File

@@ -6,6 +6,7 @@ package util
import (
"bytes"
"fmt"
"strings"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
@@ -219,3 +220,54 @@ func FixJSON(input string) string {
return out.String()
}
// CanonicalToolName normalizes a tool name for lookup: surrounding whitespace
// is trimmed, then any leading underscores are removed, and the remainder is
// lower-cased.
func CanonicalToolName(name string) string {
	trimmed := strings.TrimSpace(name)
	withoutPrefix := strings.TrimLeft(trimmed, "_")
	return strings.ToLower(withoutPrefix)
}
// ToolNameMapFromClaudeRequest builds a lookup from canonical tool name (see
// CanonicalToolName) to the tool name exactly as it appears in the "tools"
// array of a Claude request, so responses can restore the client's original
// spelling and casing.
// When two tools share a canonical name, the first one wins. The result is
// nil if the payload is empty, is not valid JSON, has no "tools" array, or
// yields no usable names.
func ToolNameMapFromClaudeRequest(rawJSON []byte) map[string]string {
	if len(rawJSON) == 0 || !gjson.ValidBytes(rawJSON) {
		return nil
	}

	tools := gjson.GetBytes(rawJSON, "tools")
	if !(tools.Exists() && tools.IsArray()) {
		return nil
	}

	entries := tools.Array()
	nameByKey := make(map[string]string, len(entries))
	for _, tool := range entries {
		original := strings.TrimSpace(tool.Get("name").String())
		if original == "" {
			continue
		}
		key := CanonicalToolName(original)
		if key == "" {
			// The name consisted solely of underscores.
			continue
		}
		if _, seen := nameByKey[key]; seen {
			continue
		}
		nameByKey[key] = original
	}

	if len(nameByKey) == 0 {
		return nil
	}
	return nameByKey
}
// MapToolName looks name up in toolNameMap by its canonical form and returns
// the mapped value. The input name is returned unchanged when it is empty,
// when the map is nil, or when the map has no non-empty entry for it.
func MapToolName(toolNameMap map[string]string, name string) string {
	if toolNameMap == nil || name == "" {
		return name
	}
	// A missing key reads as "", which is handled the same as an empty mapping.
	mapped := toolNameMap[CanonicalToolName(name)]
	if mapped == "" {
		return name
	}
	return mapped
}

View File

@@ -183,7 +183,7 @@ func (w *Watcher) addOrUpdateClient(path string) {
if w.reloadCallback != nil {
log.Debugf("triggering server update callback after add/update")
w.reloadCallback(cfg)
w.triggerServerUpdate(cfg)
}
w.persistAuthAsync(fmt.Sprintf("Sync auth %s", filepath.Base(path)), path)
}
@@ -202,7 +202,7 @@ func (w *Watcher) removeClient(path string) {
if w.reloadCallback != nil {
log.Debugf("triggering server update callback after removal")
w.reloadCallback(cfg)
w.triggerServerUpdate(cfg)
}
w.persistAuthAsync(fmt.Sprintf("Remove auth %s", filepath.Base(path)), path)
}
@@ -303,3 +303,79 @@ func (w *Watcher) persistAuthAsync(message string, paths ...string) {
}
}()
}
// stopServerUpdateTimer stops and clears the server-update timer, if one is
// set, and resets the pending flag. All of this happens under serverUpdateMu.
func (w *Watcher) stopServerUpdateTimer() {
	w.serverUpdateMu.Lock()
	defer w.serverUpdateMu.Unlock()

	if pending := w.serverUpdateTimer; pending != nil {
		pending.Stop()
		w.serverUpdateTimer = nil
	}
	w.serverUpdatePend = false
}
// triggerServerUpdate invokes reloadCallback, rate-limited by serverUpdateDebounce.
//
// If no update has been recorded yet, or at least serverUpdateDebounce has
// passed since the last one, reloadCallback(cfg) is called synchronously and
// any scheduled timer is cancelled. Otherwise a single timer is scheduled for
// the remainder of the window; further calls while it is pending return
// without doing anything. When the timer fires it passes w.config (read at
// that moment) to reloadCallback rather than the cfg given here.
//
// The call is a no-op when w, w.reloadCallback or cfg is nil, or when the
// watcher has been marked stopped.
func (w *Watcher) triggerServerUpdate(cfg *config.Config) {
if w == nil || w.reloadCallback == nil || cfg == nil {
return
}
if w.stopped.Load() {
return
}
now := time.Now()
w.serverUpdateMu.Lock()
// Immediate path: outside the debounce window (or first update ever).
if w.serverUpdateLast.IsZero() || now.Sub(w.serverUpdateLast) >= serverUpdateDebounce {
w.serverUpdateLast = now
// Cancel any delayed update; this immediate one supersedes it.
if w.serverUpdateTimer != nil {
w.serverUpdateTimer.Stop()
w.serverUpdateTimer = nil
}
w.serverUpdatePend = false
// Release the lock before running the callback so it is not held during the reload.
w.serverUpdateMu.Unlock()
w.reloadCallback(cfg)
return
}
// Inside the window with a delayed update already scheduled: nothing to do.
if w.serverUpdatePend {
w.serverUpdateMu.Unlock()
return
}
// Schedule the update for when the window ends, but never sooner than 10ms.
delay := serverUpdateDebounce - now.Sub(w.serverUpdateLast)
if delay < 10*time.Millisecond {
delay = 10 * time.Millisecond
}
w.serverUpdatePend = true
if w.serverUpdateTimer != nil {
w.serverUpdateTimer.Stop()
w.serverUpdateTimer = nil
}
// Declared before assignment so the callback can compare against its own timer.
var timer *time.Timer
timer = time.AfterFunc(delay, func() {
if w.stopped.Load() {
return
}
// Snapshot the current config under the clients read lock.
w.clientsMutex.RLock()
latestCfg := w.config
w.clientsMutex.RUnlock()
w.serverUpdateMu.Lock()
// Bail out if this timer was replaced or cancelled after it was scheduled.
if w.serverUpdateTimer != timer || !w.serverUpdatePend {
w.serverUpdateMu.Unlock()
return
}
w.serverUpdateTimer = nil
w.serverUpdatePend = false
if latestCfg == nil || w.reloadCallback == nil || w.stopped.Load() {
w.serverUpdateMu.Unlock()
return
}
w.serverUpdateLast = time.Now()
// Unlock before the callback, mirroring the immediate path.
w.serverUpdateMu.Unlock()
w.reloadCallback(latestCfg)
})
w.serverUpdateTimer = timer
w.serverUpdateMu.Unlock()
}

View File

@@ -304,6 +304,11 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
if oldModels.hash != newModels.hash {
changes = append(changes, fmt.Sprintf("vertex[%d].models: updated (%d -> %d entries)", i, oldModels.count, newModels.count))
}
oldExcluded := SummarizeExcludedModels(o.ExcludedModels)
newExcluded := SummarizeExcludedModels(n.ExcludedModels)
if oldExcluded.hash != newExcluded.hash {
changes = append(changes, fmt.Sprintf("vertex[%d].excluded-models: updated (%d -> %d entries)", i, oldExcluded.count, newExcluded.count))
}
if !equalStringMap(o.Headers, n.Headers) {
changes = append(changes, fmt.Sprintf("vertex[%d].headers: updated", i))
}

View File

@@ -319,7 +319,7 @@ func (s *ConfigSynthesizer) synthesizeVertexCompat(ctx *SynthesisContext) []*cor
CreatedAt: now,
UpdatedAt: now,
}
ApplyAuthExcludedModelsMeta(a, cfg, nil, "apikey")
ApplyAuthExcludedModelsMeta(a, cfg, compat.ExcludedModels, "apikey")
out = append(out, a)
}
return out

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"time"
@@ -72,6 +73,10 @@ func (s *FileSynthesizer) Synthesize(ctx *SynthesisContext) ([]*coreauth.Auth, e
if rel, errRel := filepath.Rel(ctx.AuthDir, full); errRel == nil && rel != "" {
id = rel
}
// On Windows, normalize ID casing to avoid duplicate auth entries caused by case-insensitive paths.
if runtime.GOOS == "windows" {
id = strings.ToLower(id)
}
proxyURL := ""
if p, ok := metadata["proxy_url"].(string); ok {

View File

@@ -6,6 +6,7 @@ import (
"context"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/fsnotify/fsnotify"
@@ -35,6 +36,11 @@ type Watcher struct {
clientsMutex sync.RWMutex
configReloadMu sync.Mutex
configReloadTimer *time.Timer
serverUpdateMu sync.Mutex
serverUpdateTimer *time.Timer
serverUpdateLast time.Time
serverUpdatePend bool
stopped atomic.Bool
reloadCallback func(*config.Config)
watcher *fsnotify.Watcher
lastAuthHashes map[string]string
@@ -76,6 +82,7 @@ const (
replaceCheckDelay = 50 * time.Millisecond
configReloadDebounce = 150 * time.Millisecond
authRemoveDebounceWindow = 1 * time.Second
serverUpdateDebounce = 1 * time.Second
)
// NewWatcher creates a new file watcher instance
@@ -114,8 +121,10 @@ func (w *Watcher) Start(ctx context.Context) error {
// Stop stops the file watcher
func (w *Watcher) Stop() error {
w.stopped.Store(true)
w.stopDispatch()
w.stopConfigReloadTimer()
w.stopServerUpdateTimer()
return w.watcher.Close()
}

View File

@@ -441,6 +441,46 @@ func TestRemoveClientRemovesHash(t *testing.T) {
}
}
// TestTriggerServerUpdateCancelsPendingTimerOnImmediate checks that an
// immediate server update cancels a delayed one that was scheduled earlier,
// so the reload callback runs exactly once.
func TestTriggerServerUpdateCancelsPendingTimerOnImmediate(t *testing.T) {
tmpDir := t.TempDir()
cfg := &config.Config{AuthDir: tmpDir}
// reloads counts reload callback invocations; accessed atomically because
// the timer callback runs on another goroutine.
var reloads int32
w := &Watcher{
reloadCallback: func(*config.Config) {
atomic.AddInt32(&reloads, 1)
},
}
w.SetConfig(cfg)
// Pretend the last update happened 100ms short of a full debounce window
// ago, so the next trigger is deferred by roughly 100ms.
w.serverUpdateMu.Lock()
w.serverUpdateLast = time.Now().Add(-(serverUpdateDebounce - 100*time.Millisecond))
w.serverUpdateMu.Unlock()
w.triggerServerUpdate(cfg)
if got := atomic.LoadInt32(&reloads); got != 0 {
t.Fatalf("expected no immediate reload, got %d", got)
}
w.serverUpdateMu.Lock()
if !w.serverUpdatePend || w.serverUpdateTimer == nil {
w.serverUpdateMu.Unlock()
t.Fatal("expected a pending server update timer")
}
// Move the last update outside the debounce window so the next trigger
// takes the immediate path while the timer is still pending.
w.serverUpdateLast = time.Now().Add(-(serverUpdateDebounce + 10*time.Millisecond))
w.serverUpdateMu.Unlock()
w.triggerServerUpdate(cfg)
if got := atomic.LoadInt32(&reloads); got != 1 {
t.Fatalf("expected immediate reload once, got %d", got)
}
// Wait past the point where the deferred timer would have fired.
time.Sleep(250 * time.Millisecond)
if got := atomic.LoadInt32(&reloads); got != 1 {
t.Fatalf("expected pending timer to be cancelled, got %d reloads", got)
}
}
func TestShouldDebounceRemove(t *testing.T) {
w := &Watcher{}
path := filepath.Clean("test.json")

View File

@@ -26,7 +26,6 @@ const (
wsRequestTypeAppend = "response.append"
wsEventTypeError = "error"
wsEventTypeCompleted = "response.completed"
wsEventTypeDone = "response.done"
wsDoneMarker = "[DONE]"
wsTurnStateHeader = "x-codex-turn-state"
wsRequestBodyKey = "REQUEST_BODY_OVERRIDE"
@@ -469,9 +468,6 @@ func (h *OpenAIResponsesAPIHandler) forwardResponsesWebsocket(
for i := range payloads {
eventType := gjson.GetBytes(payloads[i], "type").String()
if eventType == wsEventTypeCompleted {
// log.Infof("replace %s with %s", wsEventTypeCompleted, wsEventTypeDone)
payloads[i], _ = sjson.SetBytes(payloads[i], "type", wsEventTypeDone)
completed = true
completedOutput = responseCompletedOutputFromPayload(payloads[i])
}

View File

@@ -2,12 +2,15 @@ package openai
import (
"bytes"
"errors"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/gin-gonic/gin"
"github.com/gorilla/websocket"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/tidwall/gjson"
)
@@ -247,3 +250,79 @@ func TestSetWebsocketRequestBody(t *testing.T) {
t.Fatalf("request body = %q, want %q", string(bodyBytes), "event body")
}
}
// TestForwardResponsesWebsocketPreservesCompletedEvent checks that
// forwardResponsesWebsocket relays a "response.completed" event to the
// websocket client with its type unchanged (not rewritten to "response.done")
// and returns the completed response's output.
func TestForwardResponsesWebsocketPreservesCompletedEvent(t *testing.T) {
gin.SetMode(gin.TestMode)
// serverErrCh carries the handler's outcome (nil on success) back to the test goroutine.
serverErrCh := make(chan error, 1)
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
conn, err := responsesWebsocketUpgrader.Upgrade(w, r, nil)
if err != nil {
serverErrCh <- err
return
}
defer func() {
errClose := conn.Close()
if errClose != nil {
serverErrCh <- errClose
}
}()
ctx, _ := gin.CreateTestContext(httptest.NewRecorder())
ctx.Request = r
// Feed exactly one upstream chunk, then close both channels so the
// forwarder sees end-of-stream.
data := make(chan []byte, 1)
errCh := make(chan *interfaces.ErrorMessage)
data <- []byte("data: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp-1\",\"output\":[{\"type\":\"message\",\"id\":\"out-1\"}]}}\n\n")
close(data)
close(errCh)
var bodyLog strings.Builder
// The method is invoked on a nil handler receiver.
completedOutput, err := (*OpenAIResponsesAPIHandler)(nil).forwardResponsesWebsocket(
ctx,
conn,
func(...interface{}) {},
data,
errCh,
&bodyLog,
"session-1",
)
if err != nil {
serverErrCh <- err
return
}
// The returned output must contain the item from the completed payload.
if gjson.GetBytes(completedOutput, "0.id").String() != "out-1" {
serverErrCh <- errors.New("completed output not captured")
return
}
serverErrCh <- nil
}))
defer server.Close()
// Dial the test server over ws:// by swapping the URL scheme prefix.
wsURL := "ws" + strings.TrimPrefix(server.URL, "http")
conn, _, err := websocket.DefaultDialer.Dial(wsURL, nil)
if err != nil {
t.Fatalf("dial websocket: %v", err)
}
defer func() {
errClose := conn.Close()
if errClose != nil {
t.Fatalf("close websocket: %v", errClose)
}
}()
_, payload, errReadMessage := conn.ReadMessage()
if errReadMessage != nil {
t.Fatalf("read websocket message: %v", errReadMessage)
}
// The client must see the original event type.
if gjson.GetBytes(payload, "type").String() != wsEventTypeCompleted {
t.Fatalf("payload type = %s, want %s", gjson.GetBytes(payload, "type").String(), wsEventTypeCompleted)
}
if strings.Contains(string(payload), "response.done") {
t.Fatalf("payload unexpectedly rewrote completed event: %s", payload)
}
// Finally, surface any failure reported by the server-side handler.
if errServer := <-serverErrCh; errServer != nil {
t.Fatalf("server error: %v", errServer)
}
}

View File

@@ -10,6 +10,7 @@ import (
"net/url"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"time"
@@ -266,14 +267,17 @@ func (s *FileTokenStore) readAuthFile(path, baseDir string) (*cliproxyauth.Auth,
}
func (s *FileTokenStore) idFor(path, baseDir string) string {
if baseDir == "" {
return path
id := path
if baseDir != "" {
if rel, errRel := filepath.Rel(baseDir, path); errRel == nil && rel != "" {
id = rel
}
}
rel, err := filepath.Rel(baseDir, path)
if err != nil {
return path
// On Windows, normalize ID casing to avoid duplicate auth entries caused by case-insensitive paths.
if runtime.GOOS == "windows" {
id = strings.ToLower(id)
}
return rel
return id
}
func (s *FileTokenStore) resolveAuthPath(auth *cliproxyauth.Auth) (string, error) {

View File

@@ -463,9 +463,14 @@ func (m *Manager) Update(ctx context.Context, auth *Auth) (*Auth, error) {
return nil, nil
}
m.mu.Lock()
if existing, ok := m.auths[auth.ID]; ok && existing != nil && !auth.indexAssigned && auth.Index == "" {
auth.Index = existing.Index
auth.indexAssigned = existing.indexAssigned
if existing, ok := m.auths[auth.ID]; ok && existing != nil {
if !auth.indexAssigned && auth.Index == "" {
auth.Index = existing.Index
auth.indexAssigned = existing.indexAssigned
}
if len(auth.ModelStates) == 0 && len(existing.ModelStates) > 0 {
auth.ModelStates = existing.ModelStates
}
}
auth.EnsureIndex()
m.auths[auth.ID] = auth.Clone()

View File

@@ -0,0 +1,49 @@
package auth
import (
"context"
"testing"
)
// TestManager_Update_PreservesModelStates registers an auth that carries a
// per-model quota state, then calls Update for the same ID with a payload that
// has no ModelStates, and finally checks that the auth returned by GetByID
// still reports the originally registered backoff level for that model.
func TestManager_Update_PreservesModelStates(t *testing.T) {
	const (
		authID    = "auth-1"
		modelName = "test-model"
	)
	wantBackoff := 7

	ctx := context.Background()
	manager := NewManager(nil, nil, nil)

	// Seed the manager with an auth that already has state for modelName.
	initial := &Auth{
		ID:       authID,
		Provider: "claude",
		Metadata: map[string]any{"k": "v"},
		ModelStates: map[string]*ModelState{
			modelName: {Quota: QuotaState{BackoffLevel: wantBackoff}},
		},
	}
	if _, errRegister := manager.Register(ctx, initial); errRegister != nil {
		t.Fatalf("register auth: %v", errRegister)
	}

	// The replacement deliberately omits ModelStates; only Metadata differs.
	replacement := &Auth{
		ID:       authID,
		Provider: "claude",
		Metadata: map[string]any{"k": "v2"},
	}
	if _, errUpdate := manager.Update(ctx, replacement); errUpdate != nil {
		t.Fatalf("update auth: %v", errUpdate)
	}

	stored, found := manager.GetByID(authID)
	if !found || stored == nil {
		t.Fatalf("expected auth to be present")
	}
	if len(stored.ModelStates) == 0 {
		t.Fatalf("expected ModelStates to be preserved")
	}

	modelState := stored.ModelStates[modelName]
	if modelState == nil {
		t.Fatalf("expected model state to be present")
	}
	if got := modelState.Quota.BackoffLevel; got != wantBackoff {
		t.Fatalf("expected BackoffLevel to be %d, got %d", wantBackoff, got)
	}
}

View File

@@ -301,6 +301,9 @@ func (s *Service) applyCoreAuthAddOrUpdate(ctx context.Context, auth *coreauth.A
auth.CreatedAt = existing.CreatedAt
auth.LastRefreshedAt = existing.LastRefreshedAt
auth.NextRefreshAfter = existing.NextRefreshAfter
if len(auth.ModelStates) == 0 && len(existing.ModelStates) > 0 {
auth.ModelStates = existing.ModelStates
}
op = "update"
_, err = s.coreManager.Update(ctx, auth)
} else {
@@ -817,10 +820,13 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
case "vertex":
// Vertex AI Gemini supports the same model identifiers as Gemini.
models = registry.GetGeminiVertexModels()
if authKind == "apikey" {
if entry := s.resolveConfigVertexCompatKey(a); entry != nil && len(entry.Models) > 0 {
if entry := s.resolveConfigVertexCompatKey(a); entry != nil {
if len(entry.Models) > 0 {
models = buildVertexCompatConfigModels(entry)
}
if authKind == "apikey" {
excluded = entry.ExcludedModels
}
}
models = applyExcludedModels(models, excluded)
case "gemini-cli":
@@ -866,7 +872,9 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
models = registry.GetKimiModels()
models = applyExcludedModels(models, excluded)
case "github-copilot":
models = registry.GetGitHubCopilotModels()
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel()
models = executor.FetchGitHubCopilotModels(ctx, a, s.cfg)
models = applyExcludedModels(models, excluded)
case "kiro":
models = s.fetchKiroModels(a)