mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-03-08 06:43:41 +00:00
Merge branch 'router-for-me:main' into main
This commit is contained in:
@@ -31,6 +31,7 @@ bin/*
|
||||
.agent/*
|
||||
.agents/*
|
||||
.opencode/*
|
||||
.idea/*
|
||||
.bmad/*
|
||||
_bmad/*
|
||||
_bmad-output/*
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -44,6 +44,7 @@ GEMINI.md
|
||||
.agents/*
|
||||
.agents/*
|
||||
.opencode/*
|
||||
.idea/*
|
||||
.bmad/*
|
||||
_bmad/*
|
||||
_bmad-output/*
|
||||
|
||||
@@ -959,18 +959,17 @@ type AntigravityModelConfig struct {
|
||||
// Keys use upstream model names returned by the Antigravity models endpoint.
|
||||
func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
|
||||
return map[string]*AntigravityModelConfig{
|
||||
// "rev19-uic3-1p": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
|
||||
"gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||
"gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
|
||||
"gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||
"gemini-3-pro-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||
"gemini-3-pro-low": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||
"gemini-3.1-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||
"gemini-3.1-pro-low": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}},
|
||||
"gemini-3.1-flash-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}},
|
||||
"gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}},
|
||||
"claude-opus-4-6-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}},
|
||||
"claude-sonnet-4-6": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}},
|
||||
"claude-opus-4-6-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||
"claude-sonnet-4-6": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000},
|
||||
"gpt-oss-120b-medium": {},
|
||||
"tab_flash_lite_preview": {},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1152,7 +1152,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
|
||||
continue
|
||||
}
|
||||
switch modelID {
|
||||
case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro":
|
||||
case "chat_20706", "chat_23310", "tab_flash_lite_preview", "tab_jump_flash_lite_preview", "gemini-2.5-flash-thinking", "gemini-2.5-pro":
|
||||
continue
|
||||
}
|
||||
modelCfg := modelConfig[modelID]
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -135,6 +136,15 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
body = ensureCacheControl(body)
|
||||
}
|
||||
|
||||
// Enforce Anthropic's cache_control block limit (max 4 breakpoints per request).
|
||||
// Cloaking and ensureCacheControl may push the total over 4 when the client
|
||||
// (e.g. Amp CLI) already sends multiple cache_control blocks.
|
||||
body = enforceCacheControlLimit(body, 4)
|
||||
|
||||
// Normalize TTL values to prevent ordering violations under prompt-caching-scope-2026-01-05.
|
||||
// A 1h-TTL block must not appear after a 5m-TTL block in evaluation order (tools→system→messages).
|
||||
body = normalizeCacheControlTTL(body)
|
||||
|
||||
// Extract betas from body and convert to header
|
||||
var extraBetas []string
|
||||
extraBetas, body = extractAndRemoveBetas(body)
|
||||
@@ -176,11 +186,29 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
|
||||
}
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(httpResp.Body)
|
||||
// Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API).
|
||||
errBody := httpResp.Body
|
||||
if ce := httpResp.Header.Get("Content-Encoding"); ce != "" {
|
||||
var decErr error
|
||||
errBody, decErr = decodeResponseBody(httpResp.Body, ce)
|
||||
if decErr != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, decErr)
|
||||
msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr)
|
||||
logWithRequestID(ctx).Warn(msg)
|
||||
return resp, statusErr{code: httpResp.StatusCode, msg: msg}
|
||||
}
|
||||
}
|
||||
b, readErr := io.ReadAll(errBody)
|
||||
if readErr != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, readErr)
|
||||
msg := fmt.Sprintf("failed to read error response body: %v", readErr)
|
||||
logWithRequestID(ctx).Warn(msg)
|
||||
b = []byte(msg)
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, b)
|
||||
logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
err = statusErr{code: httpResp.StatusCode, msg: string(b)}
|
||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||
if errClose := errBody.Close(); errClose != nil {
|
||||
log.Errorf("response body close error: %v", errClose)
|
||||
}
|
||||
return resp, err
|
||||
@@ -276,6 +304,12 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
body = ensureCacheControl(body)
|
||||
}
|
||||
|
||||
// Enforce Anthropic's cache_control block limit (max 4 breakpoints per request).
|
||||
body = enforceCacheControlLimit(body, 4)
|
||||
|
||||
// Normalize TTL values to prevent ordering violations under prompt-caching-scope-2026-01-05.
|
||||
body = normalizeCacheControlTTL(body)
|
||||
|
||||
// Extract betas from body and convert to header
|
||||
var extraBetas []string
|
||||
extraBetas, body = extractAndRemoveBetas(body)
|
||||
@@ -317,10 +351,28 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
|
||||
}
|
||||
recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
|
||||
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(httpResp.Body)
|
||||
// Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API).
|
||||
errBody := httpResp.Body
|
||||
if ce := httpResp.Header.Get("Content-Encoding"); ce != "" {
|
||||
var decErr error
|
||||
errBody, decErr = decodeResponseBody(httpResp.Body, ce)
|
||||
if decErr != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, decErr)
|
||||
msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr)
|
||||
logWithRequestID(ctx).Warn(msg)
|
||||
return nil, statusErr{code: httpResp.StatusCode, msg: msg}
|
||||
}
|
||||
}
|
||||
b, readErr := io.ReadAll(errBody)
|
||||
if readErr != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, readErr)
|
||||
msg := fmt.Sprintf("failed to read error response body: %v", readErr)
|
||||
logWithRequestID(ctx).Warn(msg)
|
||||
b = []byte(msg)
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, b)
|
||||
logWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
if errClose := httpResp.Body.Close(); errClose != nil {
|
||||
if errClose := errBody.Close(); errClose != nil {
|
||||
log.Errorf("response body close error: %v", errClose)
|
||||
}
|
||||
err = statusErr{code: httpResp.StatusCode, msg: string(b)}
|
||||
@@ -425,6 +477,10 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
|
||||
body = checkSystemInstructions(body)
|
||||
}
|
||||
|
||||
// Keep count_tokens requests compatible with Anthropic cache-control constraints too.
|
||||
body = enforceCacheControlLimit(body, 4)
|
||||
body = normalizeCacheControlTTL(body)
|
||||
|
||||
// Extract betas from body and convert to header (for count_tokens too)
|
||||
var extraBetas []string
|
||||
extraBetas, body = extractAndRemoveBetas(body)
|
||||
@@ -464,9 +520,27 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
|
||||
}
|
||||
recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone())
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
b, _ := io.ReadAll(resp.Body)
|
||||
// Decompress error responses (e.g. gzip-compressed 400 errors from Anthropic API).
|
||||
errBody := resp.Body
|
||||
if ce := resp.Header.Get("Content-Encoding"); ce != "" {
|
||||
var decErr error
|
||||
errBody, decErr = decodeResponseBody(resp.Body, ce)
|
||||
if decErr != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, decErr)
|
||||
msg := fmt.Sprintf("failed to decode error response body (encoding=%s): %v", ce, decErr)
|
||||
logWithRequestID(ctx).Warn(msg)
|
||||
return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: msg}
|
||||
}
|
||||
}
|
||||
b, readErr := io.ReadAll(errBody)
|
||||
if readErr != nil {
|
||||
recordAPIResponseError(ctx, e.cfg, readErr)
|
||||
msg := fmt.Sprintf("failed to read error response body: %v", readErr)
|
||||
logWithRequestID(ctx).Warn(msg)
|
||||
b = []byte(msg)
|
||||
}
|
||||
appendAPIResponseChunk(ctx, e.cfg, b)
|
||||
if errClose := resp.Body.Close(); errClose != nil {
|
||||
if errClose := errBody.Close(); errClose != nil {
|
||||
log.Errorf("response body close error: %v", errClose)
|
||||
}
|
||||
return cliproxyexecutor.Response{}, statusErr{code: resp.StatusCode, msg: string(b)}
|
||||
@@ -1073,17 +1147,22 @@ func generateBillingHeader(payload []byte) string {
|
||||
return fmt.Sprintf("x-anthropic-billing-header: cc_version=2.1.63.%s; cc_entrypoint=cli; cch=%s;", buildHash, cch)
|
||||
}
|
||||
|
||||
// checkSystemInstructionsWithMode injects Claude Code system prompt to match
|
||||
// the real Claude Code request format:
|
||||
// system[0]: billing header (no cache_control)
|
||||
// system[1]: "You are a Claude agent, built on Anthropic's Claude Agent SDK." (with cache_control)
|
||||
// system[2..]: user's system messages (with cache_control on last)
|
||||
// checkSystemInstructionsWithMode injects Claude Code-style system blocks:
|
||||
//
|
||||
// system[0]: billing header (no cache_control)
|
||||
// system[1]: agent identifier (no cache_control)
|
||||
// system[2..]: user system messages (cache_control added when missing)
|
||||
func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte {
|
||||
system := gjson.GetBytes(payload, "system")
|
||||
|
||||
billingText := generateBillingHeader(payload)
|
||||
billingBlock := fmt.Sprintf(`{"type":"text","text":"%s"}`, billingText)
|
||||
agentBlock := `{"type":"text","text":"You are a Claude agent, built on Anthropic's Claude Agent SDK.","cache_control":{"type":"ephemeral","ttl":"1h"}}`
|
||||
// No cache_control on the agent block. It is a cloaking artifact with zero cache
|
||||
// value (the last system block is what actually triggers caching of all system content).
|
||||
// Including any cache_control here creates an intra-system TTL ordering violation
|
||||
// when the client's system blocks use ttl='1h' (prompt-caching-scope-2026-01-05 beta
|
||||
// forbids 1h blocks after 5m blocks, and a no-TTL block defaults to 5m).
|
||||
agentBlock := `{"type":"text","text":"You are a Claude agent, built on Anthropic's Claude Agent SDK."}`
|
||||
|
||||
if strictMode {
|
||||
// Strict mode: billing header + agent identifier only
|
||||
@@ -1103,11 +1182,12 @@ func checkSystemInstructionsWithMode(payload []byte, strictMode bool) []byte {
|
||||
if system.IsArray() {
|
||||
system.ForEach(func(_, part gjson.Result) bool {
|
||||
if part.Get("type").String() == "text" {
|
||||
// Add cache_control with ttl to user system messages if not present
|
||||
// Add cache_control to user system messages if not present.
|
||||
// Do NOT add ttl — let it inherit the default (5m) to avoid
|
||||
// TTL ordering violations with the prompt-caching-scope-2026-01-05 beta.
|
||||
partJSON := part.Raw
|
||||
if !part.Get("cache_control").Exists() {
|
||||
partJSON, _ = sjson.Set(partJSON, "cache_control.type", "ephemeral")
|
||||
partJSON, _ = sjson.Set(partJSON, "cache_control.ttl", "1h")
|
||||
}
|
||||
result += "," + partJSON
|
||||
}
|
||||
@@ -1254,6 +1334,313 @@ func countCacheControls(payload []byte) int {
|
||||
return count
|
||||
}
|
||||
|
||||
func parsePayloadObject(payload []byte) (map[string]any, bool) {
|
||||
if len(payload) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
var root map[string]any
|
||||
if err := json.Unmarshal(payload, &root); err != nil {
|
||||
return nil, false
|
||||
}
|
||||
return root, true
|
||||
}
|
||||
|
||||
func marshalPayloadObject(original []byte, root map[string]any) []byte {
|
||||
if root == nil {
|
||||
return original
|
||||
}
|
||||
out, err := json.Marshal(root)
|
||||
if err != nil {
|
||||
return original
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func asObject(v any) (map[string]any, bool) {
|
||||
obj, ok := v.(map[string]any)
|
||||
return obj, ok
|
||||
}
|
||||
|
||||
func asArray(v any) ([]any, bool) {
|
||||
arr, ok := v.([]any)
|
||||
return arr, ok
|
||||
}
|
||||
|
||||
func countCacheControlsMap(root map[string]any) int {
|
||||
count := 0
|
||||
|
||||
if system, ok := asArray(root["system"]); ok {
|
||||
for _, item := range system {
|
||||
if obj, ok := asObject(item); ok {
|
||||
if _, exists := obj["cache_control"]; exists {
|
||||
count++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if tools, ok := asArray(root["tools"]); ok {
|
||||
for _, item := range tools {
|
||||
if obj, ok := asObject(item); ok {
|
||||
if _, exists := obj["cache_control"]; exists {
|
||||
count++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if messages, ok := asArray(root["messages"]); ok {
|
||||
for _, msg := range messages {
|
||||
msgObj, ok := asObject(msg)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
content, ok := asArray(msgObj["content"])
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, item := range content {
|
||||
if obj, ok := asObject(item); ok {
|
||||
if _, exists := obj["cache_control"]; exists {
|
||||
count++
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return count
|
||||
}
|
||||
|
||||
func normalizeTTLForBlock(obj map[string]any, seen5m *bool) {
|
||||
ccRaw, exists := obj["cache_control"]
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
cc, ok := asObject(ccRaw)
|
||||
if !ok {
|
||||
*seen5m = true
|
||||
return
|
||||
}
|
||||
ttlRaw, ttlExists := cc["ttl"]
|
||||
ttl, ttlIsString := ttlRaw.(string)
|
||||
if !ttlExists || !ttlIsString || ttl != "1h" {
|
||||
*seen5m = true
|
||||
return
|
||||
}
|
||||
if *seen5m {
|
||||
delete(cc, "ttl")
|
||||
}
|
||||
}
|
||||
|
||||
func findLastCacheControlIndex(arr []any) int {
|
||||
last := -1
|
||||
for idx, item := range arr {
|
||||
obj, ok := asObject(item)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if _, exists := obj["cache_control"]; exists {
|
||||
last = idx
|
||||
}
|
||||
}
|
||||
return last
|
||||
}
|
||||
|
||||
func stripCacheControlExceptIndex(arr []any, preserveIdx int, excess *int) {
|
||||
for idx, item := range arr {
|
||||
if *excess <= 0 {
|
||||
return
|
||||
}
|
||||
obj, ok := asObject(item)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if _, exists := obj["cache_control"]; exists && idx != preserveIdx {
|
||||
delete(obj, "cache_control")
|
||||
*excess--
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func stripAllCacheControl(arr []any, excess *int) {
|
||||
for _, item := range arr {
|
||||
if *excess <= 0 {
|
||||
return
|
||||
}
|
||||
obj, ok := asObject(item)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if _, exists := obj["cache_control"]; exists {
|
||||
delete(obj, "cache_control")
|
||||
*excess--
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func stripMessageCacheControl(messages []any, excess *int) {
|
||||
for _, msg := range messages {
|
||||
if *excess <= 0 {
|
||||
return
|
||||
}
|
||||
msgObj, ok := asObject(msg)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
content, ok := asArray(msgObj["content"])
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, item := range content {
|
||||
if *excess <= 0 {
|
||||
return
|
||||
}
|
||||
obj, ok := asObject(item)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if _, exists := obj["cache_control"]; exists {
|
||||
delete(obj, "cache_control")
|
||||
*excess--
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// normalizeCacheControlTTL ensures cache_control TTL values don't violate the
|
||||
// prompt-caching-scope-2026-01-05 ordering constraint: a 1h-TTL block must not
|
||||
// appear after a 5m-TTL block anywhere in the evaluation order.
|
||||
//
|
||||
// Anthropic evaluates blocks in order: tools → system (index 0..N) → messages.
|
||||
// Within each section, blocks are evaluated in array order. A 5m (default) block
|
||||
// followed by a 1h block at ANY later position is an error — including within
|
||||
// the same section (e.g. system[1]=5m then system[3]=1h).
|
||||
//
|
||||
// Strategy: walk all cache_control blocks in evaluation order. Once a 5m block
|
||||
// is seen, strip ttl from ALL subsequent 1h blocks (downgrading them to 5m).
|
||||
func normalizeCacheControlTTL(payload []byte) []byte {
|
||||
root, ok := parsePayloadObject(payload)
|
||||
if !ok {
|
||||
return payload
|
||||
}
|
||||
|
||||
seen5m := false
|
||||
|
||||
if tools, ok := asArray(root["tools"]); ok {
|
||||
for _, tool := range tools {
|
||||
if obj, ok := asObject(tool); ok {
|
||||
normalizeTTLForBlock(obj, &seen5m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if system, ok := asArray(root["system"]); ok {
|
||||
for _, item := range system {
|
||||
if obj, ok := asObject(item); ok {
|
||||
normalizeTTLForBlock(obj, &seen5m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if messages, ok := asArray(root["messages"]); ok {
|
||||
for _, msg := range messages {
|
||||
msgObj, ok := asObject(msg)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
content, ok := asArray(msgObj["content"])
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, item := range content {
|
||||
if obj, ok := asObject(item); ok {
|
||||
normalizeTTLForBlock(obj, &seen5m)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return marshalPayloadObject(payload, root)
|
||||
}
|
||||
|
||||
// enforceCacheControlLimit removes excess cache_control blocks from a payload
|
||||
// so the total does not exceed the Anthropic API limit (currently 4).
|
||||
//
|
||||
// Anthropic evaluates cache breakpoints in order: tools → system → messages.
|
||||
// The most valuable breakpoints are:
|
||||
// 1. Last tool — caches ALL tool definitions
|
||||
// 2. Last system block — caches ALL system content
|
||||
// 3. Recent messages — cache conversation context
|
||||
//
|
||||
// Removal priority (strip lowest-value first):
|
||||
//
|
||||
// Phase 1: system blocks earliest-first, preserving the last one.
|
||||
// Phase 2: tool blocks earliest-first, preserving the last one.
|
||||
// Phase 3: message content blocks earliest-first.
|
||||
// Phase 4: remaining system blocks (last system).
|
||||
// Phase 5: remaining tool blocks (last tool).
|
||||
func enforceCacheControlLimit(payload []byte, maxBlocks int) []byte {
|
||||
root, ok := parsePayloadObject(payload)
|
||||
if !ok {
|
||||
return payload
|
||||
}
|
||||
|
||||
total := countCacheControlsMap(root)
|
||||
if total <= maxBlocks {
|
||||
return payload
|
||||
}
|
||||
|
||||
excess := total - maxBlocks
|
||||
|
||||
var system []any
|
||||
if arr, ok := asArray(root["system"]); ok {
|
||||
system = arr
|
||||
}
|
||||
var tools []any
|
||||
if arr, ok := asArray(root["tools"]); ok {
|
||||
tools = arr
|
||||
}
|
||||
var messages []any
|
||||
if arr, ok := asArray(root["messages"]); ok {
|
||||
messages = arr
|
||||
}
|
||||
|
||||
if len(system) > 0 {
|
||||
stripCacheControlExceptIndex(system, findLastCacheControlIndex(system), &excess)
|
||||
}
|
||||
if excess <= 0 {
|
||||
return marshalPayloadObject(payload, root)
|
||||
}
|
||||
|
||||
if len(tools) > 0 {
|
||||
stripCacheControlExceptIndex(tools, findLastCacheControlIndex(tools), &excess)
|
||||
}
|
||||
if excess <= 0 {
|
||||
return marshalPayloadObject(payload, root)
|
||||
}
|
||||
|
||||
if len(messages) > 0 {
|
||||
stripMessageCacheControl(messages, &excess)
|
||||
}
|
||||
if excess <= 0 {
|
||||
return marshalPayloadObject(payload, root)
|
||||
}
|
||||
|
||||
if len(system) > 0 {
|
||||
stripAllCacheControl(system, &excess)
|
||||
}
|
||||
if excess <= 0 {
|
||||
return marshalPayloadObject(payload, root)
|
||||
}
|
||||
|
||||
if len(tools) > 0 {
|
||||
stripAllCacheControl(tools, &excess)
|
||||
}
|
||||
|
||||
return marshalPayloadObject(payload, root)
|
||||
}
|
||||
|
||||
// injectMessagesCacheControl adds cache_control to the second-to-last user turn for multi-turn caching.
|
||||
// Per Anthropic docs: "Place cache_control on the second-to-last User message to let the model reuse the earlier cache."
|
||||
// This enables caching of conversation history, which is especially beneficial for long multi-turn conversations.
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||
@@ -348,3 +349,237 @@ func TestApplyClaudeToolPrefix_SkipsBuiltinToolReference(t *testing.T) {
|
||||
t.Fatalf("built-in tool_reference should not be prefixed, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeCacheControlTTL_DowngradesLaterOneHourBlocks(t *testing.T) {
|
||||
payload := []byte(`{
|
||||
"tools": [{"name":"t1","cache_control":{"type":"ephemeral","ttl":"1h"}}],
|
||||
"system": [{"type":"text","text":"s1","cache_control":{"type":"ephemeral"}}],
|
||||
"messages": [{"role":"user","content":[{"type":"text","text":"u1","cache_control":{"type":"ephemeral","ttl":"1h"}}]}]
|
||||
}`)
|
||||
|
||||
out := normalizeCacheControlTTL(payload)
|
||||
|
||||
if got := gjson.GetBytes(out, "tools.0.cache_control.ttl").String(); got != "1h" {
|
||||
t.Fatalf("tools.0.cache_control.ttl = %q, want %q", got, "1h")
|
||||
}
|
||||
if gjson.GetBytes(out, "messages.0.content.0.cache_control.ttl").Exists() {
|
||||
t.Fatalf("messages.0.content.0.cache_control.ttl should be removed after a default-5m block")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnforceCacheControlLimit_StripsNonLastToolBeforeMessages(t *testing.T) {
|
||||
payload := []byte(`{
|
||||
"tools": [
|
||||
{"name":"t1","cache_control":{"type":"ephemeral"}},
|
||||
{"name":"t2","cache_control":{"type":"ephemeral"}}
|
||||
],
|
||||
"system": [{"type":"text","text":"s1","cache_control":{"type":"ephemeral"}}],
|
||||
"messages": [
|
||||
{"role":"user","content":[{"type":"text","text":"u1","cache_control":{"type":"ephemeral"}}]},
|
||||
{"role":"user","content":[{"type":"text","text":"u2","cache_control":{"type":"ephemeral"}}]}
|
||||
]
|
||||
}`)
|
||||
|
||||
out := enforceCacheControlLimit(payload, 4)
|
||||
|
||||
if got := countCacheControls(out); got != 4 {
|
||||
t.Fatalf("cache_control count = %d, want 4", got)
|
||||
}
|
||||
if gjson.GetBytes(out, "tools.0.cache_control").Exists() {
|
||||
t.Fatalf("tools.0.cache_control should be removed first (non-last tool)")
|
||||
}
|
||||
if !gjson.GetBytes(out, "tools.1.cache_control").Exists() {
|
||||
t.Fatalf("tools.1.cache_control (last tool) should be preserved")
|
||||
}
|
||||
if !gjson.GetBytes(out, "messages.0.content.0.cache_control").Exists() || !gjson.GetBytes(out, "messages.1.content.0.cache_control").Exists() {
|
||||
t.Fatalf("message cache_control blocks should be preserved when non-last tool removal is enough")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnforceCacheControlLimit_ToolOnlyPayloadStillRespectsLimit(t *testing.T) {
|
||||
payload := []byte(`{
|
||||
"tools": [
|
||||
{"name":"t1","cache_control":{"type":"ephemeral"}},
|
||||
{"name":"t2","cache_control":{"type":"ephemeral"}},
|
||||
{"name":"t3","cache_control":{"type":"ephemeral"}},
|
||||
{"name":"t4","cache_control":{"type":"ephemeral"}},
|
||||
{"name":"t5","cache_control":{"type":"ephemeral"}}
|
||||
]
|
||||
}`)
|
||||
|
||||
out := enforceCacheControlLimit(payload, 4)
|
||||
|
||||
if got := countCacheControls(out); got != 4 {
|
||||
t.Fatalf("cache_control count = %d, want 4", got)
|
||||
}
|
||||
if gjson.GetBytes(out, "tools.0.cache_control").Exists() {
|
||||
t.Fatalf("tools.0.cache_control should be removed to satisfy max=4")
|
||||
}
|
||||
if !gjson.GetBytes(out, "tools.4.cache_control").Exists() {
|
||||
t.Fatalf("last tool cache_control should be preserved when possible")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClaudeExecutor_CountTokens_AppliesCacheControlGuards(t *testing.T) {
|
||||
var seenBody []byte
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
body, _ := io.ReadAll(r.Body)
|
||||
seenBody = bytes.Clone(body)
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"input_tokens":42}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
executor := NewClaudeExecutor(&config.Config{})
|
||||
auth := &cliproxyauth.Auth{Attributes: map[string]string{
|
||||
"api_key": "key-123",
|
||||
"base_url": server.URL,
|
||||
}}
|
||||
|
||||
payload := []byte(`{
|
||||
"tools": [
|
||||
{"name":"t1","cache_control":{"type":"ephemeral","ttl":"1h"}},
|
||||
{"name":"t2","cache_control":{"type":"ephemeral"}}
|
||||
],
|
||||
"system": [
|
||||
{"type":"text","text":"s1","cache_control":{"type":"ephemeral","ttl":"1h"}},
|
||||
{"type":"text","text":"s2","cache_control":{"type":"ephemeral","ttl":"1h"}}
|
||||
],
|
||||
"messages": [
|
||||
{"role":"user","content":[{"type":"text","text":"u1","cache_control":{"type":"ephemeral","ttl":"1h"}}]},
|
||||
{"role":"user","content":[{"type":"text","text":"u2","cache_control":{"type":"ephemeral","ttl":"1h"}}]}
|
||||
]
|
||||
}`)
|
||||
|
||||
_, err := executor.CountTokens(context.Background(), auth, cliproxyexecutor.Request{
|
||||
Model: "claude-3-5-haiku-20241022",
|
||||
Payload: payload,
|
||||
}, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")})
|
||||
if err != nil {
|
||||
t.Fatalf("CountTokens error: %v", err)
|
||||
}
|
||||
|
||||
if len(seenBody) == 0 {
|
||||
t.Fatal("expected count_tokens request body to be captured")
|
||||
}
|
||||
if got := countCacheControls(seenBody); got > 4 {
|
||||
t.Fatalf("count_tokens body has %d cache_control blocks, want <= 4", got)
|
||||
}
|
||||
if hasTTLOrderingViolation(seenBody) {
|
||||
t.Fatalf("count_tokens body still has ttl ordering violations: %s", string(seenBody))
|
||||
}
|
||||
}
|
||||
|
||||
func hasTTLOrderingViolation(payload []byte) bool {
|
||||
seen5m := false
|
||||
violates := false
|
||||
|
||||
checkCC := func(cc gjson.Result) {
|
||||
if !cc.Exists() || violates {
|
||||
return
|
||||
}
|
||||
ttl := cc.Get("ttl").String()
|
||||
if ttl != "1h" {
|
||||
seen5m = true
|
||||
return
|
||||
}
|
||||
if seen5m {
|
||||
violates = true
|
||||
}
|
||||
}
|
||||
|
||||
tools := gjson.GetBytes(payload, "tools")
|
||||
if tools.IsArray() {
|
||||
tools.ForEach(func(_, tool gjson.Result) bool {
|
||||
checkCC(tool.Get("cache_control"))
|
||||
return !violates
|
||||
})
|
||||
}
|
||||
|
||||
system := gjson.GetBytes(payload, "system")
|
||||
if system.IsArray() {
|
||||
system.ForEach(func(_, item gjson.Result) bool {
|
||||
checkCC(item.Get("cache_control"))
|
||||
return !violates
|
||||
})
|
||||
}
|
||||
|
||||
messages := gjson.GetBytes(payload, "messages")
|
||||
if messages.IsArray() {
|
||||
messages.ForEach(func(_, msg gjson.Result) bool {
|
||||
content := msg.Get("content")
|
||||
if content.IsArray() {
|
||||
content.ForEach(func(_, item gjson.Result) bool {
|
||||
checkCC(item.Get("cache_control"))
|
||||
return !violates
|
||||
})
|
||||
}
|
||||
return !violates
|
||||
})
|
||||
}
|
||||
|
||||
return violates
|
||||
}
|
||||
|
||||
func TestClaudeExecutor_Execute_InvalidGzipErrorBodyReturnsDecodeMessage(t *testing.T) {
|
||||
testClaudeExecutorInvalidCompressedErrorBody(t, func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error {
|
||||
_, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{
|
||||
Model: "claude-3-5-sonnet-20241022",
|
||||
Payload: payload,
|
||||
}, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")})
|
||||
return err
|
||||
})
|
||||
}
|
||||
|
||||
func TestClaudeExecutor_ExecuteStream_InvalidGzipErrorBodyReturnsDecodeMessage(t *testing.T) {
|
||||
testClaudeExecutorInvalidCompressedErrorBody(t, func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error {
|
||||
_, err := executor.ExecuteStream(context.Background(), auth, cliproxyexecutor.Request{
|
||||
Model: "claude-3-5-sonnet-20241022",
|
||||
Payload: payload,
|
||||
}, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")})
|
||||
return err
|
||||
})
|
||||
}
|
||||
|
||||
func TestClaudeExecutor_CountTokens_InvalidGzipErrorBodyReturnsDecodeMessage(t *testing.T) {
|
||||
testClaudeExecutorInvalidCompressedErrorBody(t, func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error {
|
||||
_, err := executor.CountTokens(context.Background(), auth, cliproxyexecutor.Request{
|
||||
Model: "claude-3-5-sonnet-20241022",
|
||||
Payload: payload,
|
||||
}, cliproxyexecutor.Options{SourceFormat: sdktranslator.FromString("claude")})
|
||||
return err
|
||||
})
|
||||
}
|
||||
|
||||
func testClaudeExecutorInvalidCompressedErrorBody(
|
||||
t *testing.T,
|
||||
invoke func(executor *ClaudeExecutor, auth *cliproxyauth.Auth, payload []byte) error,
|
||||
) {
|
||||
t.Helper()
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Header().Set("Content-Encoding", "gzip")
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
_, _ = w.Write([]byte("not-a-valid-gzip-stream"))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
executor := NewClaudeExecutor(&config.Config{})
|
||||
auth := &cliproxyauth.Auth{Attributes: map[string]string{
|
||||
"api_key": "key-123",
|
||||
"base_url": server.URL,
|
||||
}}
|
||||
payload := []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`)
|
||||
|
||||
err := invoke(executor, auth, payload)
|
||||
if err == nil {
|
||||
t.Fatal("expected error, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "failed to decode error response body") {
|
||||
t.Fatalf("expected decode failure message, got: %v", err)
|
||||
}
|
||||
if statusProvider, ok := err.(interface{ StatusCode() int }); !ok || statusProvider.StatusCode() != http.StatusBadRequest {
|
||||
t.Fatalf("expected status code 400, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -440,14 +440,8 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||
}
|
||||
case "auto":
|
||||
// Amp sends thinking.type="auto" — use max budget from model config
|
||||
// Antigravity API for Claude models requires a concrete positive budget,
|
||||
// not -1. Use a high default that ApplyThinking will cap to model max.
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 64000)
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||
case "adaptive":
|
||||
// Keep adaptive as a high level sentinel; ApplyThinking resolves it
|
||||
case "adaptive", "auto":
|
||||
// Keep adaptive/auto as a high level sentinel; ApplyThinking resolves it
|
||||
// to model-specific max capability.
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingLevel", "high")
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||
|
||||
@@ -230,8 +230,8 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
|
||||
reasoningEffort = effort
|
||||
}
|
||||
}
|
||||
case "adaptive":
|
||||
// Claude adaptive means "enable with max capacity"; keep it as highest level
|
||||
case "adaptive", "auto":
|
||||
// Claude adaptive/auto means "enable with max capacity"; keep it as highest level
|
||||
// and let ApplyThinking normalize per target model capability.
|
||||
reasoningEffort = string(thinking.LevelXHigh)
|
||||
case "disabled":
|
||||
|
||||
@@ -180,8 +180,8 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||
}
|
||||
case "adaptive":
|
||||
// Keep adaptive as a high level sentinel; ApplyThinking resolves it
|
||||
case "adaptive", "auto":
|
||||
// Keep adaptive/auto as a high level sentinel; ApplyThinking resolves it
|
||||
// to model-specific max capability.
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingLevel", "high")
|
||||
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true)
|
||||
|
||||
@@ -161,8 +161,8 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true)
|
||||
}
|
||||
case "adaptive":
|
||||
// Keep adaptive as a high level sentinel; ApplyThinking resolves it
|
||||
case "adaptive", "auto":
|
||||
// Keep adaptive/auto as a high level sentinel; ApplyThinking resolves it
|
||||
// to model-specific max capability.
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingLevel", "high")
|
||||
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.includeThoughts", true)
|
||||
|
||||
@@ -75,8 +75,8 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream
|
||||
out, _ = sjson.Set(out, "reasoning_effort", effort)
|
||||
}
|
||||
}
|
||||
case "adaptive":
|
||||
// Claude adaptive means "enable with max capacity"; keep it as highest level
|
||||
case "adaptive", "auto":
|
||||
// Claude adaptive/auto means "enable with max capacity"; keep it as highest level
|
||||
// and let ApplyThinking normalize per target model capability.
|
||||
out, _ = sjson.Set(out, "reasoning_effort", string(thinking.LevelXHigh))
|
||||
case "disabled":
|
||||
|
||||
Reference in New Issue
Block a user