Compare commits

...

11 Commits

Author SHA1 Message Date
Luis Pater
d182e893b6 Merge pull request #194 from PancakeZik/fix/assistant-content-parroting
fix: replace assistant placeholder text to prevent model parroting
2026-02-07 01:38:58 +08:00
Luis Pater
2e8d49a641 Merge pull request #191 from CheesesNguyen/feat/kiro-api-models-and-context-usage
feat(kiro): add contextUsageEvent handler
2026-02-07 01:33:49 +08:00
Luis Pater
6abd7d27d9 Merge pull request #190 from taetaetae/fix/kiro-claude-compaction-current-user-empty-content
fix(kiro): handle empty content in current user message for compaction
2026-02-07 01:33:01 +08:00
Luis Pater
8fa12af403 Merge pull request #195 from router-for-me/plus
v6.8.1
2026-02-07 01:31:40 +08:00
Luis Pater
77586ed7d3 Merge branch 'main' into plus 2026-02-07 01:31:21 +08:00
Luis Pater
394497fb2f Merge pull request #1465 from router-for-me/kimi-fix
fix(kimi): add OAuth model-alias channel support and cover OAuth excl…
2026-02-07 01:27:30 +08:00
LTbinglingfeng
fc7b6ef086 fix(kimi): add OAuth model-alias channel support and cover OAuth excluded-models with tests 2026-02-07 01:16:39 +08:00
Joao
98edcad39d fix: replace assistant placeholder text to prevent model parroting
Kiro API requires non-empty content on assistant messages, so
CLIProxyAPI injects placeholder text when assistant messages only
contain tool_use blocks (no text). The previous placeholders were
conversational phrases:

- DefaultAssistantContentWithTools: "I'll help you with that."
- DefaultAssistantContent: "I understand."

In agentic sessions with many tool calls, these phrases appeared
dozens of times in conversation history. Opus 4.6 (and likely other
models) picked up on this pattern and started parroting "I'll help
you with that." before every tool call in its actual responses.

Fix: Replace both placeholders with a single dot ".", which
satisfies Kiro's non-empty requirement without giving the model
a phrase to mimic.
2026-02-06 16:42:21 +00:00
Luis Pater
1187aa8222 feat(translator): capture cached token count in usage metadata and handle prompt caching
- Added support to extract and include `cachedContentTokenCount` in `usage.prompt_tokens_details`.
- Logged warnings for failures to set cached token count for better debugging.
2026-02-06 21:28:40 +08:00
CheesesNguyen
16693053f5 feat(kiro): add contextUsageEvent handler and simplify model structs
- Add contextUsageEvent case handler in kiro_executor.go for both
  parseEventStream and streamToChannel functions
- Handle nested format: {"contextUsageEvent": {"contextUsagePercentage": 0.53}}
- Keep KiroModel struct minimal with only essential fields
- Remove unused KiroPromptCachingInfo struct from kiro_model_converter.go
- Remove unused SupportedInputTypes and PromptCaching fields from KiroAPIModel
2026-02-06 11:12:27 +07:00
taetaetae
4e3bad3907 fix(kiro): handle empty content in current user message for compaction
Problem:
- PR #186 fixed empty content for assistant messages and history user messages
- But current user message (isLastMessage == true) was not fixed
- When user message contains only tool_result (no text), content becomes empty
- This causes 'Improperly formed request' errors from Kiro API
- Compaction requests from OpenCode commonly have this pattern

Solution:
- Move empty content check BEFORE the isLastMessage branch
- Apply fallback content to ALL user messages, not just history
- Add DefaultUserContentWithToolResults and DefaultUserContent constants

Fixes compaction failures for OpenCode + Quotio + CLIProxyAPIPlus + Kiro stack
2026-02-06 11:58:43 +09:00
10 changed files with 170 additions and 15 deletions

View File

@@ -236,7 +236,7 @@ nonstream-keepalive-interval: 0
# Global OAuth model name aliases (per channel)
# These aliases rename model IDs for both model listing and request routing.
# Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot.
# Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot, kimi.
# NOTE: Aliases do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
# You can repeat the same name with different aliases to expose multiple client model names.
#oauth-model-alias:
@@ -280,6 +280,9 @@ nonstream-keepalive-interval: 0
# iflow:
# - name: "glm-4.7"
# alias: "glm-god"
# kimi:
# - name: "kimi-k2.5"
# alias: "k2.5"
# kiro:
# - name: "kiro-claude-opus-4-5"
# alias: "op45"
@@ -309,6 +312,8 @@ nonstream-keepalive-interval: 0
# - "vision-model"
# iflow:
# - "tstars2.0"
# kimi:
# - "kimi-k2-thinking"
# kiro:
# - "kiro-claude-haiku-4-5"
# github-copilot:

View File

@@ -238,7 +238,7 @@ func (k *KiroAuth) ListAvailableModels(ctx context.Context, tokenData *KiroToken
Description string `json:"description"`
RateMultiplier float64 `json:"rateMultiplier"`
RateUnit string `json:"rateUnit"`
TokenLimits struct {
TokenLimits *struct {
MaxInputTokens int `json:"maxInputTokens"`
} `json:"tokenLimits"`
} `json:"models"`
@@ -250,13 +250,17 @@ func (k *KiroAuth) ListAvailableModels(ctx context.Context, tokenData *KiroToken
models := make([]*KiroModel, 0, len(result.Models))
for _, m := range result.Models {
maxInputTokens := 0
if m.TokenLimits != nil {
maxInputTokens = m.TokenLimits.MaxInputTokens
}
models = append(models, &KiroModel{
ModelID: m.ModelID,
ModelName: m.ModelName,
Description: m.Description,
RateMultiplier: m.RateMultiplier,
RateUnit: m.RateUnit,
MaxInputTokens: m.TokenLimits.MaxInputTokens,
MaxInputTokens: maxInputTokens,
})
}

View File

@@ -2102,6 +2102,22 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroclaude.Ki
}
}
case "contextUsageEvent":
// Handle context usage events from Kiro API
// Format: {"contextUsageEvent": {"contextUsagePercentage": 0.53}}
if ctxUsage, ok := event["contextUsageEvent"].(map[string]interface{}); ok {
if ctxPct, ok := ctxUsage["contextUsagePercentage"].(float64); ok {
upstreamContextPercentage = ctxPct
log.Debugf("kiro: parseEventStream received contextUsageEvent: %.2f%%", ctxPct*100)
}
} else {
// Try direct field (fallback)
if ctxPct, ok := event["contextUsagePercentage"].(float64); ok {
upstreamContextPercentage = ctxPct
log.Debugf("kiro: parseEventStream received contextUsagePercentage (direct): %.2f%%", ctxPct*100)
}
}
case "error", "exception", "internalServerException", "invalidStateEvent":
// Handle error events from Kiro API stream
errMsg := ""
@@ -2705,6 +2721,22 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
}
}
case "contextUsageEvent":
// Handle context usage events from Kiro API
// Format: {"contextUsageEvent": {"contextUsagePercentage": 0.53}}
if ctxUsage, ok := event["contextUsageEvent"].(map[string]interface{}); ok {
if ctxPct, ok := ctxUsage["contextUsagePercentage"].(float64); ok {
upstreamContextPercentage = ctxPct
log.Debugf("kiro: streamToChannel received contextUsageEvent: %.2f%%", ctxPct*100)
}
} else {
// Try direct field (fallback)
if ctxPct, ok := event["contextUsagePercentage"].(float64); ok {
upstreamContextPercentage = ctxPct
log.Debugf("kiro: streamToChannel received contextUsagePercentage (direct): %.2f%%", ctxPct*100)
}
}
case "error", "exception", "internalServerException":
// Handle error events from Kiro API stream
errMsg := ""

View File

@@ -14,6 +14,7 @@ import (
"time"
. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -85,6 +86,7 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
// Extract and set usage metadata (token counts).
if usageResult := gjson.GetBytes(rawJSON, "response.usageMetadata"); usageResult.Exists() {
cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
}
@@ -97,6 +99,14 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
if thoughtsTokenCount > 0 {
template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
}
// Include cached token count if present (indicates prompt caching is working)
if cachedTokenCount > 0 {
var err error
template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
if err != nil {
log.Warnf("antigravity openai response: failed to set cached_tokens: %v", err)
}
}
}
// Process the main content part of the response.

View File

@@ -608,18 +608,22 @@ func processMessages(messages gjson.Result, modelID, origin string) ([]KiroHisto
if role == "user" {
userMsg, toolResults := BuildUserMessageStruct(msg, modelID, origin)
// CRITICAL: Kiro API requires content to be non-empty for ALL user messages
// This includes both history messages and the current message.
// When user message contains only tool_result (no text), content will be empty.
// This commonly happens in compaction requests from OpenCode.
if strings.TrimSpace(userMsg.Content) == "" {
if len(toolResults) > 0 {
userMsg.Content = kirocommon.DefaultUserContentWithToolResults
} else {
userMsg.Content = kirocommon.DefaultUserContent
}
log.Debugf("kiro: user content was empty, using default: %s", userMsg.Content)
}
if isLastMessage {
currentUserMsg = &userMsg
currentToolResults = toolResults
} else {
// CRITICAL: Kiro API requires content to be non-empty for history messages too
if strings.TrimSpace(userMsg.Content) == "" {
if len(toolResults) > 0 {
userMsg.Content = "Tool results provided."
} else {
userMsg.Content = "Continue"
}
}
// For history messages, embed tool results in context
if len(toolResults) > 0 {
userMsg.UserInputMessageContext = &KiroUserInputMessageContext{

View File

@@ -31,11 +31,23 @@ const (
// DefaultAssistantContentWithTools is the fallback content for assistant messages
// that have tool_use but no text content. Kiro API requires non-empty content.
DefaultAssistantContentWithTools = "I'll help you with that."
// IMPORTANT: Use a minimal neutral string that the model won't mimic in responses.
// Previously "I'll help you with that." which caused the model to parrot it back.
DefaultAssistantContentWithTools = "."
// DefaultAssistantContent is the fallback content for assistant messages
// that have no content at all. Kiro API requires non-empty content.
DefaultAssistantContent = "I understand."
// IMPORTANT: Use a minimal neutral string that the model won't mimic in responses.
// Previously "I understand." which could leak into model behavior.
DefaultAssistantContent = "."
// DefaultUserContentWithToolResults is the fallback content for user messages
// that have only tool_result (no text). Kiro API requires non-empty content.
DefaultUserContentWithToolResults = "Tool results provided."
// DefaultUserContent is the fallback content for user messages
// that have no content at all. Kiro API requires non-empty content.
DefaultUserContent = "Continue"
// KiroAgenticSystemPrompt is injected only for -agentic models to prevent timeouts on large writes.
// AWS Kiro API has a 2-3 minute timeout for large file write operations.

View File

@@ -221,7 +221,7 @@ func modelAliasChannel(auth *Auth) string {
// and auth kind. Returns empty string if the provider/authKind combination doesn't support
// OAuth model alias (e.g., API key authentication).
//
// Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot.
// Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot, kimi.
func OAuthModelAliasChannel(provider, authKind string) string {
provider = strings.ToLower(strings.TrimSpace(provider))
authKind = strings.ToLower(strings.TrimSpace(authKind))
@@ -245,7 +245,7 @@ func OAuthModelAliasChannel(provider, authKind string) string {
return ""
}
return "codex"
case "gemini-cli", "aistudio", "antigravity", "qwen", "iflow", "kiro", "github-copilot":
case "gemini-cli", "aistudio", "antigravity", "qwen", "iflow", "kiro", "github-copilot", "kimi":
return provider
default:
return ""

View File

@@ -79,6 +79,15 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) {
input: "gemini-2.5-pro(none)",
want: "gemini-2.5-pro-exp-03-25(none)",
},
{
name: "kimi suffix preserved",
aliases: map[string][]internalconfig.OAuthModelAlias{
"kimi": {{Name: "kimi-k2.5", Alias: "k2.5"}},
},
channel: "kimi",
input: "k2.5(high)",
want: "kimi-k2.5(high)",
},
{
name: "case insensitive alias lookup with suffix",
aliases: map[string][]internalconfig.OAuthModelAlias{
@@ -161,6 +170,8 @@ func createAuthForChannel(channel string) *Auth {
return &Auth{Provider: "qwen"}
case "iflow":
return &Auth{Provider: "iflow"}
case "kimi":
return &Auth{Provider: "kimi"}
case "kiro":
return &Auth{Provider: "kiro"}
default:
@@ -168,6 +179,14 @@ func createAuthForChannel(channel string) *Auth {
}
}
// TestOAuthModelAliasChannel_Kimi checks that the "kimi" provider combined
// with the "oauth" auth kind resolves to the "kimi" alias channel.
func TestOAuthModelAliasChannel_Kimi(t *testing.T) {
	t.Parallel()

	const want = "kimi"

	channel := OAuthModelAliasChannel("kimi", "oauth")
	if channel == want {
		return
	}
	t.Fatalf("OAuthModelAliasChannel() = %q, want %q", channel, want)
}
func TestApplyOAuthModelAlias_SuffixPreservation(t *testing.T) {
t.Parallel()

View File

@@ -0,0 +1,45 @@
package cliproxy
import (
"testing"
"github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
)
// TestOAuthExcludedModels_KimiOAuth checks that the excluded-model list
// configured under the "kimi" key is returned, in order, when the service is
// queried for the "kimi" provider with the "oauth" auth kind.
func TestOAuthExcludedModels_KimiOAuth(t *testing.T) {
	t.Parallel()

	excluded := map[string][]string{
		"kimi": {"kimi-k2-thinking", "kimi-k2.5"},
	}
	svc := &Service{cfg: &config.Config{OAuthExcludedModels: excluded}}

	got := svc.oauthExcludedModels("kimi", "oauth")

	// Check the length first so the index accesses below cannot go out of range.
	if n := len(got); n != 2 {
		t.Fatalf("expected 2 excluded models, got %d", n)
	}
	if !(got[0] == "kimi-k2-thinking" && got[1] == "kimi-k2.5") {
		t.Fatalf("unexpected excluded models: %#v", got)
	}
}
func TestOAuthExcludedModels_KimiAPIKeyReturnsNil(t *testing.T) {
t.Parallel()
svc := &Service{
cfg: &config.Config{
OAuthExcludedModels: map[string][]string{
"kimi": {"kimi-k2-thinking"},
},
},
}
got := svc.oauthExcludedModels("kimi", "apikey")
if got != nil {
t.Fatalf("expected nil for apikey auth kind, got %#v", got)
}
}

View File

@@ -90,3 +90,27 @@ func TestApplyOAuthModelAlias_ForkAddsMultipleAliases(t *testing.T) {
t.Fatalf("expected forked model name %q, got %q", "models/g5-2", out[2].Name)
}
}
// TestApplyOAuthModelAlias_KimiRename checks that a single alias configured
// for the "kimi" channel renames the matching model: the ID becomes the alias
// and the "models/" prefixed Name is rewritten to match.
func TestApplyOAuthModelAlias_KimiRename(t *testing.T) {
	const (
		wantID   = "k2.5"
		wantName = "models/k2.5"
	)

	aliases := []config.OAuthModelAlias{
		{Name: "kimi-k2.5", Alias: "k2.5"},
	}
	cfg := &config.Config{
		OAuthModelAlias: map[string][]config.OAuthModelAlias{"kimi": aliases},
	}
	input := []*ModelInfo{
		{ID: "kimi-k2.5", Name: "models/kimi-k2.5"},
	}

	out := applyOAuthModelAlias(cfg, "kimi", "oauth", input)
	if count := len(out); count != 1 {
		t.Fatalf("expected 1 model, got %d", count)
	}

	renamed := out[0]
	if renamed.ID != wantID {
		t.Fatalf("expected model id %q, got %q", wantID, renamed.ID)
	}
	if renamed.Name != wantName {
		t.Fatalf("expected model name %q, got %q", wantName, renamed.Name)
	}
}