Merge pull request #194 from PancakeZik/fix/assistant-content-parroting

fix: replace assistant placeholder text to prevent model parroting
Merge pull request #191 from CheesesNguyen/feat/kiro-api-models-and-context-usage
2026-03-30 09:18:12 +00:00 · 2026-02-07 01:38:58 +08:00 · 2026-02-07 01:33:49 +08:00 · 2026-02-07 01:33:01 +08:00 · 2026-02-07 01:31:40 +08:00 · 2026-02-07 01:31:21 +08:00
10 changed files with 170 additions and 15 deletions
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -236,7 +236,7 @@ nonstream-keepalive-interval: 0

 # Global OAuth model name aliases (per channel)
 # These aliases rename model IDs for both model listing and request routing.
-# Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot.
+# Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot, kimi.
 # NOTE: Aliases do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
 # You can repeat the same name with different aliases to expose multiple client model names.
 #oauth-model-alias:
@@ -280,6 +280,9 @@ nonstream-keepalive-interval: 0
 #   iflow:
 #     - name: "glm-4.7"
 #       alias: "glm-god"
+#   kimi:
+#     - name: "kimi-k2.5"
+#       alias: "k2.5"
 #   kiro:
 #     - name: "kiro-claude-opus-4-5"
 #       alias: "op45"
@@ -309,6 +312,8 @@ nonstream-keepalive-interval: 0
 #     - "vision-model"
 #   iflow:
 #     - "tstars2.0"
+#   kimi:
+#     - "kimi-k2-thinking"
 #   kiro:
 #     - "kiro-claude-haiku-4-5"
 #   github-copilot:
--- a/internal/auth/kiro/aws_auth.go
+++ b/internal/auth/kiro/aws_auth.go
@@ -238,7 +238,7 @@ func (k *KiroAuth) ListAvailableModels(ctx context.Context, tokenData *KiroToken
 			Description    string  `json:"description"`
 			RateMultiplier float64 `json:"rateMultiplier"`
 			RateUnit       string  `json:"rateUnit"`
-			TokenLimits    struct {
+			TokenLimits    *struct {
 				MaxInputTokens int `json:"maxInputTokens"`
 			} `json:"tokenLimits"`
 		} `json:"models"`
@@ -250,13 +250,17 @@ func (k *KiroAuth) ListAvailableModels(ctx context.Context, tokenData *KiroToken

 	models := make([]*KiroModel, 0, len(result.Models))
 	for _, m := range result.Models {
+		maxInputTokens := 0
+		if m.TokenLimits != nil {
+			maxInputTokens = m.TokenLimits.MaxInputTokens
+		}
 		models = append(models, &KiroModel{
 			ModelID:        m.ModelID,
 			ModelName:      m.ModelName,
 			Description:    m.Description,
 			RateMultiplier: m.RateMultiplier,
 			RateUnit:       m.RateUnit,
-			MaxInputTokens: m.TokenLimits.MaxInputTokens,
+			MaxInputTokens: maxInputTokens,
 		})
 	}

--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -2102,6 +2102,22 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroclaude.Ki
 				}
 			}

+		case "contextUsageEvent":
+			// Handle context usage events from Kiro API
+			// Format: {"contextUsageEvent": {"contextUsagePercentage": 0.53}}
+			if ctxUsage, ok := event["contextUsageEvent"].(map[string]interface{}); ok {
+				if ctxPct, ok := ctxUsage["contextUsagePercentage"].(float64); ok {
+					upstreamContextPercentage = ctxPct
+					log.Debugf("kiro: parseEventStream received contextUsageEvent: %.2f%%", ctxPct*100)
+				}
+			} else {
+				// Try direct field (fallback)
+				if ctxPct, ok := event["contextUsagePercentage"].(float64); ok {
+					upstreamContextPercentage = ctxPct
+					log.Debugf("kiro: parseEventStream received contextUsagePercentage (direct): %.2f%%", ctxPct*100)
+				}
+			}
+
 		case "error", "exception", "internalServerException", "invalidStateEvent":
 			// Handle error events from Kiro API stream
 			errMsg := ""
@@ -2705,6 +2721,22 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 				}
 			}

+		case "contextUsageEvent":
+			// Handle context usage events from Kiro API
+			// Format: {"contextUsageEvent": {"contextUsagePercentage": 0.53}}
+			if ctxUsage, ok := event["contextUsageEvent"].(map[string]interface{}); ok {
+				if ctxPct, ok := ctxUsage["contextUsagePercentage"].(float64); ok {
+					upstreamContextPercentage = ctxPct
+					log.Debugf("kiro: streamToChannel received contextUsageEvent: %.2f%%", ctxPct*100)
+				}
+			} else {
+				// Try direct field (fallback)
+				if ctxPct, ok := event["contextUsagePercentage"].(float64); ok {
+					upstreamContextPercentage = ctxPct
+					log.Debugf("kiro: streamToChannel received contextUsagePercentage (direct): %.2f%%", ctxPct*100)
+				}
+			}
+
 		case "error", "exception", "internalServerException":
 			// Handle error events from Kiro API stream
 			errMsg := ""
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
@@ -14,6 +14,7 @@ import (
 	"time"

 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
+	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -85,6 +86,7 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ

 	// Extract and set usage metadata (token counts).
 	if usageResult := gjson.GetBytes(rawJSON, "response.usageMetadata"); usageResult.Exists() {
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
 			template, _ = sjson.Set(template, "usage.completion_tokens", candidatesTokenCountResult.Int())
 		}
@@ -97,6 +99,14 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("antigravity openai response: failed to set cached_tokens: %v", err)
+			}
+		}
 	}

 	// Process the main content part of the response.
--- a/internal/translator/kiro/claude/kiro_claude_request.go
+++ b/internal/translator/kiro/claude/kiro_claude_request.go
@@ -608,18 +608,22 @@ func processMessages(messages gjson.Result, modelID, origin string) ([]KiroHisto

 		if role == "user" {
 			userMsg, toolResults := BuildUserMessageStruct(msg, modelID, origin)
+			// CRITICAL: Kiro API requires content to be non-empty for ALL user messages
+			// This includes both history messages and the current message.
+			// When user message contains only tool_result (no text), content will be empty.
+			// This commonly happens in compaction requests from OpenCode.
+			if strings.TrimSpace(userMsg.Content) == "" {
+				if len(toolResults) > 0 {
+					userMsg.Content = kirocommon.DefaultUserContentWithToolResults
+				} else {
+					userMsg.Content = kirocommon.DefaultUserContent
+				}
+				log.Debugf("kiro: user content was empty, using default: %s", userMsg.Content)
+			}
 			if isLastMessage {
 				currentUserMsg = &userMsg
 				currentToolResults = toolResults
 			} else {
-				// CRITICAL: Kiro API requires content to be non-empty for history messages too
-				if strings.TrimSpace(userMsg.Content) == "" {
-					if len(toolResults) > 0 {
-						userMsg.Content = "Tool results provided."
-					} else {
-						userMsg.Content = "Continue"
-					}
-				}
 				// For history messages, embed tool results in context
 				if len(toolResults) > 0 {
 					userMsg.UserInputMessageContext = &KiroUserInputMessageContext{
--- a/internal/translator/kiro/common/constants.go
+++ b/internal/translator/kiro/common/constants.go
@@ -31,11 +31,23 @@ const (

 	// DefaultAssistantContentWithTools is the fallback content for assistant messages
 	// that have tool_use but no text content. Kiro API requires non-empty content.
-	DefaultAssistantContentWithTools = "I'll help you with that."
+	// IMPORTANT: Use a minimal neutral string that the model won't mimic in responses.
+	// Previously this was "I'll help you with that.", which caused the model to parrot it back.
+	DefaultAssistantContentWithTools = "."

 	// DefaultAssistantContent is the fallback content for assistant messages
 	// that have no content at all. Kiro API requires non-empty content.
-	DefaultAssistantContent = "I understand."
+	// IMPORTANT: Use a minimal neutral string that the model won't mimic in responses.
+	// Previously this was "I understand.", which could leak into model behavior.
+	DefaultAssistantContent = "."
+
+	// DefaultUserContentWithToolResults is the fallback content for user messages
+	// that have only tool_result (no text). Kiro API requires non-empty content.
+	DefaultUserContentWithToolResults = "Tool results provided."
+
+	// DefaultUserContent is the fallback content for user messages
+	// that have no content at all. Kiro API requires non-empty content.
+	DefaultUserContent = "Continue"

 	// KiroAgenticSystemPrompt is injected only for -agentic models to prevent timeouts on large writes.
 	// AWS Kiro API has a 2-3 minute timeout for large file write operations.
--- a/sdk/cliproxy/auth/oauth_model_alias.go
+++ b/sdk/cliproxy/auth/oauth_model_alias.go
@@ -221,7 +221,7 @@ func modelAliasChannel(auth *Auth) string {
 // and auth kind. Returns empty string if the provider/authKind combination doesn't support
 // OAuth model alias (e.g., API key authentication).
 //
-// Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot.
+// Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot, kimi.
 func OAuthModelAliasChannel(provider, authKind string) string {
 	provider = strings.ToLower(strings.TrimSpace(provider))
 	authKind = strings.ToLower(strings.TrimSpace(authKind))
@@ -245,7 +245,7 @@ func OAuthModelAliasChannel(provider, authKind string) string {
 			return ""
 		}
 		return "codex"
-	case "gemini-cli", "aistudio", "antigravity", "qwen", "iflow", "kiro", "github-copilot":
+	case "gemini-cli", "aistudio", "antigravity", "qwen", "iflow", "kiro", "github-copilot", "kimi":
 		return provider
 	default:
 		return ""
--- a/sdk/cliproxy/auth/oauth_model_alias_test.go
+++ b/sdk/cliproxy/auth/oauth_model_alias_test.go
@@ -79,6 +79,15 @@ func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) {
 			input:   "gemini-2.5-pro(none)",
 			want:    "gemini-2.5-pro-exp-03-25(none)",
 		},
+		{
+			name: "kimi suffix preserved",
+			aliases: map[string][]internalconfig.OAuthModelAlias{
+				"kimi": {{Name: "kimi-k2.5", Alias: "k2.5"}},
+			},
+			channel: "kimi",
+			input:   "k2.5(high)",
+			want:    "kimi-k2.5(high)",
+		},
 		{
 			name: "case insensitive alias lookup with suffix",
 			aliases: map[string][]internalconfig.OAuthModelAlias{
@@ -161,6 +170,8 @@ func createAuthForChannel(channel string) *Auth {
 		return &Auth{Provider: "qwen"}
 	case "iflow":
 		return &Auth{Provider: "iflow"}
+	case "kimi":
+		return &Auth{Provider: "kimi"}
 	case "kiro":
 		return &Auth{Provider: "kiro"}
 	default:
@@ -168,6 +179,14 @@ func createAuthForChannel(channel string) *Auth {
 	}
 }

+func TestOAuthModelAliasChannel_Kimi(t *testing.T) {
+	t.Parallel()
+
+	if got := OAuthModelAliasChannel("kimi", "oauth"); got != "kimi" {
+		t.Fatalf("OAuthModelAliasChannel() = %q, want %q", got, "kimi")
+	}
+}
+
 func TestApplyOAuthModelAlias_SuffixPreservation(t *testing.T) {
 	t.Parallel()

--- a/sdk/cliproxy/service_oauth_excluded_models_test.go
+++ b/sdk/cliproxy/service_oauth_excluded_models_test.go
@@ -0,0 +1,45 @@
+package cliproxy
+
+import (
+	"testing"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
+)
+
+func TestOAuthExcludedModels_KimiOAuth(t *testing.T) {
+	t.Parallel()
+
+	svc := &Service{
+		cfg: &config.Config{
+			OAuthExcludedModels: map[string][]string{
+				"kimi": {"kimi-k2-thinking", "kimi-k2.5"},
+			},
+		},
+	}
+
+	got := svc.oauthExcludedModels("kimi", "oauth")
+	if len(got) != 2 {
+		t.Fatalf("expected 2 excluded models, got %d", len(got))
+	}
+	if got[0] != "kimi-k2-thinking" || got[1] != "kimi-k2.5" {
+		t.Fatalf("unexpected excluded models: %#v", got)
+	}
+}
+
+func TestOAuthExcludedModels_KimiAPIKeyReturnsNil(t *testing.T) {
+	t.Parallel()
+
+	svc := &Service{
+		cfg: &config.Config{
+			OAuthExcludedModels: map[string][]string{
+				"kimi": {"kimi-k2-thinking"},
+			},
+		},
+	}
+
+	got := svc.oauthExcludedModels("kimi", "apikey")
+	if got != nil {
+		t.Fatalf("expected nil for apikey auth kind, got %#v", got)
+	}
+}
+
--- a/sdk/cliproxy/service_oauth_model_alias_test.go
+++ b/sdk/cliproxy/service_oauth_model_alias_test.go
@@ -90,3 +90,27 @@ func TestApplyOAuthModelAlias_ForkAddsMultipleAliases(t *testing.T) {
 		t.Fatalf("expected forked model name %q, got %q", "models/g5-2", out[2].Name)
 	}
 }
+
+func TestApplyOAuthModelAlias_KimiRename(t *testing.T) {
+	cfg := &config.Config{
+		OAuthModelAlias: map[string][]config.OAuthModelAlias{
+			"kimi": {
+				{Name: "kimi-k2.5", Alias: "k2.5"},
+			},
+		},
+	}
+	models := []*ModelInfo{
+		{ID: "kimi-k2.5", Name: "models/kimi-k2.5"},
+	}
+
+	out := applyOAuthModelAlias(cfg, "kimi", "oauth", models)
+	if len(out) != 1 {
+		t.Fatalf("expected 1 model, got %d", len(out))
+	}
+	if out[0].ID != "k2.5" {
+		t.Fatalf("expected model id %q, got %q", "k2.5", out[0].ID)
+	}
+	if out[0].Name != "models/k2.5" {
+		t.Fatalf("expected model name %q, got %q", "models/k2.5", out[0].Name)
+	}
+}
Author	SHA1	Message	Date
Luis Pater	d182e893b6	Merge pull request #194 from PancakeZik/fix/assistant-content-parroting fix: replace assistant placeholder text to prevent model parroting	2026-02-07 01:38:58 +08:00
Luis Pater	2e8d49a641	Merge pull request #191 from CheesesNguyen/feat/kiro-api-models-and-context-usage feat(kiro): add contextUsageEvent handler	2026-02-07 01:33:49 +08:00
Luis Pater	6abd7d27d9	Merge pull request #190 from taetaetae/fix/kiro-claude-compaction-current-user-empty-content fix(kiro): handle empty content in current user message for compaction	2026-02-07 01:33:01 +08:00
Luis Pater	8fa12af403	Merge pull request #195 from router-for-me/plus v6.8.1	2026-02-07 01:31:40 +08:00
Luis Pater	77586ed7d3	Merge branch 'main' into plus	2026-02-07 01:31:21 +08:00
Luis Pater	394497fb2f	Merge pull request #1465 from router-for-me/kimi-fix fix(kimi): add OAuth model-alias channel support and cover OAuth excl…	2026-02-07 01:27:30 +08:00
LTbinglingfeng	fc7b6ef086	fix(kimi): add OAuth model-alias channel support and cover OAuth excluded-models with tests	2026-02-07 01:16:39 +08:00
Joao	98edcad39d	fix: replace assistant placeholder text to prevent model parroting Kiro API requires non-empty content on assistant messages, so CLIProxyAPI injects placeholder text when assistant messages only contain tool_use blocks (no text). The previous placeholders were conversational phrases: - DefaultAssistantContentWithTools: "I'll help you with that." - DefaultAssistantContent: "I understand." In agentic sessions with many tool calls, these phrases appeared dozens of times in conversation history. Opus 4.6 (and likely other models) picked up on this pattern and started parroting "I'll help you with that." before every tool call in its actual responses. Fix: Replace both placeholders with a single dot ".", which satisfies Kiro's non-empty requirement without giving the model a phrase to mimic.	2026-02-06 16:42:21 +00:00
Luis Pater	1187aa8222	feat(translator): capture cached token count in usage metadata and handle prompt caching - Added support to extract and include `cachedContentTokenCount` in `usage.prompt_tokens_details`. - Logged warnings for failures to set cached token count for better debugging.	2026-02-06 21:28:40 +08:00
CheesesNguyen	16693053f5	feat(kiro): add contextUsageEvent handler and simplify model structs - Add contextUsageEvent case handler in kiro_executor.go for both parseEventStream and streamToChannel functions - Handle nested format: {"contextUsageEvent": {"contextUsagePercentage": 0.53}} - Keep KiroModel struct minimal with only essential fields - Remove unused KiroPromptCachingInfo struct from kiro_model_converter.go - Remove unused SupportedInputTypes and PromptCaching fields from KiroAPIModel	2026-02-06 11:12:27 +07:00
taetaetae	4e3bad3907	fix(kiro): handle empty content in current user message for compaction Problem: - PR #186 fixed empty content for assistant messages and history user messages - But current user message (isLastMessage == true) was not fixed - When user message contains only tool_result (no text), content becomes empty - This causes 'Improperly formed request' errors from Kiro API - Compaction requests from OpenCode commonly have this pattern Solution: - Move empty content check BEFORE the isLastMessage branch - Apply fallback content to ALL user messages, not just history - Add DefaultUserContentWithToolResults and DefaultUserContent constants Fixes compaction failures for OpenCode + Quotio + CLIProxyAPIPlus + Kiro stack	2026-02-06 11:58:43 +09:00