Merge pull request #18 from Ravens2121/master

Kiro Executor Stability and API Compatibility Improvements
feat(kiro): enhance Kiro executor stability and compatibility
2026-04-01 10:15:26 +00:00 · 2025-12-11 08:35:22 +08:00 · 2025-12-11 08:13:33 +08:00 · 2025-12-11 05:40:10 +08:00 · 2025-12-11 05:37:22 +08:00 · 2025-12-11 03:17:55 +08:00
15 changed files with 1254 additions and 694 deletions
--- a/internal/api/modules/amp/fallback_handlers.go
+++ b/internal/api/modules/amp/fallback_handlers.go
@@ -133,8 +133,8 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 			return
 		}

-		// Normalize model (handles Gemini thinking suffixes)
-		normalizedModel, _ := util.NormalizeGeminiThinkingModel(modelName)
+		// Normalize model (handles dynamic thinking suffixes)
+		normalizedModel, _ := util.NormalizeThinkingModel(modelName)

 		// Track resolved model for logging (may change if mapping is applied)
 		resolvedModel := normalizedModel
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -26,60 +26,6 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4.5 Sonnet",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
-		},
-		{
-			ID:                  "claude-sonnet-4-5-thinking",
-			Object:              "model",
-			Created:             1759104000, // 2025-09-29
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Sonnet Thinking",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-opus-4-5-thinking",
-			Object:              "model",
-			Created:             1761955200, // 2025-11-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Opus Thinking",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-opus-4-5-thinking-low",
-			Object:              "model",
-			Created:             1761955200, // 2025-11-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Opus Thinking Low",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-opus-4-5-thinking-medium",
-			Object:              "model",
-			Created:             1761955200, // 2025-11-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Opus Thinking Medium",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-opus-4-5-thinking-high",
-			Object:              "model",
-			Created:             1761955200, // 2025-11-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Opus Thinking High",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
 			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
@@ -92,6 +38,7 @@ func GetClaudeModels() []*ModelInfo {
 			Description:         "Premium model combining maximum intelligence with practical performance",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                  "claude-opus-4-1-20250805",
@@ -530,58 +477,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID:                  "gpt-5-minimal",
-			Object:              "model",
-			Created:             1754524800,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-08-07",
-			DisplayName:         "GPT 5 Minimal",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-low",
-			Object:              "model",
-			Created:             1754524800,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-08-07",
-			DisplayName:         "GPT 5 Low",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-medium",
-			Object:              "model",
-			Created:             1754524800,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-08-07",
-			DisplayName:         "GPT 5 Medium",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-high",
-			Object:              "model",
-			Created:             1754524800,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-08-07",
-			DisplayName:         "GPT 5 High",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 		{
 			ID:                  "gpt-5-codex",
 			Object:              "model",
@@ -595,45 +490,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID:                  "gpt-5-codex-low",
-			Object:              "model",
-			Created:             1757894400,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-09-15",
-			DisplayName:         "GPT 5 Codex Low",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-codex-medium",
-			Object:              "model",
-			Created:             1757894400,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-09-15",
-			DisplayName:         "GPT 5 Codex Medium",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-codex-high",
-			Object:              "model",
-			Created:             1757894400,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-09-15",
-			DisplayName:         "GPT 5 Codex High",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 		{
 			ID:                  "gpt-5-codex-mini",
 			Object:              "model",
@@ -647,32 +503,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID:                  "gpt-5-codex-mini-medium",
-			Object:              "model",
-			Created:             1762473600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-11-07",
-			DisplayName:         "GPT 5 Codex Mini Medium",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-codex-mini-high",
-			Object:              "model",
-			Created:             1762473600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-11-07",
-			DisplayName:         "GPT 5 Codex Mini High",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 		{
 			ID:                  "gpt-5.1",
 			Object:              "model",
@@ -686,58 +516,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID:                  "gpt-5.1-none",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Nothink",
-			Description:         "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-low",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Low",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-medium",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Medium",
-			Description:         "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-high",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 High",
-			Description:         "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 		{
 			ID:                  "gpt-5.1-codex",
 			Object:              "model",
@@ -751,45 +529,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID:                  "gpt-5.1-codex-low",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Codex Low",
-			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-medium",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Codex Medium",
-			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-high",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Codex High",
-			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 		{
 			ID:                  "gpt-5.1-codex-mini",
 			Object:              "model",
@@ -803,33 +542,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID:                  "gpt-5.1-codex-mini-medium",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Codex Mini Medium",
-			Description:         "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-mini-high",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5.1 Codex Mini High",
-			Description:         "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-
 		{
 			ID:                  "gpt-5.1-codex-max",
 			Object:              "model",
@@ -843,58 +555,6 @@ func GetOpenAIModels() []*ModelInfo {
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
 		},
-		{
-			ID:                  "gpt-5.1-codex-max-low",
-			Object:              "model",
-			Created:             1763424000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5.1 Codex Max Low",
-			Description:         "Stable version of GPT 5.1 Codex Max Low",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-max-medium",
-			Object:              "model",
-			Created:             1763424000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5.1 Codex Max Medium",
-			Description:         "Stable version of GPT 5.1 Codex Max Medium",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-max-high",
-			Object:              "model",
-			Created:             1763424000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5.1 Codex Max High",
-			Description:         "Stable version of GPT 5.1 Codex Max High",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-max-xhigh",
-			Object:              "model",
-			Created:             1763424000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5.1 Codex Max XHigh",
-			Description:         "Stable version of GPT 5.1 Codex Max XHigh",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
 	}
 }

--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -54,15 +54,22 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	modelForUpstream := req.Model
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
-		body, _ = sjson.SetBytes(body, "model", modelOverride)
-		modelForUpstream = modelOverride
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel == "" {
+		upstreamModel = req.Model
 	}
-	// Inject thinking config based on model suffix for thinking variants
-	body = e.injectThinkingConfig(req.Model, body)
+	if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
+		upstreamModel = modelOverride
+	} else if !strings.EqualFold(upstreamModel, req.Model) {
+		if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+			upstreamModel = modelOverride
+		}
+	}
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	// Inject thinking config based on model metadata for thinking variants
+	body = e.injectThinkingConfig(req.Model, req.Metadata, body)

-	if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
+	if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -161,11 +168,20 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
-		body, _ = sjson.SetBytes(body, "model", modelOverride)
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel == "" {
+		upstreamModel = req.Model
 	}
-	// Inject thinking config based on model suffix for thinking variants
-	body = e.injectThinkingConfig(req.Model, body)
+	if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
+		upstreamModel = modelOverride
+	} else if !strings.EqualFold(upstreamModel, req.Model) {
+		if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+			upstreamModel = modelOverride
+		}
+	}
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	// Inject thinking config based on model metadata for thinking variants
+	body = e.injectThinkingConfig(req.Model, req.Metadata, body)
 	body = checkSystemInstructions(body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)

@@ -295,13 +311,20 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	modelForUpstream := req.Model
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
-		body, _ = sjson.SetBytes(body, "model", modelOverride)
-		modelForUpstream = modelOverride
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel == "" {
+		upstreamModel = req.Model
 	}
+	if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
+		upstreamModel = modelOverride
+	} else if !strings.EqualFold(upstreamModel, req.Model) {
+		if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+			upstreamModel = modelOverride
+		}
+	}
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

-	if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
+	if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}

@@ -427,25 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
 	return betas, body
 }

-// injectThinkingConfig adds thinking configuration based on model name suffix
-func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []byte {
+// injectThinkingConfig adds thinking configuration based on metadata or legacy suffixes.
+func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
 	// Only inject if thinking config is not already present
 	if gjson.GetBytes(body, "thinking").Exists() {
 		return body
 	}

-	var budgetTokens int
-	switch {
-	case strings.HasSuffix(modelName, "-thinking-low"):
-		budgetTokens = 1024
-	case strings.HasSuffix(modelName, "-thinking-medium"):
-		budgetTokens = 8192
-	case strings.HasSuffix(modelName, "-thinking-high"):
-		budgetTokens = 24576
-	case strings.HasSuffix(modelName, "-thinking"):
-		// Default thinking without suffix uses medium budget
-		budgetTokens = 8192
-	default:
+	budgetTokens, ok := resolveClaudeThinkingBudget(modelName, metadata)
+	if !ok || budgetTokens <= 0 {
 		return body
 	}

@@ -454,6 +467,44 @@ func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []b
 	return body
 }

+func resolveClaudeThinkingBudget(modelName string, metadata map[string]any) (int, bool) {
+	budget, include, effort, matched := util.ThinkingFromMetadata(metadata)
+	if matched {
+		if include != nil && !*include {
+			return 0, false
+		}
+		if budget != nil {
+			normalized := util.NormalizeThinkingBudget(modelName, *budget)
+			if normalized > 0 {
+				return normalized, true
+			}
+			return 0, false
+		}
+		if effort != nil {
+			if derived, ok := util.ThinkingEffortToBudget(modelName, *effort); ok && derived > 0 {
+				return derived, true
+			}
+		}
+	}
+	return claudeBudgetFromSuffix(modelName)
+}
+
+func claudeBudgetFromSuffix(modelName string) (int, bool) {
+	lower := strings.ToLower(strings.TrimSpace(modelName))
+	switch {
+	case strings.HasSuffix(lower, "-thinking-low"):
+		return 1024, true
+	case strings.HasSuffix(lower, "-thinking-medium"):
+		return 8192, true
+	case strings.HasSuffix(lower, "-thinking-high"):
+		return 24576, true
+	case strings.HasSuffix(lower, "-thinking"):
+		return 8192, true
+	default:
+		return 0, false
+	}
+}
+
 // ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
 // Anthropic API requires this constraint; violating it returns a 400 error.
 // This function should be called after all thinking configuration is finalized.
@@ -491,35 +542,45 @@ func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
 }

 func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
-	if alias == "" {
+	trimmed := strings.TrimSpace(alias)
+	if trimmed == "" {
 		return ""
 	}
-	// Hardcoded mappings for thinking models to actual Claude model names
-	switch alias {
-	case "claude-opus-4-5-thinking", "claude-opus-4-5-thinking-low", "claude-opus-4-5-thinking-medium", "claude-opus-4-5-thinking-high":
-		return "claude-opus-4-5-20251101"
-	case "claude-sonnet-4-5-thinking":
-		return "claude-sonnet-4-5-20250929"
-	}
+
 	entry := e.resolveClaudeConfig(auth)
 	if entry == nil {
 		return ""
 	}
+
+	normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
+
+	// Candidate names to match against configured aliases/names.
+	candidates := []string{strings.TrimSpace(normalizedModel)}
+	if !strings.EqualFold(normalizedModel, trimmed) {
+		candidates = append(candidates, trimmed)
+	}
+	if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
+		candidates = append(candidates, original)
+	}
+
 	for i := range entry.Models {
 		model := entry.Models[i]
 		name := strings.TrimSpace(model.Name)
 		modelAlias := strings.TrimSpace(model.Alias)
-		if modelAlias != "" {
-			if strings.EqualFold(modelAlias, alias) {
+
+		for _, candidate := range candidates {
+			if candidate == "" {
+				continue
+			}
+			if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
 				if name != "" {
 					return name
 				}
-				return alias
+				return candidate
+			}
+			if name != "" && strings.EqualFold(name, candidate) {
+				return name
 			}
-			continue
-		}
-		if name != "" && strings.EqualFold(name, alias) {
-			return name
 		}
 	}
 	return ""
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -49,14 +49,14 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-
-	body = e.setReasoningEffortByAlias(req.Model, body)
-
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
-
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")

@@ -142,13 +142,16 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

-	body = e.setReasoningEffortByAlias(req.Model, body)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -235,14 +238,16 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 }

 func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)

 	modelForCounting := req.Model

-	body = e.setReasoningEffortByAlias(req.Model, body)
-
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "stream", false)

@@ -261,83 +266,6 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 }

-func (e *CodexExecutor) setReasoningEffortByAlias(modelName string, payload []byte) []byte {
-	if util.InArray([]string{"gpt-5", "gpt-5-minimal", "gpt-5-low", "gpt-5-medium", "gpt-5-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5")
-		switch modelName {
-		case "gpt-5-minimal":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "minimal")
-		case "gpt-5-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5-codex", "gpt-5-codex-low", "gpt-5-codex-medium", "gpt-5-codex-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex")
-		switch modelName {
-		case "gpt-5-codex-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5-codex-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5-codex-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex-mini")
-		switch modelName {
-		case "gpt-5-codex-mini-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5-codex-mini-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1", "gpt-5.1-none", "gpt-5.1-low", "gpt-5.1-medium", "gpt-5.1-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1")
-		switch modelName {
-		case "gpt-5.1-none":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "none")
-		case "gpt-5.1-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5.1-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5.1-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1-codex", "gpt-5.1-codex-low", "gpt-5.1-codex-medium", "gpt-5.1-codex-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex")
-		switch modelName {
-		case "gpt-5.1-codex-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5.1-codex-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5.1-codex-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1-codex-mini", "gpt-5.1-codex-mini-medium", "gpt-5.1-codex-mini-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-mini")
-		switch modelName {
-		case "gpt-5.1-codex-mini-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5.1-codex-mini-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1-codex-max", "gpt-5.1-codex-max-low", "gpt-5.1-codex-max-medium", "gpt-5.1-codex-max-high", "gpt-5.1-codex-max-xhigh"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-max")
-		switch modelName {
-		case "gpt-5.1-codex-max-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5.1-codex-max-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5.1-codex-max-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		case "gpt-5.1-codex-max-xhigh":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "xhigh")
-		}
-	}
-	return payload
-}
-
 func tokenizerForCodexModel(model string) (tokenizer.Codec, error) {
 	sanitized := strings.ToLower(strings.TrimSpace(model))
 	switch {
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -75,6 +75,8 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	// Official Gemini API via API key or OAuth bearer
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
@@ -85,6 +87,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -93,7 +96,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		}
 	}
 	baseURL := resolveGeminiBaseURL(auth)
-	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, action)
+	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -167,6 +170,8 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
@@ -176,9 +181,10 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	baseURL := resolveGeminiBaseURL(auth)
-	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, "streamGenerateContent")
 	if opts.Alt == "" {
 		url = url + "?alt=sse"
 	} else {
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -105,10 +105,12 @@ func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyau

 // countTokensWithServiceAccount handles token counting using service account credentials.
 func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -117,13 +119,14 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 	}
 	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
 	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
+	translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")

 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens")

 	httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
 	if errNewReq != nil {
@@ -191,10 +194,12 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context

 // countTokensWithAPIKey handles token counting using API key credentials.
 func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -203,6 +208,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 	}
 	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
 	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
+	translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
@@ -286,10 +292,12 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -301,6 +309,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -309,7 +318,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		}
 	}
 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -383,10 +392,12 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -398,6 +409,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -410,7 +422,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, action)
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -481,10 +493,12 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -496,9 +510,10 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "streamGenerateContent")
 	if opts.Alt == "" {
 		url = url + "?alt=sse"
 	} else {
@@ -595,10 +610,12 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -610,12 +627,13 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	// For API key auth, use simpler URL format without project/location
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, "streamGenerateContent")
 	if opts.Alt == "" {
 		url = url + "?alt=sse"
 	} else {
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -57,6 +57,10 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+		body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)

 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -139,6 +143,10 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

+	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+		body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	}
 	// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
 	toolsResult := gjson.GetBytes(body, "tools")
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -121,7 +121,12 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 		return resp, fmt.Errorf("kiro: access token not found in auth")
 	}
 	if profileArn == "" {
-		log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
+		// Only warn if not using builder-id auth (which doesn't need profileArn)
+		if auth == nil || auth.Metadata == nil {
+			log.Debugf("kiro: profile ARN not found in auth (may be normal for builder-id)")
+		} else if authMethod, ok := auth.Metadata["auth_method"].(string); !ok || authMethod != "builder-id" {
+			log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
+		}
 	}

 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
@@ -161,10 +166,19 @@ func (e *KiroExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 		currentOrigin = "CLI"
 	}
 	
-	kiroPayload := e.buildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly)
+	// Determine if profileArn should be included based on auth method
+	// profileArn is only needed for social auth (Google OAuth), not for builder-id (AWS SSO)
+	effectiveProfileArn := profileArn
+	if auth != nil && auth.Metadata != nil {
+		if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" {
+			effectiveProfileArn = "" // Don't include profileArn for builder-id auth
+		}
+	}
+	
+	kiroPayload := e.buildKiroPayload(body, kiroModelID, effectiveProfileArn, currentOrigin, isAgentic, isChatOnly)

 	// Execute with retry on 401/403 and 429 (quota exhausted)
-	resp, err = e.executeWithRetry(ctx, auth, req, opts, accessToken, profileArn, kiroPayload, body, from, to, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly)
+	resp, err = e.executeWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, kiroPayload, body, from, to, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly)
 	return resp, err
 }

@@ -301,9 +315,18 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
 				}
 			}
 			if len(content) > 0 {
-				usageInfo.OutputTokens = int64(len(content) / 4)
+				// Use tiktoken for more accurate output token calculation
+				if enc, encErr := tokenizerForModel(req.Model); encErr == nil {
+					if tokenCount, countErr := enc.Count(content); countErr == nil {
+						usageInfo.OutputTokens = int64(tokenCount)
+					}
+				}
+				// Fallback to character count estimation if tiktoken fails
 				if usageInfo.OutputTokens == 0 {
-					usageInfo.OutputTokens = 1
+					usageInfo.OutputTokens = int64(len(content) / 4)
+					if usageInfo.OutputTokens == 0 {
+						usageInfo.OutputTokens = 1
+					}
 				}
 			}
 			usageInfo.TotalTokens = usageInfo.InputTokens + usageInfo.OutputTokens
@@ -330,7 +353,12 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		return nil, fmt.Errorf("kiro: access token not found in auth")
 	}
 	if profileArn == "" {
-		log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
+		// Only warn if not using builder-id auth (which doesn't need profileArn)
+		if auth == nil || auth.Metadata == nil {
+			log.Debugf("kiro: profile ARN not found in auth (may be normal for builder-id)")
+		} else if authMethod, ok := auth.Metadata["auth_method"].(string); !ok || authMethod != "builder-id" {
+			log.Warnf("kiro: profile ARN not found in auth, API calls may fail")
+		}
 	}

 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
@@ -370,10 +398,19 @@ func (e *KiroExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 		currentOrigin = "CLI"
 	}
 	
-	kiroPayload := e.buildKiroPayload(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly)
+	// Determine if profileArn should be included based on auth method
+	// profileArn is only needed for social auth (Google OAuth), not for builder-id (AWS SSO)
+	effectiveProfileArn := profileArn
+	if auth != nil && auth.Metadata != nil {
+		if authMethod, ok := auth.Metadata["auth_method"].(string); ok && authMethod == "builder-id" {
+			effectiveProfileArn = "" // Don't include profileArn for builder-id auth
+		}
+	}
+	
+	kiroPayload := e.buildKiroPayload(body, kiroModelID, effectiveProfileArn, currentOrigin, isAgentic, isChatOnly)

 	// Execute stream with retry on 401/403 and 429 (quota exhausted)
-	return e.executeStreamWithRetry(ctx, auth, req, opts, accessToken, profileArn, kiroPayload, body, from, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly)
+	return e.executeStreamWithRetry(ctx, auth, req, opts, accessToken, effectiveProfileArn, kiroPayload, body, from, reporter, currentOrigin, kiroModelID, isAgentic, isChatOnly)
 }

 // executeStreamWithRetry performs the streaming HTTP request with automatic retry on auth errors.
@@ -491,6 +528,12 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox

 		go func(resp *http.Response) {
 			defer close(out)
+			defer func() {
+				if r := recover(); r != nil {
+					log.Errorf("kiro: panic in stream handler: %v", r)
+					out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("internal error: %v", r)}
+				}
+			}()
 			defer func() {
 				if errClose := resp.Body.Close(); errClose != nil {
 					log.Errorf("response body close error: %v", errClose)
@@ -587,10 +630,10 @@ type kiroPayload struct {
 }

 type kiroConversationState struct {
+	ChatTriggerType string               `json:"chatTriggerType"` // Required: "MANUAL" - must be first field
 	ConversationID  string               `json:"conversationId"`
-	History         []kiroHistoryMessage `json:"history"`
 	CurrentMessage  kiroCurrentMessage   `json:"currentMessage"`
-	ChatTriggerType string               `json:"chatTriggerType"` // Required: "MANUAL"
+	History         []kiroHistoryMessage `json:"history,omitempty"` // Only include when non-empty
 }

 type kiroCurrentMessage struct {
@@ -627,9 +670,9 @@ type kiroUserInputMessageContext struct {
 }

 type kiroToolResult struct {
-	ToolUseID string              `json:"toolUseId"`
 	Content   []kiroTextContent   `json:"content"`
 	Status    string              `json:"status"`
+	ToolUseID string              `json:"toolUseId"`
 }

 type kiroTextContent struct {
@@ -735,7 +778,9 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
 	var currentUserMsg *kiroUserInputMessage
 	var currentToolResults []kiroToolResult

-	messagesArray := messages.Array()
+	// Merge adjacent messages with the same role before processing
+	// This reduces API call complexity and improves compatibility
+	messagesArray := mergeAdjacentMessages(messages.Array())
 	for i, msg := range messagesArray {
 		role := msg.Get("role").String()
 		isLastMessage := i == len(messagesArray)-1
@@ -746,6 +791,14 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
 				currentUserMsg = &userMsg
 				currentToolResults = toolResults
 			} else {
+				// CRITICAL: Kiro API requires content to be non-empty for history messages too
+				if strings.TrimSpace(userMsg.Content) == "" {
+					if len(toolResults) > 0 {
+						userMsg.Content = "Tool results provided."
+					} else {
+						userMsg.Content = "Continue"
+					}
+				}
 				// For history messages, embed tool results in context
 				if len(toolResults) > 0 {
 					userMsg.UserInputMessageContext = &kiroUserInputMessageContext{
@@ -758,9 +811,24 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
 			}
 		} else if role == "assistant" {
 			assistantMsg := e.buildAssistantMessageStruct(msg)
-			history = append(history, kiroHistoryMessage{
-				AssistantResponseMessage: &assistantMsg,
-			})
+			// If this is the last message and it's an assistant message,
+			// we need to add it to history and create a "Continue" user message
+			// because Kiro API requires currentMessage to be userInputMessage type
+			if isLastMessage {
+				history = append(history, kiroHistoryMessage{
+					AssistantResponseMessage: &assistantMsg,
+				})
+				// Create a "Continue" user message as currentMessage
+				currentUserMsg = &kiroUserInputMessage{
+					Content: "Continue",
+					ModelID: modelID,
+					Origin:  origin,
+				}
+			} else {
+				history = append(history, kiroHistoryMessage{
+					AssistantResponseMessage: &assistantMsg,
+				})
+			}
 		}
 	}

@@ -777,7 +845,35 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
 		
 		// Add the actual user message
 		contentBuilder.WriteString(currentUserMsg.Content)
-		currentUserMsg.Content = contentBuilder.String()
+		finalContent := contentBuilder.String()
+		
+		// CRITICAL: Kiro API requires content to be non-empty, even when toolResults are present
+		// If content is empty or only whitespace, provide a default message
+		if strings.TrimSpace(finalContent) == "" {
+			if len(currentToolResults) > 0 {
+				finalContent = "Tool results provided."
+			} else {
+				finalContent = "Continue"
+			}
+			log.Debugf("kiro: content was empty, using default: %s", finalContent)
+		}
+		currentUserMsg.Content = finalContent
+		
+		// Deduplicate currentToolResults before adding to context
+		// Kiro API does not accept duplicate toolUseIds
+		if len(currentToolResults) > 0 {
+			seenIDs := make(map[string]bool)
+			uniqueToolResults := make([]kiroToolResult, 0, len(currentToolResults))
+			for _, tr := range currentToolResults {
+				if !seenIDs[tr.ToolUseID] {
+					seenIDs[tr.ToolUseID] = true
+					uniqueToolResults = append(uniqueToolResults, tr)
+				} else {
+					log.Debugf("kiro: skipping duplicate toolResult in currentMessage: %s", tr.ToolUseID)
+				}
+			}
+			currentToolResults = uniqueToolResults
+		}
 		
 		// Build userInputMessageContext with tools and tool results
 		if len(kiroTools) > 0 || len(currentToolResults) > 0 {
@@ -805,21 +901,18 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,
 		}}
 	}
 	
+	// Build payload with correct field order (matches struct definition)
+	// Note: history is omitempty, so nil/empty slice won't be serialized
 	payload := kiroPayload{
 		ConversationState: kiroConversationState{
+			ChatTriggerType: "MANUAL", // Required by Kiro API - must be first
 			ConversationID:  uuid.New().String(),
-			History:         history,
 			CurrentMessage:  currentMessage,
-			ChatTriggerType: "MANUAL", // Required by Kiro API
+			History:         history, // Will be omitted if empty due to omitempty tag
 		},
 		ProfileArn: profileArn,
 	}

-	// Ensure history is not nil (empty array)
-	if payload.ConversationState.History == nil {
-		payload.ConversationState.History = []kiroHistoryMessage{}
-	}
-
 	result, err := json.Marshal(payload)
 	if err != nil {
 		log.Debugf("kiro: failed to marshal payload: %v", err)
@@ -830,11 +923,15 @@ func (e *KiroExecutor) buildKiroPayload(claudeBody []byte, modelID, profileArn,

 // buildUserMessageStruct builds a user message and extracts tool results
 // origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE.
+// IMPORTANT: Kiro API does not accept duplicate toolUseIds, so we deduplicate here.
 func (e *KiroExecutor) buildUserMessageStruct(msg gjson.Result, modelID, origin string) (kiroUserInputMessage, []kiroToolResult) {
 	content := msg.Get("content")
 	var contentBuilder strings.Builder
 	var toolResults []kiroToolResult
 	var images []kiroImage
+	
+	// Track seen toolUseIds to deduplicate - Kiro API rejects duplicate toolUseIds
+	seenToolUseIDs := make(map[string]bool)

 	if content.IsArray() {
 		for _, part := range content.Array() {
@@ -864,6 +961,14 @@ func (e *KiroExecutor) buildUserMessageStruct(msg gjson.Result, modelID, origin
 			case "tool_result":
 				// Extract tool result for API
 				toolUseID := part.Get("tool_use_id").String()
+				
+				// Skip duplicate toolUseIds - Kiro API does not accept duplicates
+				if seenToolUseIDs[toolUseID] {
+					log.Debugf("kiro: skipping duplicate tool_result with toolUseId: %s", toolUseID)
+					continue
+				}
+				seenToolUseIDs[toolUseID] = true
+				
 				isError := part.Get("is_error").Bool()
 				resultContent := part.Get("content")
 				
@@ -1001,6 +1106,12 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse,
 			return content.String(), toolUses, usageInfo, fmt.Errorf("failed to read message: %w", err)
 		}

+		// Validate headersLen to prevent slice out of bounds
+		if headersLen+4 > uint32(len(remaining)) {
+			log.Warnf("kiro: invalid headersLen %d exceeds remaining buffer %d", headersLen, len(remaining))
+			continue
+		}
+
 		// Extract event type from headers
 		eventType := e.extractEventType(remaining[:headersLen+4])

@@ -1018,6 +1129,37 @@ func (e *KiroExecutor) parseEventStream(body io.Reader) (string, []kiroToolUse,
 			continue
 		}

+		// DIAGNOSTIC: Log all received event types for debugging
+		log.Debugf("kiro: parseEventStream received event type: %s", eventType)
+		if log.IsLevelEnabled(log.TraceLevel) {
+			log.Tracef("kiro: parseEventStream event payload: %s", string(payload))
+		}
+
+		// Check for error/exception events in the payload (Kiro API may return errors with HTTP 200)
+		// These can appear as top-level fields or nested within the event
+		if errType, hasErrType := event["_type"].(string); hasErrType {
+			// AWS-style error: {"_type": "com.amazon.aws.codewhisperer#ValidationException", "message": "..."}
+			errMsg := ""
+			if msg, ok := event["message"].(string); ok {
+				errMsg = msg
+			}
+			log.Errorf("kiro: received AWS error in event stream: type=%s, message=%s", errType, errMsg)
+			return "", nil, usageInfo, fmt.Errorf("kiro API error: %s - %s", errType, errMsg)
+		}
+		if errType, hasErrType := event["type"].(string); hasErrType && (errType == "error" || errType == "exception") {
+			// Generic error event
+			errMsg := ""
+			if msg, ok := event["message"].(string); ok {
+				errMsg = msg
+			} else if errObj, ok := event["error"].(map[string]interface{}); ok {
+				if msg, ok := errObj["message"].(string); ok {
+					errMsg = msg
+				}
+			}
+			log.Errorf("kiro: received error event in stream: type=%s, message=%s", errType, errMsg)
+			return "", nil, usageInfo, fmt.Errorf("kiro API error: %s", errMsg)
+		}
+
 		// Handle different event types
 		switch eventType {
 		case "assistantResponseEvent":
@@ -1231,8 +1373,9 @@ func (e *KiroExecutor) buildClaudeResponse(content string, toolUses []kiroToolUs
 // streamToChannel converts AWS Event Stream to channel-based streaming.
 // Supports tool calling - emits tool_use content blocks when tools are used.
 // Includes embedded [Called ...] tool call parsing and input buffering for toolUseEvent.
+// Implements duplicate content filtering using lastContentEvent detection (based on AIClient-2-API).
 func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out chan<- cliproxyexecutor.StreamChunk, targetFormat sdktranslator.Format, model string, originalReq, claudeBody []byte, reporter *usageReporter) {
-	reader := bufio.NewReader(body)
+	reader := bufio.NewReaderSize(body, 20*1024*1024) // 20MB buffer to match other providers
 	var totalUsage usage.Detail
 	var hasToolUses bool // Track if any tool uses were emitted

@@ -1240,6 +1383,15 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 	processedIDs := make(map[string]bool)
 	var currentToolUse *toolUseState

+	// Duplicate content detection - tracks last content event to filter duplicates
+	// Based on AIClient-2-API implementation for Kiro
+	var lastContentEvent string
+
+	// Streaming token calculation - accumulate content for real-time token counting
+	// Based on AIClient-2-API implementation
+	var accumulatedContent strings.Builder
+	accumulatedContent.Grow(4096) // Pre-allocate 4KB capacity to reduce reallocations
+
 	// Translator param for maintaining tool call state across streaming events
 	// IMPORTANT: This must persist across all TranslateStream calls
 	var translatorParam any
@@ -1279,6 +1431,51 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 		prelude := make([]byte, 8)
 		_, err := io.ReadFull(reader, prelude)
 		if err == io.EOF {
+			// Flush any incomplete tool use before ending stream
+			if currentToolUse != nil && !processedIDs[currentToolUse.toolUseID] {
+				log.Warnf("kiro: flushing incomplete tool use at EOF: %s (ID: %s)", currentToolUse.name, currentToolUse.toolUseID)
+				fullInput := currentToolUse.inputBuffer.String()
+				repairedJSON := repairJSON(fullInput)
+				var finalInput map[string]interface{}
+				if err := json.Unmarshal([]byte(repairedJSON), &finalInput); err != nil {
+					log.Warnf("kiro: failed to parse incomplete tool input at EOF: %v", err)
+					finalInput = make(map[string]interface{})
+				}
+				
+				processedIDs[currentToolUse.toolUseID] = true
+				contentBlockIndex++
+				
+				// Send tool_use content block
+				blockStart := e.buildClaudeContentBlockStartEvent(contentBlockIndex, "tool_use", currentToolUse.toolUseID, currentToolUse.name)
+				sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStart, &translatorParam)
+				for _, chunk := range sseData {
+					if chunk != "" {
+						out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+					}
+				}
+				
+				// Send tool input as delta
+				inputBytes, _ := json.Marshal(finalInput)
+				inputDelta := e.buildClaudeInputJsonDeltaEvent(string(inputBytes), contentBlockIndex)
+				sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, inputDelta, &translatorParam)
+				for _, chunk := range sseData {
+					if chunk != "" {
+						out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+					}
+				}
+				
+				// Close block
+				blockStop := e.buildClaudeContentBlockStopEvent(contentBlockIndex)
+				sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, blockStop, &translatorParam)
+				for _, chunk := range sseData {
+					if chunk != "" {
+						out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+					}
+				}
+				
+				hasToolUses = true
+				currentToolUse = nil
+			}
 			break
 		}
 		if err != nil {
@@ -1304,6 +1501,12 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 			return
 		}

+		// Validate headersLen to prevent slice out of bounds
+		if headersLen+4 > uint32(len(remaining)) {
+			log.Warnf("kiro: invalid headersLen %d exceeds remaining buffer %d", headersLen, len(remaining))
+			continue
+		}
+
 		eventType := e.extractEventType(remaining[:headersLen+4])

 		payloadStart := 4 + headersLen
@@ -1317,9 +1520,43 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out

 		var event map[string]interface{}
 		if err := json.Unmarshal(payload, &event); err != nil {
+			log.Warnf("kiro: failed to unmarshal event payload: %v, raw: %s", err, string(payload))
 			continue
 		}

+		// DIAGNOSTIC: Log all received event types for debugging
+		log.Debugf("kiro: streamToChannel received event type: %s", eventType)
+		if log.IsLevelEnabled(log.TraceLevel) {
+			log.Tracef("kiro: streamToChannel event payload: %s", string(payload))
+		}
+
+		// Check for error/exception events in the payload (Kiro API may return errors with HTTP 200)
+		// These can appear as top-level fields or nested within the event
+		if errType, hasErrType := event["_type"].(string); hasErrType {
+			// AWS-style error: {"_type": "com.amazon.aws.codewhisperer#ValidationException", "message": "..."}
+			errMsg := ""
+			if msg, ok := event["message"].(string); ok {
+				errMsg = msg
+			}
+			log.Errorf("kiro: received AWS error in stream: type=%s, message=%s", errType, errMsg)
+			out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("kiro API error: %s - %s", errType, errMsg)}
+			return
+		}
+		if errType, hasErrType := event["type"].(string); hasErrType && (errType == "error" || errType == "exception") {
+			// Generic error event
+			errMsg := ""
+			if msg, ok := event["message"].(string); ok {
+				errMsg = msg
+			} else if errObj, ok := event["error"].(map[string]interface{}); ok {
+				if msg, ok := errObj["message"].(string); ok {
+					errMsg = msg
+				}
+			}
+			log.Errorf("kiro: received error event in stream: type=%s, message=%s", errType, errMsg)
+			out <- cliproxyexecutor.StreamChunk{Err: fmt.Errorf("kiro API error: %s", errMsg)}
+			return
+		}
+
 		// Send message_start on first event
 		if !messageStartSent {
 			msgStart := e.buildClaudeMessageStartEvent(model, totalUsage.InputTokens)
@@ -1364,9 +1601,19 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 				}
 			}

-			// Handle text content
+			// Handle text content with duplicate detection
 			if contentDelta != "" {
+				// Check for duplicate content - skip if identical to last content event
+				// Based on AIClient-2-API implementation for Kiro
+				if contentDelta == lastContentEvent {
+					log.Debugf("kiro: skipping duplicate content event (len: %d)", len(contentDelta))
+					continue
+				}
+				lastContentEvent = contentDelta
+
 				outputLen += len(contentDelta)
+				// Accumulate content for streaming token calculation
+				accumulatedContent.WriteString(contentDelta)
 				// Start text content block if needed
 				if !isTextBlockOpen {
 					contentBlockIndex++
@@ -1538,8 +1785,32 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 		}
 	}

-	// Fallback for output tokens if not received from upstream
-	if totalUsage.OutputTokens == 0 && outputLen > 0 {
+	// Streaming token calculation - calculate output tokens from accumulated content
+	// This provides more accurate token counting than simple character division
+	if totalUsage.OutputTokens == 0 && accumulatedContent.Len() > 0 {
+		// Try to use tiktoken for accurate counting
+		if enc, err := tokenizerForModel(model); err == nil {
+			if tokenCount, countErr := enc.Count(accumulatedContent.String()); countErr == nil {
+				totalUsage.OutputTokens = int64(tokenCount)
+				log.Debugf("kiro: streamToChannel calculated output tokens using tiktoken: %d", totalUsage.OutputTokens)
+			} else {
+				// Fallback on count error: estimate from character count
+				totalUsage.OutputTokens = int64(accumulatedContent.Len() / 4)
+				if totalUsage.OutputTokens == 0 {
+					totalUsage.OutputTokens = 1
+				}
+				log.Debugf("kiro: streamToChannel tiktoken count failed, estimated from chars: %d", totalUsage.OutputTokens)
+			}
+		} else {
+			// Fallback: estimate from character count (roughly 4 chars per token)
+			totalUsage.OutputTokens = int64(accumulatedContent.Len() / 4)
+			if totalUsage.OutputTokens == 0 {
+				totalUsage.OutputTokens = 1
+			}
+			log.Debugf("kiro: streamToChannel estimated output tokens from chars: %d (content len: %d)", totalUsage.OutputTokens, accumulatedContent.Len())
+		}
+	} else if totalUsage.OutputTokens == 0 && outputLen > 0 {
+		// Legacy fallback using outputLen
 		totalUsage.OutputTokens = int64(outputLen / 4)
 		if totalUsage.OutputTokens == 0 {
 			totalUsage.OutputTokens = 1
@@ -1553,9 +1824,18 @@ func (e *KiroExecutor) streamToChannel(ctx context.Context, body io.Reader, out
 		stopReason = "tool_use"
 	}

-	// Send message_delta and message_stop
-	msgStop := e.buildClaudeMessageStopEvent(stopReason, totalUsage)
-	sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam)
+	// Send message_delta event
+	msgDelta := e.buildClaudeMessageDeltaEvent(stopReason, totalUsage)
+	sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgDelta, &translatorParam)
+	for _, chunk := range sseData {
+		if chunk != "" {
+			out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
+		}
+	}
+
+	// Send message_stop event separately
+	msgStop := e.buildClaudeMessageStopOnlyEvent()
+	sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam)
 	for _, chunk := range sseData {
 		if chunk != "" {
 			out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunk + "\n\n")}
@@ -1646,8 +1926,8 @@ func (e *KiroExecutor) buildClaudeContentBlockStopEvent(index int) []byte {
 	return []byte("event: content_block_stop\ndata: " + string(result))
 }

-func (e *KiroExecutor) buildClaudeMessageStopEvent(stopReason string, usageInfo usage.Detail) []byte {
-	// First message_delta
+// buildClaudeMessageDeltaEvent creates the message_delta event with stop_reason and usage.
+func (e *KiroExecutor) buildClaudeMessageDeltaEvent(stopReason string, usageInfo usage.Detail) []byte {
 	deltaEvent := map[string]interface{}{
 		"type": "message_delta",
 		"delta": map[string]interface{}{
@@ -1660,14 +1940,16 @@ func (e *KiroExecutor) buildClaudeMessageStopEvent(stopReason string, usageInfo
 		},
 	}
 	deltaResult, _ := json.Marshal(deltaEvent)
+	return []byte("event: message_delta\ndata: " + string(deltaResult))
+}

-	// Then message_stop
+// buildClaudeMessageStopOnlyEvent creates only the message_stop event.
+func (e *KiroExecutor) buildClaudeMessageStopOnlyEvent() []byte {
 	stopEvent := map[string]interface{}{
 		"type": "message_stop",
 	}
 	stopResult, _ := json.Marshal(stopEvent)
-
-	return []byte("event: message_delta\ndata: " + string(deltaResult) + "\n\nevent: message_stop\ndata: " + string(stopResult))
+	return []byte("event: message_stop\ndata: " + string(stopResult))
 }

 // buildClaudeFinalEvent constructs the final Claude-style event.
@@ -1873,6 +2155,12 @@ func (e *KiroExecutor) streamEventStream(ctx context.Context, body io.Reader, c
 			return fmt.Errorf("failed to read message: %w", err)
 		}

+		// Validate headersLen to prevent slice out of bounds
+		if headersLen+4 > uint32(len(remaining)) {
+			log.Warnf("kiro: invalid headersLen %d exceeds remaining buffer %d", headersLen, len(remaining))
+			continue
+		}
+
 		eventType := e.extractEventType(remaining[:headersLen+4])

 		payloadStart := 4 + headersLen
@@ -1886,6 +2174,7 @@ func (e *KiroExecutor) streamEventStream(ctx context.Context, body io.Reader, c

 		var event map[string]interface{}
 		if err := json.Unmarshal(payload, &event); err != nil {
+			log.Warnf("kiro: failed to unmarshal event payload: %v, raw: %s", err, string(payload))
 			continue
 		}

@@ -1983,9 +2272,19 @@ func (e *KiroExecutor) streamEventStream(ctx context.Context, body io.Reader, c
 	}
 	totalUsage.TotalTokens = totalUsage.InputTokens + totalUsage.OutputTokens

-	// Always use end_turn (no tool_use support)
-	msgStop := e.buildClaudeMessageStopEvent("end_turn", totalUsage)
-	sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam)
+	// Send message_delta event
+	msgDelta := e.buildClaudeMessageDeltaEvent("end_turn", totalUsage)
+	sseData := sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgDelta, &translatorParam)
+	for _, chunk := range sseData {
+		if chunk != "" {
+			c.Writer.Write([]byte(chunk + "\n\n"))
+		}
+	}
+	c.Writer.Flush()
+
+	// Send message_stop event separately
+	msgStop := e.buildClaudeMessageStopOnlyEvent()
+	sseData = sdktranslator.TranslateStream(ctx, sdktranslator.FromString("kiro"), targetFormat, model, originalReq, claudeBody, msgStop, &translatorParam)
 	for _, chunk := range sseData {
 		if chunk != "" {
 			c.Writer.Write([]byte(chunk + "\n\n"))
@@ -2057,6 +2356,128 @@ func (e *KiroExecutor) isTokenExpired(accessToken string) bool {
 	return isExpired
 }

+// ============================================================================
+// Message Merging Support - Merge adjacent messages with the same role
+// Based on AIClient-2-API implementation
+// ============================================================================
+
+// mergeAdjacentMessages merges adjacent messages with the same role.
+// This reduces API call complexity and improves compatibility.
+// Based on AIClient-2-API implementation.
+func mergeAdjacentMessages(messages []gjson.Result) []gjson.Result {
+	if len(messages) <= 1 {
+		return messages
+	}
+
+	var merged []gjson.Result
+	for _, msg := range messages {
+		if len(merged) == 0 {
+			merged = append(merged, msg)
+			continue
+		}
+
+		lastMsg := merged[len(merged)-1]
+		currentRole := msg.Get("role").String()
+		lastRole := lastMsg.Get("role").String()
+
+		if currentRole == lastRole {
+			// Merge content from current message into last message
+			mergedContent := mergeMessageContent(lastMsg, msg)
+			// Create a new merged message JSON
+			mergedMsg := createMergedMessage(lastRole, mergedContent)
+			merged[len(merged)-1] = gjson.Parse(mergedMsg)
+		} else {
+			merged = append(merged, msg)
+		}
+	}
+
+	return merged
+}
+
+// mergeMessageContent merges the content of two messages with the same role.
+// Handles both string content and array content (with text, tool_use, tool_result blocks).
+func mergeMessageContent(msg1, msg2 gjson.Result) string {
+	content1 := msg1.Get("content")
+	content2 := msg2.Get("content")
+
+	// Extract content blocks from both messages
+	var blocks1, blocks2 []map[string]interface{}
+
+	if content1.IsArray() {
+		for _, block := range content1.Array() {
+			blocks1 = append(blocks1, blockToMap(block))
+		}
+	} else if content1.Type == gjson.String {
+		blocks1 = append(blocks1, map[string]interface{}{
+			"type": "text",
+			"text": content1.String(),
+		})
+	}
+
+	if content2.IsArray() {
+		for _, block := range content2.Array() {
+			blocks2 = append(blocks2, blockToMap(block))
+		}
+	} else if content2.Type == gjson.String {
+		blocks2 = append(blocks2, map[string]interface{}{
+			"type": "text",
+			"text": content2.String(),
+		})
+	}
+
+	// Merge text blocks if both end/start with text
+	if len(blocks1) > 0 && len(blocks2) > 0 {
+		if blocks1[len(blocks1)-1]["type"] == "text" && blocks2[0]["type"] == "text" {
+			// Merge the last text block of msg1 with the first text block of msg2
+			text1 := blocks1[len(blocks1)-1]["text"].(string)
+			text2 := blocks2[0]["text"].(string)
+			blocks1[len(blocks1)-1]["text"] = text1 + "\n" + text2
+			blocks2 = blocks2[1:] // Remove the merged block from blocks2
+		}
+	}
+
+	// Combine all blocks
+	allBlocks := append(blocks1, blocks2...)
+
+	// Convert to JSON
+	result, _ := json.Marshal(allBlocks)
+	return string(result)
+}
+
+// blockToMap converts a gjson.Result block to a map[string]interface{}
+func blockToMap(block gjson.Result) map[string]interface{} {
+	result := make(map[string]interface{})
+	block.ForEach(func(key, value gjson.Result) bool {
+		if value.IsObject() {
+			result[key.String()] = blockToMap(value)
+		} else if value.IsArray() {
+			var arr []interface{}
+			for _, item := range value.Array() {
+				if item.IsObject() {
+					arr = append(arr, blockToMap(item))
+				} else {
+					arr = append(arr, item.Value())
+				}
+			}
+			result[key.String()] = arr
+		} else {
+			result[key.String()] = value.Value()
+		}
+		return true
+	})
+	return result
+}
+
+// createMergedMessage creates a JSON string for a merged message
+func createMergedMessage(role string, content string) string {
+	msg := map[string]interface{}{
+		"role":    role,
+		"content": json.RawMessage(content),
+	}
+	result, _ := json.Marshal(msg)
+	return string(result)
+}
+
 // ============================================================================
 // Tool Calling Support - Embedded tool call parsing and input buffering
 // Based on amq2api and AIClient-2-API implementations
@@ -2079,8 +2500,6 @@ var (
 	whitespaceCollapsePattern = regexp.MustCompile(`\s+`)
 	// trailingCommaPattern matches trailing commas before closing braces/brackets
 	trailingCommaPattern = regexp.MustCompile(`,\s*([}\]])`)
-	// unquotedKeyPattern matches unquoted JSON keys that need quoting
-	unquotedKeyPattern = regexp.MustCompile(`([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:`)
 )

 // parseEmbeddedToolCalls extracts [Called tool_name with args: {...}] format from text.
@@ -2246,14 +2665,208 @@ func findMatchingBracket(text string, startPos int) int {
 }

 // repairJSON attempts to fix common JSON issues that may occur in tool call arguments.
-// Based on AIClient-2-API's JSON repair implementation.
+// Based on AIClient-2-API's JSON repair implementation with a more conservative strategy.
+//
+// Conservative repair strategy:
+// 1. First try to parse JSON directly - if valid, return as-is
+// 2. Only attempt repair if parsing fails
+// 3. After repair, validate the result - if still invalid, return original
+//
+// Handles incomplete JSON by balancing brackets and removing trailing incomplete content.
 // Uses pre-compiled regex patterns for performance.
-func repairJSON(raw string) string {
+func repairJSON(jsonString string) string {
+	// Handle empty or invalid input
+	if jsonString == "" {
+		return "{}"
+	}
+	
+	str := strings.TrimSpace(jsonString)
+	if str == "" {
+		return "{}"
+	}
+	
+	// CONSERVATIVE STRATEGY: First try to parse directly
+	// If the JSON is already valid, return it unchanged
+	var testParse interface{}
+	if err := json.Unmarshal([]byte(str), &testParse); err == nil {
+		log.Debugf("kiro: repairJSON - JSON is already valid, returning unchanged")
+		return str
+	}
+	
+	log.Debugf("kiro: repairJSON - JSON parse failed, attempting repair")
+	originalStr := str // Keep original for fallback
+	
+	// First, escape unescaped newlines/tabs within JSON string values
+	str = escapeNewlinesInStrings(str)
 	// Remove trailing commas before closing braces/brackets
-	repaired := trailingCommaPattern.ReplaceAllString(raw, "$1")
-	// Fix unquoted keys (basic attempt - handles simple cases)
-	repaired = unquotedKeyPattern.ReplaceAllString(repaired, `$1"$2":`)
-	return repaired
+	str = trailingCommaPattern.ReplaceAllString(str, "$1")
+	
+	// Calculate bracket balance to detect incomplete JSON
+	braceCount := 0    // {} balance
+	bracketCount := 0  // [] balance
+	inString := false
+	escape := false
+	lastValidIndex := -1
+	
+	for i := 0; i < len(str); i++ {
+		char := str[i]
+		
+		// Handle escape sequences
+		if escape {
+			escape = false
+			continue
+		}
+		
+		if char == '\\' {
+			escape = true
+			continue
+		}
+		
+		// Handle string boundaries
+		if char == '"' {
+			inString = !inString
+			continue
+		}
+		
+		// Skip characters inside strings (they don't affect bracket balance)
+		if inString {
+			continue
+		}
+		
+		// Track bracket balance
+		switch char {
+		case '{':
+			braceCount++
+		case '}':
+			braceCount--
+		case '[':
+			bracketCount++
+		case ']':
+			bracketCount--
+		}
+		
+		// Record last valid position (where brackets are balanced or positive)
+		if braceCount >= 0 && bracketCount >= 0 {
+			lastValidIndex = i
+		}
+	}
+	
+	// If brackets are unbalanced, try to repair
+	if braceCount > 0 || bracketCount > 0 {
+		// Truncate to last valid position if we have incomplete content
+		if lastValidIndex > 0 && lastValidIndex < len(str)-1 {
+			// Check if truncation would help (only truncate if there's trailing garbage)
+			truncated := str[:lastValidIndex+1]
+			// Recount brackets after truncation
+			braceCount = 0
+			bracketCount = 0
+			inString = false
+			escape = false
+			for i := 0; i < len(truncated); i++ {
+				char := truncated[i]
+				if escape {
+					escape = false
+					continue
+				}
+				if char == '\\' {
+					escape = true
+					continue
+				}
+				if char == '"' {
+					inString = !inString
+					continue
+				}
+				if inString {
+					continue
+				}
+				switch char {
+				case '{':
+					braceCount++
+				case '}':
+					braceCount--
+				case '[':
+					bracketCount++
+				case ']':
+					bracketCount--
+				}
+			}
+			str = truncated
+		}
+		
+		// Add missing closing brackets
+		for braceCount > 0 {
+			str += "}"
+			braceCount--
+		}
+		for bracketCount > 0 {
+			str += "]"
+			bracketCount--
+		}
+	}
+	
+	// CONSERVATIVE STRATEGY: Validate repaired JSON
+	// If repair didn't produce valid JSON, return original string
+	if err := json.Unmarshal([]byte(str), &testParse); err != nil {
+		log.Warnf("kiro: repairJSON - repair failed to produce valid JSON, returning original")
+		return originalStr
+	}
+	
+	log.Debugf("kiro: repairJSON - successfully repaired JSON")
+	return str
+}
+
+// escapeNewlinesInStrings escapes literal newlines, tabs, and other control characters
+// that appear inside JSON string values. This handles cases where streaming fragments
+// contain unescaped control characters within string content.
+func escapeNewlinesInStrings(raw string) string {
+	var result strings.Builder
+	result.Grow(len(raw) + 100) // Pre-allocate with some extra space
+
+	inString := false
+	escaped := false
+
+	for i := 0; i < len(raw); i++ {
+		c := raw[i]
+
+		if escaped {
+			// Previous character was backslash, this is an escape sequence
+			result.WriteByte(c)
+			escaped = false
+			continue
+		}
+
+		if c == '\\' && inString {
+			// Start of escape sequence
+			result.WriteByte(c)
+			escaped = true
+			continue
+		}
+
+		if c == '"' {
+			// Toggle string state
+			inString = !inString
+			result.WriteByte(c)
+			continue
+		}
+
+		if inString {
+			// Inside a string, escape control characters
+			switch c {
+			case '\n':
+				result.WriteString("\\n")
+			case '\r':
+				result.WriteString("\\r")
+			case '\t':
+				result.WriteString("\\t")
+			default:
+				result.WriteByte(c)
+			}
+		} else {
+			result.WriteByte(c)
+		}
+	}
+
+	return result.String()
 }

 // processToolUseEvent handles a toolUseEvent from the Kiro stream.
@@ -2330,6 +2943,8 @@ func (e *KiroExecutor) processToolUseEvent(event map[string]interface{}, current

 	// Accumulate input fragments
 	if currentToolUse != nil && inputFragment != "" {
+		// Accumulate fragments directly - they form valid JSON when combined
+		// The fragments are already decoded from JSON, so we just concatenate them
 		currentToolUse.inputBuffer.WriteString(inputFragment)
 		log.Debugf("kiro: accumulated input fragment, total length: %d", currentToolUse.inputBuffer.Len())
 	}
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -58,6 +58,10 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
+	translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
+	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
+	}

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -143,6 +147,10 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
+	translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
+	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
+	}

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -12,8 +12,8 @@ import (
 // applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
 // for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
 func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
-	budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata)
-	if !ok {
+	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
+	if !ok || (budgetOverride == nil && includeOverride == nil) {
 		return payload
 	}
 	if !util.ModelSupportsThinking(model) {
@@ -29,17 +29,60 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
 // applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
 // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
 func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
-	budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata)
-	if !ok {
+	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
+	if !ok || (budgetOverride == nil && includeOverride == nil) {
 		return payload
 	}
-	if budgetOverride != nil && util.ModelSupportsThinking(model) {
+	if !util.ModelSupportsThinking(model) {
+		return payload
+	}
+	if budgetOverride != nil {
 		norm := util.NormalizeThinkingBudget(model, *budgetOverride)
 		budgetOverride = &norm
 	}
 	return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
 }

+// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata.
+// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking.
+func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte {
+	if len(metadata) == 0 {
+		return payload
+	}
+	if !util.ModelSupportsThinking(model) {
+		return payload
+	}
+	if gjson.GetBytes(payload, "reasoning.effort").Exists() {
+		return payload
+	}
+	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
+		if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil {
+			return updated
+		}
+	}
+	return payload
+}
+
+// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field)
+// when present in metadata. It avoids overwriting an existing reasoning_effort field.
+func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte {
+	if len(metadata) == 0 {
+		return payload
+	}
+	if !util.ModelSupportsThinking(model) {
+		return payload
+	}
+	if gjson.GetBytes(payload, "reasoning_effort").Exists() {
+		return payload
+	}
+	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
+		if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil {
+			return updated
+		}
+	}
+	return payload
+}
+
 // applyPayloadConfig applies payload default and override rules from configuration
 // to the given JSON payload for the specified model.
 // Defaults only fill missing fields, while overrides always overwrite existing values.
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -12,6 +12,7 @@ import (

 	qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -50,6 +51,10 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+		body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
@@ -121,6 +126,10 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

+	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+		body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	}
 	toolsResult := gjson.GetBytes(body, "tools")
 	// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
 	// This will have no real consequences. It's just to scare Qwen3.
--- a/internal/translator/kiro/openai/chat-completions/kiro_openai_response.go
+++ b/internal/translator/kiro/openai/chat-completions/kiro_openai_response.go
@@ -171,7 +171,7 @@ func convertClaudeEventToOpenAI(jsonStr string, model string) []string {
 		return results

 	case "message_delta":
-		// Final message delta with stop_reason
+		// Final message delta with stop_reason and usage
 		stopReason := root.Get("delta.stop_reason").String()
 		if stopReason != "" {
 			finishReason := "stop"
@@ -196,6 +196,19 @@ func convertClaudeEventToOpenAI(jsonStr string, model string) []string {
 					},
 				},
 			}
+
+			// Extract and include usage information from message_delta event
+			usage := root.Get("usage")
+			if usage.Exists() {
+				inputTokens := usage.Get("input_tokens").Int()
+				outputTokens := usage.Get("output_tokens").Int()
+				response["usage"] = map[string]interface{}{
+					"prompt_tokens":     inputTokens,
+					"completion_tokens": outputTokens,
+					"total_tokens":      inputTokens + outputTokens,
+				}
+			}
+
 			result, _ := json.Marshal(response)
 			results = append(results, string(result))
 		}
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -1,8 +1,6 @@
 package util

 import (
-	"encoding/json"
-	"strconv"
 	"strings"

 	"github.com/tidwall/gjson"
@@ -15,80 +13,6 @@ const (
 	GeminiOriginalModelMetadataKey   = "gemini_original_model"
 )

-func ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) {
-	if model == "" {
-		return model, nil, nil, false
-	}
-	lower := strings.ToLower(model)
-	if !strings.HasPrefix(lower, "gemini-") {
-		return model, nil, nil, false
-	}
-
-	if strings.HasSuffix(lower, "-nothinking") {
-		base := model[:len(model)-len("-nothinking")]
-		budgetValue := 0
-		if strings.HasPrefix(lower, "gemini-2.5-pro") {
-			budgetValue = 128
-		}
-		include := false
-		return base, &budgetValue, &include, true
-	}
-
-	// Handle "-reasoning" suffix: enables thinking with dynamic budget (-1)
-	// Maps: gemini-2.5-flash-reasoning -> gemini-2.5-flash with thinkingBudget=-1
-	if strings.HasSuffix(lower, "-reasoning") {
-		base := model[:len(model)-len("-reasoning")]
-		budgetValue := -1 // Dynamic budget
-		include := true
-		return base, &budgetValue, &include, true
-	}
-
-	idx := strings.LastIndex(lower, "-thinking-")
-	if idx == -1 {
-		return model, nil, nil, false
-	}
-
-	digits := model[idx+len("-thinking-"):]
-	if digits == "" {
-		return model, nil, nil, false
-	}
-	end := len(digits)
-	for i := 0; i < len(digits); i++ {
-		if digits[i] < '0' || digits[i] > '9' {
-			end = i
-			break
-		}
-	}
-	if end == 0 {
-		return model, nil, nil, false
-	}
-	valueStr := digits[:end]
-	value, err := strconv.Atoi(valueStr)
-	if err != nil {
-		return model, nil, nil, false
-	}
-	base := model[:idx]
-	budgetValue := value
-	return base, &budgetValue, nil, true
-}
-
-func NormalizeGeminiThinkingModel(modelName string) (string, map[string]any) {
-	baseModel, budget, include, matched := ParseGeminiThinkingSuffix(modelName)
-	if !matched {
-		return baseModel, nil
-	}
-	metadata := map[string]any{
-		GeminiOriginalModelMetadataKey: modelName,
-	}
-	if budget != nil {
-		metadata[GeminiThinkingBudgetMetadataKey] = *budget
-	}
-	if include != nil {
-		metadata[GeminiIncludeThoughtsMetadataKey] = *include
-	}
-	return baseModel, metadata
-}
-
 func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
 	if budget == nil && includeThoughts == nil {
 		return body
@@ -133,80 +57,6 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
 	return updated
 }

-func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
-	if len(metadata) == 0 {
-		return nil, nil, false
-	}
-	var (
-		budgetPtr  *int
-		includePtr *bool
-		matched    bool
-	)
-	if rawBudget, ok := metadata[GeminiThinkingBudgetMetadataKey]; ok {
-		switch v := rawBudget.(type) {
-		case int:
-			budget := v
-			budgetPtr = &budget
-			matched = true
-		case int32:
-			budget := int(v)
-			budgetPtr = &budget
-			matched = true
-		case int64:
-			budget := int(v)
-			budgetPtr = &budget
-			matched = true
-		case float64:
-			budget := int(v)
-			budgetPtr = &budget
-			matched = true
-		case json.Number:
-			if val, err := v.Int64(); err == nil {
-				budget := int(val)
-				budgetPtr = &budget
-				matched = true
-			}
-		}
-	}
-	if rawInclude, ok := metadata[GeminiIncludeThoughtsMetadataKey]; ok {
-		switch v := rawInclude.(type) {
-		case bool:
-			include := v
-			includePtr = &include
-			matched = true
-		case string:
-			if parsed, err := strconv.ParseBool(v); err == nil {
-				include := parsed
-				includePtr = &include
-				matched = true
-			}
-		case json.Number:
-			if val, err := v.Int64(); err == nil {
-				include := val != 0
-				includePtr = &include
-				matched = true
-			}
-		case int:
-			include := v != 0
-			includePtr = &include
-			matched = true
-		case int32:
-			include := v != 0
-			includePtr = &include
-			matched = true
-		case int64:
-			include := v != 0
-			includePtr = &include
-			matched = true
-		case float64:
-			include := v != 0
-			includePtr = &include
-			matched = true
-		}
-	}
-	return budgetPtr, includePtr, matched
-}
-
 // modelsWithDefaultThinking lists models that should have thinking enabled by default
 // when no explicit thinkingConfig is provided.
 var modelsWithDefaultThinking = map[string]bool{
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -0,0 +1,327 @@
+package util
+
+import (
+	"encoding/json"
+	"strconv"
+	"strings"
+)
+
+const (
+	ThinkingBudgetMetadataKey          = "thinking_budget"
+	ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts"
+	ReasoningEffortMetadataKey         = "reasoning_effort"
+	ThinkingOriginalModelMetadataKey   = "thinking_original_model"
+)
+
+// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
+// the normalized base model with extracted metadata. Supported patterns:
+//   - "-thinking-<number>" extracts a numeric budget
+//   - "-thinking-<level>" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none)
+//   - "-thinking" maps to a default reasoning effort of "medium"
+//   - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true
+//   - "-nothinking" maps to budget=0 and include_thoughts=false
+func NormalizeThinkingModel(modelName string) (string, map[string]any) {
+	if modelName == "" {
+		return modelName, nil
+	}
+
+	lower := strings.ToLower(modelName)
+	baseModel := modelName
+
+	var (
+		budgetOverride  *int
+		includeThoughts *bool
+		reasoningEffort *string
+		matched         bool
+	)
+
+	switch {
+	case strings.HasSuffix(lower, "-nothinking"):
+		baseModel = modelName[:len(modelName)-len("-nothinking")]
+		budget := 0
+		include := false
+		budgetOverride = &budget
+		includeThoughts = &include
+		matched = true
+	case strings.HasSuffix(lower, "-reasoning"):
+		baseModel = modelName[:len(modelName)-len("-reasoning")]
+		budget := -1
+		include := true
+		budgetOverride = &budget
+		includeThoughts = &include
+		matched = true
+	default:
+		if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 {
+			value := modelName[idx+len("-thinking-"):]
+			if value != "" {
+				if parsed, ok := parseIntPrefix(value); ok {
+					baseModel = modelName[:idx]
+					budgetOverride = &parsed
+					matched = true
+				} else if effort, okEffort := normalizeReasoningEffort(value); okEffort {
+					baseModel = modelName[:idx]
+					reasoningEffort = &effort
+					matched = true
+				}
+			}
+		} else if strings.HasSuffix(lower, "-thinking") {
+			baseModel = modelName[:len(modelName)-len("-thinking")]
+			effort := "medium"
+			reasoningEffort = &effort
+			matched = true
+		}
+	}
+
+	if !matched {
+		return baseModel, nil
+	}
+
+	metadata := map[string]any{
+		ThinkingOriginalModelMetadataKey: modelName,
+	}
+	if budgetOverride != nil {
+		metadata[ThinkingBudgetMetadataKey] = *budgetOverride
+	}
+	if includeThoughts != nil {
+		metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts
+	}
+	if reasoningEffort != nil {
+		metadata[ReasoningEffortMetadataKey] = *reasoningEffort
+	}
+	return baseModel, metadata
+}
+
+// ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel.
+// It accepts both the new generic keys and legacy Gemini-specific keys.
+func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) {
+	if len(metadata) == 0 {
+		return nil, nil, nil, false
+	}
+
+	var (
+		budgetPtr  *int
+		includePtr *bool
+		effortPtr  *string
+		matched    bool
+	)
+
+	readBudget := func(key string) {
+		if budgetPtr != nil {
+			return
+		}
+		if raw, ok := metadata[key]; ok {
+			if v, okNumber := parseNumberToInt(raw); okNumber {
+				budget := v
+				budgetPtr = &budget
+				matched = true
+			}
+		}
+	}
+
+	readInclude := func(key string) {
+		if includePtr != nil {
+			return
+		}
+		if raw, ok := metadata[key]; ok {
+			switch v := raw.(type) {
+			case bool:
+				val := v
+				includePtr = &val
+				matched = true
+			case *bool:
+				if v != nil {
+					val := *v
+					includePtr = &val
+					matched = true
+				}
+			}
+		}
+	}
+
+	readEffort := func(key string) {
+		if effortPtr != nil {
+			return
+		}
+		if raw, ok := metadata[key]; ok {
+			if val, okStr := raw.(string); okStr && strings.TrimSpace(val) != "" {
+				normalized := strings.ToLower(strings.TrimSpace(val))
+				effortPtr = &normalized
+				matched = true
+			}
+		}
+	}
+
+	readBudget(ThinkingBudgetMetadataKey)
+	readBudget(GeminiThinkingBudgetMetadataKey)
+	readInclude(ThinkingIncludeThoughtsMetadataKey)
+	readInclude(GeminiIncludeThoughtsMetadataKey)
+	readEffort(ReasoningEffortMetadataKey)
+	readEffort("reasoning.effort")
+
+	return budgetPtr, includePtr, effortPtr, matched
+}
+
+// ResolveThinkingConfigFromMetadata derives thinking budget/include overrides,
+// converting reasoning effort strings into budgets when possible.
+func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) {
+	budget, include, effort, matched := ThinkingFromMetadata(metadata)
+	if !matched {
+		return nil, nil, false
+	}
+
+	if budget == nil && effort != nil {
+		if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
+			budget = &derived
+		}
+	}
+	return budget, include, budget != nil || include != nil || effort != nil
+}
+
+// ReasoningEffortFromMetadata resolves a reasoning effort string from metadata,
+// inferring "auto" and "none" when budgets request dynamic or disabled thinking.
+func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
+	budget, include, effort, matched := ThinkingFromMetadata(metadata)
+	if !matched {
+		return "", false
+	}
+	if effort != nil && *effort != "" {
+		return *effort, true
+	}
+	if budget != nil {
+		switch *budget {
+		case -1:
+			return "auto", true
+		case 0:
+			return "none", true
+		}
+	}
+	if include != nil && !*include {
+		return "none", true
+	}
+	return "", true
+}
+
+// ThinkingEffortToBudget maps reasoning effort levels to approximate budgets,
+// clamping the result to the model's supported range.
+func ThinkingEffortToBudget(model, effort string) (int, bool) {
+	if effort == "" {
+		return 0, false
+	}
+	switch strings.ToLower(effort) {
+	case "none":
+		return 0, true
+	case "auto":
+		return NormalizeThinkingBudget(model, -1), true
+	case "minimal":
+		return NormalizeThinkingBudget(model, 512), true
+	case "low":
+		return NormalizeThinkingBudget(model, 1024), true
+	case "medium":
+		return NormalizeThinkingBudget(model, 8192), true
+	case "high":
+		return NormalizeThinkingBudget(model, 24576), true
+	case "xhigh":
+		return NormalizeThinkingBudget(model, 32768), true
+	default:
+		return 0, false
+	}
+}
+
+// ResolveOriginalModel returns the original model name stored in metadata (if present),
+// otherwise falls back to the provided model.
+func ResolveOriginalModel(model string, metadata map[string]any) string {
+	normalize := func(name string) string {
+		if name == "" {
+			return ""
+		}
+		if base, _ := NormalizeThinkingModel(name); base != "" {
+			return base
+		}
+		return strings.TrimSpace(name)
+	}
+
+	if metadata != nil {
+		if v, ok := metadata[ThinkingOriginalModelMetadataKey]; ok {
+			if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
+				if base := normalize(s); base != "" {
+					return base
+				}
+			}
+		}
+		if v, ok := metadata[GeminiOriginalModelMetadataKey]; ok {
+			if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
+				if base := normalize(s); base != "" {
+					return base
+				}
+			}
+		}
+	}
+	// Fallback: try to re-normalize the model name when metadata was dropped.
+	if base := normalize(model); base != "" {
+		return base
+	}
+	return model
+}
+
+func parseIntPrefix(value string) (int, bool) {
+	if value == "" {
+		return 0, false
+	}
+	digits := strings.TrimLeft(value, "-")
+	if digits == "" {
+		return 0, false
+	}
+	end := len(digits)
+	for i := 0; i < len(digits); i++ {
+		if digits[i] < '0' || digits[i] > '9' {
+			end = i
+			break
+		}
+	}
+	if end == 0 {
+		return 0, false
+	}
+	val, err := strconv.Atoi(digits[:end])
+	if err != nil {
+		return 0, false
+	}
+	return val, true
+}
+
+func parseNumberToInt(raw any) (int, bool) {
+	switch v := raw.(type) {
+	case int:
+		return v, true
+	case int32:
+		return int(v), true
+	case int64:
+		return int(v), true
+	case float64:
+		return int(v), true
+	case json.Number:
+		if val, err := v.Int64(); err == nil {
+			return int(val), true
+		}
+	case string:
+		if strings.TrimSpace(v) == "" {
+			return 0, false
+		}
+		if parsed, err := strconv.Atoi(strings.TrimSpace(v)); err == nil {
+			return parsed, true
+		}
+	}
+	return 0, false
+}
+
+func normalizeReasoningEffort(value string) (string, bool) {
+	if value == "" {
+		return "", false
+	}
+	effort := strings.ToLower(strings.TrimSpace(value))
+	switch effort {
+	case "minimal", "low", "medium", "high", "xhigh", "auto", "none":
+		return effort, true
+	default:
+		return "", false
+	}
+}
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -343,18 +343,32 @@ func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string

 	providerName, extractedModelName, isDynamic := h.parseDynamicModel(resolvedModelName)

-	// First, normalize the model name to handle suffixes like "-thinking-128"
-	// This needs to happen before determining the provider for non-dynamic models.
-	normalizedModel, metadata = normalizeModelMetadata(resolvedModelName)
+	targetModelName := resolvedModelName
+	if isDynamic {
+		targetModelName = extractedModelName
+	}
+
+	// Normalize the model name to handle dynamic thinking suffixes before determining the provider.
+	normalizedModel, metadata = normalizeModelMetadata(targetModelName)

 	if isDynamic {
 		providers = []string{providerName}
-		// For dynamic models, the extractedModelName is already normalized by parseDynamicModel
-		// so we use it as the final normalizedModel.
-		normalizedModel = extractedModelName
 	} else {
 		// For non-dynamic models, use the normalizedModel to get the provider name.
 		providers = util.GetProviderName(normalizedModel)
+		if len(providers) == 0 && metadata != nil {
+			if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok {
+				if originalModel, okStr := originalRaw.(string); okStr {
+					originalModel = strings.TrimSpace(originalModel)
+					if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) {
+						if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 {
+							providers = altProviders
+							normalizedModel = originalModel
+						}
+					}
+				}
+			}
+		}
 	}

 	if len(providers) == 0 {
@@ -402,7 +416,7 @@ func cloneBytes(src []byte) []byte {
 }

 func normalizeModelMetadata(modelName string) (string, map[string]any) {
-	return util.NormalizeGeminiThinkingModel(modelName)
+	return util.NormalizeThinkingModel(modelName)
 }

 func cloneMetadata(src map[string]any) map[string]any {
Author	SHA1	Message	Date
Luis Pater	69fccf0015	Merge pull request #18 from Ravens2121/master Kiro Executor Stability and API Compatibility Improvements	2025-12-11 08:35:22 +08:00
Ravens2121	6133bac226	feat(kiro): enhance Kiro executor stability and compatibility ## Changes Overview This commit includes multiple improvements to the Kiro executor for better stability, API compatibility, and code quality. ## Detailed Changes ### 1. Output Token Calculation Improvement (lines 317-330) - Replace simple len(content)/4 estimation with tiktoken-based calculation - Add fallback to character count estimation if tiktoken fails - Improves token counting accuracy for usage tracking ### 2. Stream Handler Panic Recovery (lines 528-533) - Add defer/recover block in streamToChannel goroutine - Prevents single request crashes from affecting the entire service ### 3. Struct Field Reordering (lines 670-673) - Reorder kiroToolResult struct fields: Content, Status, ToolUseID - Ensures consistency with API expectations ### 4. Message Merging Function (lines 778-780, 2356-2483) - Add mergeAdjacentMessages() to combine consecutive messages with same role - Add helper functions: mergeMessageContent(), blockToMap(), createMergedMessage() - Required by Kiro API which doesn't allow adjacent messages from same role ### 5. Empty Content Handling (lines 791-800) - Add default content for empty history messages - User messages with tool results: "Tool results provided." - User messages without tool results: "Continue" ### 6. Assistant Last Message Handling (lines 811-830) - Detect when last message is from assistant - Create synthetic "Continue" user message to satisfy Kiro API requirements - Kiro API requires currentMessage to be userInputMessage type ### 7. Duplicate Content Event Detection (lines 1650-1660) - Track lastContentEvent to detect duplicate streaming events - Skip redundant events to prevent duplicate content in responses - Based on AIClient-2-API implementation for Kiro ### 8. Streaming Token Calculation Enhancement (lines 1785-1817) - Add accumulatedContent buffer for streaming token calculation - Use tiktoken for accurate output token counting during streaming - Add fallback to character count estimation with proper logging ### 9. JSON Repair Enhancement (lines 2665-2818) - Implement conservative JSON repair strategy - First try to parse JSON directly - if valid, return unchanged - Add bracket balancing detection and repair - Only repair when necessary to avoid corrupting valid JSON - Validate repaired JSON before returning ### 10. HELIOS_CHK Filtering Removal (lines 2500-2504, 3004-3039) - Remove filterHeliosDebugInfo function - Remove heliosDebugPattern regex - HELIOS_CHK fields now handled by client-side processing ### 11. Comment Translation - Translate Chinese comments to English for code consistency - Affected areas: token calculation, buffer handling, message processing	2025-12-11 08:13:33 +08:00
Ravens2121	f302be5ce6	Merge branch 'router-for-me:main' into master	2025-12-11 05:40:10 +08:00
Ravens2121	cd4e84a360	feat(kiro): enhance request format, stream handling, and usage tracking ## English Description ### Request Format Fixes - Fix conversationState field order (chatTriggerType must be first) - Add conditional profileArn inclusion based on auth method - builder-id auth (AWS SSO) doesn't require profileArn - social auth (Google OAuth) requires profileArn ### Stream Processing Enhancements - Add headersLen boundary validation to prevent slice out of bounds - Handle incomplete tool use at EOF by flushing pending data - Separate message_delta and message_stop events for proper streaming - Add error logging for JSON unmarshal failures ### JSON Repair Improvements - Add escapeNewlinesInStrings() to handle control characters in JSON strings - Remove incorrect unquotedKeyPattern that broke valid JSON content - Fix handling of streaming fragments with embedded newlines/tabs ### Debug Info Filtering (Optional) - Add filterHeliosDebugInfo() to remove [HELIOS_CHK] blocks - Pattern matches internal state tracking from Kiro/Amazon Q - Currently disabled pending further testing ### Usage Tracking - Add usage information extraction in message_delta response - Include prompt_tokens, completion_tokens, total_tokens in OpenAI format --- ## 中文描述 ### 请求格式修复 - 修复 conversationState 字段顺序（chatTriggerType 必须在第一位） - 根据认证方式条件性包含 profileArn - builder-id 认证（AWS SSO）不需要 profileArn - social 认证（Google OAuth）需要 profileArn ### 流处理增强 - 添加 headersLen 边界验证，防止切片越界 - 在 EOF 时处理未完成的工具调用，刷新待处理数据 - 分离 message_delta 和 message_stop 事件以实现正确的流式传输 - 添加 JSON 反序列化失败的错误日志 ### JSON 修复改进 - 添加 escapeNewlinesInStrings() 处理 JSON 字符串中的控制字符 - 移除错误的 unquotedKeyPattern，该模式会破坏有效的 JSON 内容 - 修复包含嵌入换行符/制表符的流式片段处理 ### 调试信息过滤（可选） - 添加 filterHeliosDebugInfo() 移除 [HELIOS_CHK] 块 - 模式匹配来自 Kiro/Amazon Q 的内部状态跟踪信息 - 目前已禁用，等待进一步测试 ### 使用量跟踪 - 在 message_delta 响应中添加 usage 信息提取 - 以 OpenAI 格式包含 prompt_tokens、completion_tokens、total_tokens	2025-12-11 05:37:22 +08:00
Luis Pater	4360ed8a7b	Merge branch 'router-for-me:main' into main	2025-12-11 03:17:55 +08:00
Luis Pater	423ce97665	feat(util): implement dynamic thinking suffix normalization and refactor budget resolution logic - Added support for parsing and normalizing dynamic thinking model suffixes. - Centralized budget resolution across executors and payload helpers. - Retired legacy Gemini-specific thinking handlers in favor of unified logic. - Updated executors to use metadata-based thinking configuration. - Added `ResolveOriginalModel` utility for resolving normalized upstream models using request metadata. - Updated executors (Gemini, Codex, iFlow, OpenAI, Qwen) to incorporate upstream model resolution and substitute model values in payloads and request URLs. - Ensured fallbacks handle cases with missing or malformed metadata to derive models robustly. - Refactored upstream model resolution to dynamically incorporate metadata for selecting and normalizing models. - Improved handling of thinking configurations and model overrides in executors. - Removed hardcoded thinking model entries and migrated logic to metadata-based resolution. - Updated payload mutations to always include the resolved model.	2025-12-11 03:10:50 +08:00