Merge pull request #19 from router-for-me/plus

v6.6.1
Merge branch 'main' into plus
2026-03-10 15:53:16 +00:00 · 2025-12-11 21:35:16 +08:00 · 2025-12-11 21:33:49 +08:00 · 2025-12-11 21:20:54 +08:00 · 2025-12-11 21:18:19 +08:00 · 2025-12-11 20:12:06 +08:00
46 changed files with 1863 additions and 917 deletions
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -105,7 +105,7 @@ ws-auth: false
 #     excluded-models:
 #       - "claude-opus-4-5-20251101" # exclude specific models (exact match)
 #       - "claude-3-*"               # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
-#       - "*-think"                  # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
+#       - "*-thinking"               # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
 #       - "*haiku*"                  # wildcard matching substring (e.g. claude-3-5-haiku-20241022)

 # Kiro (AWS CodeWhisperer) configuration
--- a/internal/api/modules/amp/fallback_handlers.go
+++ b/internal/api/modules/amp/fallback_handlers.go
@@ -133,8 +133,8 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc
 			return
 		}

-		// Normalize model (handles Gemini thinking suffixes)
-		normalizedModel, _ := util.NormalizeGeminiThinkingModel(modelName)
+		// Normalize model (handles dynamic thinking suffixes)
+		normalizedModel, _ := util.NormalizeThinkingModel(modelName)

 		// Track resolved model for logging (may change if mapping is applied)
 		resolvedModel := normalizedModel
--- a/internal/api/modules/amp/routes.go
+++ b/internal/api/modules/amp/routes.go
@@ -156,6 +156,7 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
 	rootMiddleware := []gin.HandlerFunc{m.managementAvailabilityMiddleware(), noCORSMiddleware(), m.localhostOnlyMiddleware()}
 	engine.GET("/threads/*path", append(rootMiddleware, proxyHandler)...)
 	engine.GET("/threads.rss", append(rootMiddleware, proxyHandler)...)
+	engine.GET("/news.rss", append(rootMiddleware, proxyHandler)...)

 	// Root-level auth routes for CLI login flow
 	// Amp uses multiple auth routes: /auth/cli-login, /auth/callback, /auth/sign-in, /auth/logout
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -16,6 +16,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4.5 Haiku",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			// Thinking: not supported for Haiku models
 		},
 		{
 			ID:                  "claude-sonnet-4-5-20250929",
@@ -26,60 +27,6 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4.5 Sonnet",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
-		},
-		{
-			ID:                  "claude-sonnet-4-5-thinking",
-			Object:              "model",
-			Created:             1759104000, // 2025-09-29
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Sonnet Thinking",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-opus-4-5-thinking",
-			Object:              "model",
-			Created:             1761955200, // 2025-11-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Opus Thinking",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-opus-4-5-thinking-low",
-			Object:              "model",
-			Created:             1761955200, // 2025-11-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Opus Thinking Low",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-opus-4-5-thinking-medium",
-			Object:              "model",
-			Created:             1761955200, // 2025-11-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Opus Thinking Medium",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
-		},
-		{
-			ID:                  "claude-opus-4-5-thinking-high",
-			Object:              "model",
-			Created:             1761955200, // 2025-11-01
-			OwnedBy:             "anthropic",
-			Type:                "claude",
-			DisplayName:         "Claude 4.5 Opus Thinking High",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
 			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
@@ -92,6 +39,7 @@ func GetClaudeModels() []*ModelInfo {
 			Description:         "Premium model combining maximum intelligence with practical performance",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                  "claude-opus-4-1-20250805",
@@ -102,6 +50,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4.1 Opus",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                  "claude-opus-4-20250514",
@@ -112,6 +61,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4 Opus",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                  "claude-sonnet-4-20250514",
@@ -122,6 +72,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4 Sonnet",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                  "claude-3-7-sonnet-20250219",
@@ -132,6 +83,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 3.7 Sonnet",
 			ContextLength:       128000,
 			MaxCompletionTokens: 8192,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                  "claude-3-5-haiku-20241022",
@@ -142,6 +94,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 3.5 Haiku",
 			ContextLength:       128000,
 			MaxCompletionTokens: 8192,
+			// Thinking: not supported for Haiku models
 		},
 	}
 }
@@ -529,58 +482,7 @@ func GetOpenAIModels() []*ModelInfo {
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-minimal",
-			Object:              "model",
-			Created:             1754524800,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-08-07",
-			DisplayName:         "GPT 5 Minimal",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-low",
-			Object:              "model",
-			Created:             1754524800,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-08-07",
-			DisplayName:         "GPT 5 Low",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-medium",
-			Object:              "model",
-			Created:             1754524800,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-08-07",
-			DisplayName:         "GPT 5 Medium",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-high",
-			Object:              "model",
-			Created:             1754524800,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-08-07",
-			DisplayName:         "GPT 5 High",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
 		},
 		{
 			ID:                  "gpt-5-codex",
@@ -594,45 +496,7 @@ func GetOpenAIModels() []*ModelInfo {
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-codex-low",
-			Object:              "model",
-			Created:             1757894400,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-09-15",
-			DisplayName:         "GPT 5 Codex Low",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-codex-medium",
-			Object:              "model",
-			Created:             1757894400,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-09-15",
-			DisplayName:         "GPT 5 Codex Medium",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-codex-high",
-			Object:              "model",
-			Created:             1757894400,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-09-15",
-			DisplayName:         "GPT 5 Codex High",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "gpt-5-codex-mini",
@@ -646,32 +510,7 @@ func GetOpenAIModels() []*ModelInfo {
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-codex-mini-medium",
-			Object:              "model",
-			Created:             1762473600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-11-07",
-			DisplayName:         "GPT 5 Codex Mini Medium",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5-codex-mini-high",
-			Object:              "model",
-			Created:             1762473600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5-2025-11-07",
-			DisplayName:         "GPT 5 Codex Mini High",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "gpt-5.1",
@@ -685,58 +524,7 @@ func GetOpenAIModels() []*ModelInfo {
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-none",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Low",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-low",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Low",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-medium",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Medium",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-high",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 High",
-			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
 		},
 		{
 			ID:                  "gpt-5.1-codex",
@@ -745,50 +533,12 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-low",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex Low",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-medium",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex Medium",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-high",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex High",
-			Description:         "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.",
+			DisplayName:         "GPT 5.1 Codex",
+			Description:         "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "gpt-5.1-codex-mini",
@@ -797,39 +547,13 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex Mini",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
+			DisplayName:         "GPT 5.1 Codex Mini",
+			Description:         "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
-		{
-			ID:                  "gpt-5.1-codex-mini-medium",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex Mini Medium",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-mini-high",
-			Object:              "model",
-			Created:             1762905600,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Codex Mini High",
-			Description:         "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-
 		{
 			ID:                  "gpt-5.1-codex-max",
 			Object:              "model",
@@ -837,63 +561,12 @@ func GetOpenAIModels() []*ModelInfo {
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5 Codex Max",
-			Description:         "Stable version of GPT 5 Codex Max",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-max-low",
-			Object:              "model",
-			Created:             1763424000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5 Codex Max Low",
-			Description:         "Stable version of GPT 5 Codex Max Low",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-max-medium",
-			Object:              "model",
-			Created:             1763424000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5 Codex Max Medium",
-			Description:         "Stable version of GPT 5 Codex Max Medium",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-max-high",
-			Object:              "model",
-			Created:             1763424000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5 Codex Max High",
-			Description:         "Stable version of GPT 5 Codex Max High",
-			ContextLength:       400000,
-			MaxCompletionTokens: 128000,
-			SupportedParameters: []string{"tools"},
-		},
-		{
-			ID:                  "gpt-5.1-codex-max-xhigh",
-			Object:              "model",
-			Created:             1763424000,
-			OwnedBy:             "openai",
-			Type:                "openai",
-			Version:             "gpt-5.1-max",
-			DisplayName:         "GPT 5 Codex Max XHigh",
-			Description:         "Stable version of GPT 5 Codex Max XHigh",
+			DisplayName:         "GPT 5.1 Codex Max",
+			Description:         "Stable version of GPT 5.1 Codex Max",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
 		},
 	}
 }
@@ -950,6 +623,7 @@ func GetIFlowModels() []*ModelInfo {
 		DisplayName string
 		Description string
 		Created     int64
+		Thinking    *ThinkingSupport
 	}{
 		{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
 		{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
@@ -959,17 +633,17 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
 		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
 		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
-		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 general model", Created: 1762387200},
+		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
 		{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
 		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
 		{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
-		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
+		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
 		{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
 		{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
-		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
+		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
 		{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
 		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
-		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
+		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
 	}
 	models := make([]*ModelInfo, 0, len(entries))
 	for _, entry := range entries {
@@ -981,6 +655,7 @@ func GetIFlowModels() []*ModelInfo {
 			Type:        "iflow",
 			DisplayName: entry.DisplayName,
 			Description: entry.Description,
+			Thinking:    entry.Thinking,
 		})
 	}
 	return models
@@ -991,17 +666,20 @@ func GetIFlowModels() []*ModelInfo {
 type AntigravityModelConfig struct {
 	Thinking            *ThinkingSupport
 	MaxCompletionTokens int
+	Name                string
 }

 // GetAntigravityModelConfig returns static configuration for antigravity models.
 // Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
 func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 	return map[string]*AntigravityModelConfig{
-		"gemini-2.5-flash":                  {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
-		"gemini-2.5-flash-lite":             {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
-		"gemini-3-pro-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
-		"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"gemini-claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"gemini-2.5-flash":                        {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
+		"gemini-2.5-flash-lite":                   {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
+		"gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"},
+		"gemini-3-pro-preview":                    {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-preview"},
+		"gemini-3-pro-image-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-image-preview"},
+		"gemini-claude-sonnet-4-5-thinking":       {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"gemini-claude-opus-4-5-thinking":         {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
 	}
 }

@@ -1192,17 +870,6 @@ func GetGitHubCopilotModels() []*ModelInfo {
 // GetKiroModels returns the Kiro (AWS CodeWhisperer) model definitions
 func GetKiroModels() []*ModelInfo {
 	return []*ModelInfo{
-		{
-			ID:                  "kiro-auto",
-			Object:              "model",
-			Created:             1732752000, // 2024-11-28
-			OwnedBy:             "aws",
-			Type:                "kiro",
-			DisplayName:         "Kiro Auto",
-			Description:         "Automatic model selection by AWS CodeWhisperer",
-			ContextLength:       200000,
-			MaxCompletionTokens: 64000,
-		},
 		{
 			ID:                  "kiro-claude-opus-4.5",
 			Object:              "model",
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -63,6 +63,9 @@ type ThinkingSupport struct {
 	ZeroAllowed bool `json:"zero_allowed,omitempty"`
 	// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
 	DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
+	// Levels defines discrete reasoning effort levels (e.g., "low", "medium", "high").
+	// When set, the model uses level-based reasoning instead of token budgets.
+	Levels []string `json:"levels,omitempty"`
 }

 // ModelRegistration tracks a model's availability
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -309,11 +309,9 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	to := sdktranslator.FromString("gemini")
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
 	payload = applyThinkingMetadata(payload, req.Metadata, req.Model)
+	payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
 	payload = util.ConvertThinkingLevelToBudget(payload)
-	if budget := gjson.GetBytes(payload, "generationConfig.thinkingConfig.thinkingBudget"); budget.Exists() {
-		normalized := util.NormalizeThinkingBudget(req.Model, int(budget.Int()))
-		payload, _ = sjson.SetBytes(payload, "generationConfig.thinkingConfig.thinkingBudget", normalized)
-	}
+	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload)
 	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
 	payload = fixGeminiImageAspectRatio(req.Model, payload)
 	payload = applyPayloadConfig(e.cfg, req.Model, payload)
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -28,18 +28,18 @@ import (
 )

 const (
-	antigravityBaseURLDaily        = "https://daily-cloudcode-pa.sandbox.googleapis.com"
-	antigravityBaseURLAutopush     = "https://autopush-cloudcode-pa.sandbox.googleapis.com"
-	antigravityBaseURLProd         = "https://cloudcode-pa.googleapis.com"
-	antigravityStreamPath          = "/v1internal:streamGenerateContent"
-	antigravityGeneratePath        = "/v1internal:generateContent"
-	antigravityModelsPath          = "/v1internal:fetchAvailableModels"
-	antigravityClientID            = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
-	antigravityClientSecret        = "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"
-	defaultAntigravityAgent        = "antigravity/1.11.5 windows/amd64"
-	antigravityAuthType            = "antigravity"
-	refreshSkew                    = 3000 * time.Second
-	streamScannerBuffer        int = 20_971_520
+	antigravityBaseURLDaily = "https://daily-cloudcode-pa.sandbox.googleapis.com"
+	// antigravityBaseURLAutopush     = "https://autopush-cloudcode-pa.sandbox.googleapis.com"
+	antigravityBaseURLProd      = "https://cloudcode-pa.googleapis.com"
+	antigravityStreamPath       = "/v1internal:streamGenerateContent"
+	antigravityGeneratePath     = "/v1internal:generateContent"
+	antigravityModelsPath       = "/v1internal:fetchAvailableModels"
+	antigravityClientID         = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
+	antigravityClientSecret     = "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"
+	defaultAntigravityAgent     = "antigravity/1.11.5 windows/amd64"
+	antigravityAuthType         = "antigravity"
+	refreshSkew                 = 3000 * time.Second
+	streamScannerBuffer     int = 52_428_800 // 50MB
 )

 var (
@@ -81,6 +81,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)

 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
@@ -175,6 +176,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
@@ -377,9 +379,14 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 		for originalName := range result.Map() {
 			aliasName := modelName2Alias(originalName)
 			if aliasName != "" {
+				cfg := modelConfig[aliasName]
+				modelName := aliasName
+				if cfg != nil && cfg.Name != "" {
+					modelName = cfg.Name
+				}
 				modelInfo := &registry.ModelInfo{
 					ID:          aliasName,
-					Name:        aliasName,
+					Name:        modelName,
 					Description: aliasName,
 					DisplayName: aliasName,
 					Version:     aliasName,
@@ -389,7 +396,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 					Type:        antigravityAuthType,
 				}
 				// Look up Thinking support from static config using alias name
-				if cfg, ok := modelConfig[aliasName]; ok {
+				if cfg != nil {
 					if cfg.Thinking != nil {
 						modelInfo.Thinking = cfg.Thinking
 					}
@@ -658,7 +665,7 @@ func buildBaseURL(auth *cliproxyauth.Auth) string {
 	if baseURLs := antigravityBaseURLFallbackOrder(auth); len(baseURLs) > 0 {
 		return baseURLs[0]
 	}
-	return antigravityBaseURLAutopush
+	return antigravityBaseURLDaily
 }

 func resolveHost(base string) string {
@@ -694,7 +701,7 @@ func antigravityBaseURLFallbackOrder(auth *cliproxyauth.Auth) []string {
 	}
 	return []string{
 		antigravityBaseURLDaily,
-		antigravityBaseURLAutopush,
+		// antigravityBaseURLAutopush,
 		antigravityBaseURLProd,
 	}
 }
@@ -842,9 +849,12 @@ func normalizeAntigravityThinking(model string, payload []byte) []byte {
 		effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload)
 		if effectiveMax > 0 && normalized >= effectiveMax {
 			normalized = effectiveMax - 1
-			if normalized < 1 {
-				normalized = 1
-			}
+		}
+		minBudget := antigravityMinThinkingBudget(model)
+		if minBudget > 0 && normalized >= 0 && normalized < minBudget {
+			// Budget is below minimum, remove thinking config entirely
+			payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig")
+			return payload
 		}
 		if setDefaultMax {
 			if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil {
@@ -872,3 +882,12 @@ func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromM
 	}
 	return 0, false
 }
+
+// antigravityMinThinkingBudget returns the minimum thinking budget for a model.
+// Falls back to -1 if no model info is found.
+func antigravityMinThinkingBudget(model string) int {
+	if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.Thinking != nil {
+		return modelInfo.Thinking.Min
+	}
+	return -1
+}
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -54,15 +54,22 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	modelForUpstream := req.Model
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
-		body, _ = sjson.SetBytes(body, "model", modelOverride)
-		modelForUpstream = modelOverride
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel == "" {
+		upstreamModel = req.Model
 	}
-	// Inject thinking config based on model suffix for thinking variants
-	body = e.injectThinkingConfig(req.Model, body)
+	if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
+		upstreamModel = modelOverride
+	} else if !strings.EqualFold(upstreamModel, req.Model) {
+		if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+			upstreamModel = modelOverride
+		}
+	}
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	// Inject thinking config based on model metadata for thinking variants
+	body = e.injectThinkingConfig(req.Model, req.Metadata, body)

-	if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
+	if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -161,11 +168,20 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("claude")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
-		body, _ = sjson.SetBytes(body, "model", modelOverride)
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel == "" {
+		upstreamModel = req.Model
 	}
-	// Inject thinking config based on model suffix for thinking variants
-	body = e.injectThinkingConfig(req.Model, body)
+	if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
+		upstreamModel = modelOverride
+	} else if !strings.EqualFold(upstreamModel, req.Model) {
+		if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+			upstreamModel = modelOverride
+		}
+	}
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	// Inject thinking config based on model metadata for thinking variants
+	body = e.injectThinkingConfig(req.Model, req.Metadata, body)
 	body = checkSystemInstructions(body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)

@@ -238,7 +254,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 		// If from == to (Claude → Claude), directly forward the SSE stream without translation
 		if from == to {
 			scanner := bufio.NewScanner(decodedBody)
-			scanner.Buffer(nil, 20_971_520)
+			scanner.Buffer(nil, 52_428_800) // 50MB
 			for scanner.Scan() {
 				line := scanner.Bytes()
 				appendAPIResponseChunk(ctx, e.cfg, line)
@@ -261,7 +277,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A

 		// For other formats, use translation
 		scanner := bufio.NewScanner(decodedBody)
-		scanner.Buffer(nil, 20_971_520)
+		scanner.Buffer(nil, 52_428_800) // 50MB
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
@@ -295,13 +311,20 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	// Use streaming translation to preserve function calling, except for claude.
 	stream := from != to
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	modelForUpstream := req.Model
-	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
-		body, _ = sjson.SetBytes(body, "model", modelOverride)
-		modelForUpstream = modelOverride
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel == "" {
+		upstreamModel = req.Model
 	}
+	if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" {
+		upstreamModel = modelOverride
+	} else if !strings.EqualFold(upstreamModel, req.Model) {
+		if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
+			upstreamModel = modelOverride
+		}
+	}
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

-	if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
+	if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") {
 		body = checkSystemInstructions(body)
 	}

@@ -427,31 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
 	return betas, body
 }

-// injectThinkingConfig adds thinking configuration based on model name suffix
-func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []byte {
-	// Only inject if thinking config is not already present
-	if gjson.GetBytes(body, "thinking").Exists() {
+// injectThinkingConfig adds thinking configuration based on metadata using the unified flow.
+// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata
+// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini.
+func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
+	budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata)
+	if !ok {
 		return body
 	}
-
-	var budgetTokens int
-	switch {
-	case strings.HasSuffix(modelName, "-thinking-low"):
-		budgetTokens = 1024
-	case strings.HasSuffix(modelName, "-thinking-medium"):
-		budgetTokens = 8192
-	case strings.HasSuffix(modelName, "-thinking-high"):
-		budgetTokens = 24576
-	case strings.HasSuffix(modelName, "-thinking"):
-		// Default thinking without suffix uses medium budget
-		budgetTokens = 8192
-	default:
-		return body
-	}
-
-	body, _ = sjson.SetBytes(body, "thinking.type", "enabled")
-	body, _ = sjson.SetBytes(body, "thinking.budget_tokens", budgetTokens)
-	return body
+	return util.ApplyClaudeThinkingConfig(body, budget)
 }

 // ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
@@ -491,35 +498,45 @@ func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
 }

 func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
-	if alias == "" {
+	trimmed := strings.TrimSpace(alias)
+	if trimmed == "" {
 		return ""
 	}
-	// Hardcoded mappings for thinking models to actual Claude model names
-	switch alias {
-	case "claude-opus-4-5-thinking", "claude-opus-4-5-thinking-low", "claude-opus-4-5-thinking-medium", "claude-opus-4-5-thinking-high":
-		return "claude-opus-4-5-20251101"
-	case "claude-sonnet-4-5-thinking":
-		return "claude-sonnet-4-5-20250929"
-	}
+
 	entry := e.resolveClaudeConfig(auth)
 	if entry == nil {
 		return ""
 	}
+
+	normalizedModel, metadata := util.NormalizeThinkingModel(trimmed)
+
+	// Candidate names to match against configured aliases/names.
+	candidates := []string{strings.TrimSpace(normalizedModel)}
+	if !strings.EqualFold(normalizedModel, trimmed) {
+		candidates = append(candidates, trimmed)
+	}
+	if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) {
+		candidates = append(candidates, original)
+	}
+
 	for i := range entry.Models {
 		model := entry.Models[i]
 		name := strings.TrimSpace(model.Name)
 		modelAlias := strings.TrimSpace(model.Alias)
-		if modelAlias != "" {
-			if strings.EqualFold(modelAlias, alias) {
+
+		for _, candidate := range candidates {
+			if candidate == "" {
+				continue
+			}
+			if modelAlias != "" && strings.EqualFold(modelAlias, candidate) {
 				if name != "" {
 					return name
 				}
-				return alias
+				return candidate
+			}
+			if name != "" && strings.EqualFold(name, candidate) {
+				return name
 			}
-			continue
-		}
-		if name != "" && strings.EqualFold(name, alias) {
-			return name
 		}
 	}
 	return ""
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -49,14 +49,18 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-
-	body = e.setReasoningEffortByAlias(req.Model, body)
-
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
+	body = normalizeThinkingConfig(body, upstreamModel)
+	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+		return resp, errValidate
+	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
-
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")

@@ -142,13 +146,20 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

-	body = e.setReasoningEffortByAlias(req.Model, body)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
+	body = normalizeThinkingConfig(body, upstreamModel)
+	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+		return nil, errValidate
+	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -205,7 +216,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			}
 		}()
 		scanner := bufio.NewScanner(httpResp.Body)
-		scanner.Buffer(nil, 20_971_520)
+		scanner.Buffer(nil, 52_428_800) // 50MB
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
@@ -235,14 +246,16 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 }

 func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)

 	modelForCounting := req.Model

-	body = e.setReasoningEffortByAlias(req.Model, body)
-
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "stream", false)

@@ -261,83 +274,6 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 }

-func (e *CodexExecutor) setReasoningEffortByAlias(modelName string, payload []byte) []byte {
-	if util.InArray([]string{"gpt-5", "gpt-5-minimal", "gpt-5-low", "gpt-5-medium", "gpt-5-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5")
-		switch modelName {
-		case "gpt-5-minimal":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "minimal")
-		case "gpt-5-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5-codex", "gpt-5-codex-low", "gpt-5-codex-medium", "gpt-5-codex-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex")
-		switch modelName {
-		case "gpt-5-codex-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5-codex-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5-codex-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5-codex-mini")
-		switch modelName {
-		case "gpt-5-codex-mini-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5-codex-mini-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1", "gpt-5.1-none", "gpt-5.1-low", "gpt-5.1-medium", "gpt-5.1-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1")
-		switch modelName {
-		case "gpt-5.1-none":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "none")
-		case "gpt-5.1-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5.1-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5.1-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1-codex", "gpt-5.1-codex-low", "gpt-5.1-codex-medium", "gpt-5.1-codex-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex")
-		switch modelName {
-		case "gpt-5.1-codex-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5.1-codex-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5.1-codex-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1-codex-mini", "gpt-5.1-codex-mini-medium", "gpt-5.1-codex-mini-high"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-mini")
-		switch modelName {
-		case "gpt-5.1-codex-mini-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5.1-codex-mini-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1-codex-max", "gpt-5.1-codex-max-low", "gpt-5.1-codex-max-medium", "gpt-5.1-codex-max-high", "gpt-5.1-codex-max-xhigh"}, modelName) {
-		payload, _ = sjson.SetBytes(payload, "model", "gpt-5.1-codex-max")
-		switch modelName {
-		case "gpt-5.1-codex-max-low":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "low")
-		case "gpt-5.1-codex-max-medium":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "medium")
-		case "gpt-5.1-codex-max-high":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "high")
-		case "gpt-5.1-codex-max-xhigh":
-			payload, _ = sjson.SetBytes(payload, "reasoning.effort", "xhigh")
-		}
-	}
-	return payload
-}
-
 func tokenizerForCodexModel(model string) (tokenizer.Codec, error) {
 	sanitized := strings.ToLower(strings.TrimSpace(model))
 	switch {
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -64,6 +64,8 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	to := sdktranslator.FromString("gemini-cli")
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
+	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
@@ -199,6 +201,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	to := sdktranslator.FromString("gemini-cli")
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
+	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
@@ -305,7 +309,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			}()
 			if opts.Alt == "" {
 				scanner := bufio.NewScanner(resp.Body)
-				scanner.Buffer(nil, 20_971_520)
+				scanner.Buffer(nil, 52_428_800) // 50MB
 				var param any
 				for scanner.Scan() {
 					line := scanner.Bytes()
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -75,14 +75,19 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	// Official Gemini API via API key or OAuth bearer
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -91,7 +96,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		}
 	}
 	baseURL := resolveGeminiBaseURL(auth)
-	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, action)
+	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -165,16 +170,21 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	baseURL := resolveGeminiBaseURL(auth)
-	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, "streamGenerateContent")
 	if opts.Alt == "" {
 		url = url + "?alt=sse"
 	} else {
@@ -239,7 +249,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 			}
 		}()
 		scanner := bufio.NewScanner(httpResp.Body)
-		scanner.Buffer(nil, 20_971_520)
+		scanner.Buffer(nil, 52_428_800) // 50MB
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -105,10 +105,12 @@ func (e *GeminiVertexExecutor) CountTokens(ctx context.Context, auth *cliproxyau

 // countTokensWithServiceAccount handles token counting using service account credentials.
 func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -117,13 +119,14 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context
 	}
 	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
 	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
+	translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings")

 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens")
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens")

 	httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq))
 	if errNewReq != nil {
@@ -191,10 +194,12 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context

 // countTokensWithAPIKey handles token counting using API key credentials.
 func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
@@ -203,6 +208,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *
 	}
 	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
 	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
+	translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig")
@@ -286,19 +292,24 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -307,7 +318,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		}
 	}
 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action)
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -381,19 +392,24 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	action := "generateContent"
 	if req.Metadata != nil {
@@ -406,7 +422,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, action)
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, action)
 	if opts.Alt != "" && action != "countTokens" {
 		url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 	}
@@ -477,22 +493,27 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	baseURL := vertexBaseURL(location)
-	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "streamGenerateContent")
 	if opts.Alt == "" {
 		url = url + "?alt=sse"
 	} else {
@@ -558,7 +579,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 			}
 		}()
 		scanner := bufio.NewScanner(httpResp.Body)
-		scanner.Buffer(nil, 20_971_520)
+		scanner.Buffer(nil, 52_428_800) // 50MB
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
@@ -589,25 +610,30 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 	reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth)
 	defer reporter.trackFailure(ctx, &err)

+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+	if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
 		if budgetOverride != nil {
 			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
 			budgetOverride = &norm
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
+	body, _ = sjson.SetBytes(body, "model", upstreamModel)

 	// For API key auth, use simpler URL format without project/location
 	if baseURL == "" {
 		baseURL = "https://generativelanguage.googleapis.com"
 	}
-	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "streamGenerateContent")
+	url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, "streamGenerateContent")
 	if opts.Alt == "" {
 		url = url + "?alt=sse"
 	} else {
@@ -670,7 +696,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 			}
 		}()
 		scanner := bufio.NewScanner(httpResp.Body)
-		scanner.Buffer(nil, 20_971_520)
+		scanner.Buffer(nil, 52_428_800) // 50MB
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -57,6 +57,15 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel != "" {
+		body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	}
+	body = normalizeThinkingConfig(body, upstreamModel)
+	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+		return resp, errValidate
+	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)

 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -139,6 +148,15 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel != "" {
+		body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	}
+	body = normalizeThinkingConfig(body, upstreamModel)
+	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+		return nil, errValidate
+	}
 	// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
 	toolsResult := gjson.GetBytes(body, "tools")
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
@@ -201,7 +219,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		}()

 		scanner := bufio.NewScanner(httpResp.Body)
-		scanner.Buffer(nil, 20_971_520)
+		scanner.Buffer(nil, 52_428_800) // 50MB 
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
--- a/internal/runtime/executor/logging_helpers.go
+++ b/internal/runtime/executor/logging_helpers.go
@@ -157,7 +157,7 @@ func appendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byt
 	if ginCtx == nil {
 		return
 	}
-	_, attempt := ensureAttempt(ginCtx)
+	attempts, attempt := ensureAttempt(ginCtx)
 	ensureResponseIntro(attempt)

 	if !attempt.headersWritten {
@@ -175,6 +175,8 @@ func appendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byt
 	}
 	attempt.response.WriteString(string(data))
 	attempt.bodyHasContent = true
+
+	updateAggregatedResponse(ginCtx, attempts)
 }

 func ginContextFrom(ctx context.Context) *gin.Context {
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -58,6 +58,15 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
+	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel != "" {
+		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
+	}
+	translated = normalizeThinkingConfig(translated, upstreamModel)
+	if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
+		return resp, errValidate
+	}

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -143,6 +152,15 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
+	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel != "" {
+		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
+	}
+	translated = normalizeThinkingConfig(translated, upstreamModel)
+	if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
+		return nil, errValidate
+	}

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -206,7 +224,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 			}
 		}()
 		scanner := bufio.NewScanner(httpResp.Body)
-		scanner.Buffer(nil, 20_971_520)
+		scanner.Buffer(nil, 52_428_800) // 50MB
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -1,6 +1,8 @@
 package executor

 import (
+	"fmt"
+	"net/http"
 	"strings"

 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -9,11 +11,11 @@ import (
 	"github.com/tidwall/sjson"
 )

-// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
+// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
 // for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
 func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
-	budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata)
-	if !ok {
+	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
+	if !ok || (budgetOverride == nil && includeOverride == nil) {
 		return payload
 	}
 	if !util.ModelSupportsThinking(model) {
@@ -26,20 +28,44 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
 	return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
 }

-// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
+// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
 // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
 func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
-	budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(metadata)
-	if !ok {
+	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
+	if !ok || (budgetOverride == nil && includeOverride == nil) {
 		return payload
 	}
-	if budgetOverride != nil && util.ModelSupportsThinking(model) {
+	if !util.ModelSupportsThinking(model) {
+		return payload
+	}
+	if budgetOverride != nil {
 		norm := util.NormalizeThinkingBudget(model, *budgetOverride)
 		budgetOverride = &norm
 	}
 	return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
 }

+// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
+// Metadata values take precedence over any existing field when the model supports thinking, intentionally
+// overwriting caller-provided values to honor suffix/default metadata priority.
+func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
+	if len(metadata) == 0 {
+		return payload
+	}
+	if !util.ModelSupportsThinking(model) {
+		return payload
+	}
+	if field == "" {
+		return payload
+	}
+	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
+		if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
+			return updated
+		}
+	}
+	return payload
+}
+
 // applyPayloadConfig applies payload default and override rules from configuration
 // to the given JSON payload for the specified model.
 // Defaults only fill missing fields, while overrides always overwrite existing values.
@@ -189,3 +215,93 @@ func matchModelPattern(pattern, model string) bool {
 	}
 	return pi == len(pattern)
 }
+
+// normalizeThinkingConfig normalizes thinking-related fields in the payload
+// based on model capabilities. For models without thinking support, it strips
+// reasoning fields. For models with level-based thinking, it validates and
+// normalizes the reasoning effort level.
+func normalizeThinkingConfig(payload []byte, model string) []byte {
+	if len(payload) == 0 || model == "" {
+		return payload
+	}
+
+	if !util.ModelSupportsThinking(model) {
+		return stripThinkingFields(payload)
+	}
+
+	if util.ModelUsesThinkingLevels(model) {
+		return normalizeReasoningEffortLevel(payload, model)
+	}
+
+	return payload
+}
+
+// stripThinkingFields removes thinking-related fields from the payload for
+// models that do not support thinking.
+func stripThinkingFields(payload []byte) []byte {
+	fieldsToRemove := []string{
+		"reasoning",
+		"reasoning_effort",
+		"reasoning.effort",
+	}
+	out := payload
+	for _, field := range fieldsToRemove {
+		if gjson.GetBytes(out, field).Exists() {
+			out, _ = sjson.DeleteBytes(out, field)
+		}
+	}
+	return out
+}
+
+// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort
+// or reasoning.effort field for level-based thinking models.
+func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
+	out := payload
+
+	if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
+		if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
+			out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
+		}
+	}
+
+	if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
+		if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
+			out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
+		}
+	}
+
+	return out
+}
+
+// validateThinkingConfig checks for unsupported reasoning levels on level-based models.
+// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
+// downgrading requests.
+func validateThinkingConfig(payload []byte, model string) error {
+	if len(payload) == 0 || model == "" {
+		return nil
+	}
+	if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
+		return nil
+	}
+
+	levels := util.GetModelThinkingLevels(model)
+	checkField := func(path string) error {
+		if effort := gjson.GetBytes(payload, path); effort.Exists() {
+			if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
+				return statusErr{
+					code: http.StatusBadRequest,
+					msg:  fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
+				}
+			}
+		}
+		return nil
+	}
+
+	if err := checkField("reasoning_effort"); err != nil {
+		return err
+	}
+	if err := checkField("reasoning.effort"); err != nil {
+		return err
+	}
+	return nil
+}
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -12,6 +12,7 @@ import (

 	qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
 	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -50,6 +51,15 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel != "" {
+		body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	}
+	body = normalizeThinkingConfig(body, upstreamModel)
+	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+		return resp, errValidate
+	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)

 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
@@ -121,6 +131,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel != "" {
+		body, _ = sjson.SetBytes(body, "model", upstreamModel)
+	}
+	body = normalizeThinkingConfig(body, upstreamModel)
+	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+		return nil, errValidate
+	}
 	toolsResult := gjson.GetBytes(body, "tools")
 	// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
 	// This will have no real consequences. It's just to scare Qwen3.
@@ -181,7 +200,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 			}
 		}()
 		scanner := bufio.NewScanner(httpResp.Body)
-		scanner.Buffer(nil, 20_971_520)
+		scanner.Buffer(nil, 52_428_800) // 50MB
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -123,6 +123,15 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 							functionResponse := client.FunctionResponse{ID: toolCallID, Name: funcName, Response: map[string]interface{}{"result": responseData}}
 							clientContent.Parts = append(clientContent.Parts, client.Part{FunctionResponse: &functionResponse})
 						}
+					} else if contentTypeResult.Type == gjson.String && contentTypeResult.String() == "image" {
+						sourceResult := contentResult.Get("source")
+						if sourceResult.Get("type").String() == "base64" {
+							inlineData := &client.InlineData{
+								MimeType: sourceResult.Get("media_type").String(),
+								Data:     sourceResult.Get("data").String(),
+							}
+							clientContent.Parts = append(clientContent.Parts, client.Part{InlineData: inlineData})
+						}
 					}
 				}
 				contents = append(contents, clientContent)
@@ -180,7 +189,6 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
-				budget = util.NormalizeThinkingBudget(modelName, budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
--- a/internal/translator/antigravity/claude/antigravity_claude_response.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response.go
@@ -12,6 +12,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	"github.com/tidwall/gjson"
@@ -34,8 +35,12 @@ type Params struct {
 	TotalTokenCount      int64  // Cached total token count from usage metadata
 	HasSentFinalEvents   bool   // Indicates if final content/message events have been sent
 	HasToolUse           bool   // Indicates if tool use was observed in the stream
+	HasContent           bool   // Tracks whether any content (text, thinking, or tool use) has been output
 }

+// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
+var toolUseIDCounter uint64
+
 // ConvertAntigravityResponseToClaude performs sophisticated streaming response format conversion.
 // This function implements a complex state machine that translates backend client responses
 // into Claude Code-compatible Server-Sent Events (SSE) format. It manages different response types
@@ -65,11 +70,14 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq

 	if bytes.Equal(rawJSON, []byte("[DONE]")) {
 		output := ""
-		appendFinalEvents(params, &output, true)
-
-		return []string{
-			output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
+		// Only send final events if we have actually output content
+		if params.HasContent {
+			appendFinalEvents(params, &output, true)
+			return []string{
+				output + "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
+			}
 		}
+		return []string{}
 	}

 	output := ""
@@ -115,10 +123,12 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 						output = output + "event: content_block_delta\n"
 						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"signature_delta","signature":""}}`, params.ResponseIndex), "delta.signature", thoughtSignature.String())
 						output = output + fmt.Sprintf("data: %s\n\n\n", data)
+						params.HasContent = true
 					} else if params.ResponseType == 2 { // Continue existing thinking block if already in thinking state
 						output = output + "event: content_block_delta\n"
 						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
 						output = output + fmt.Sprintf("data: %s\n\n\n", data)
+						params.HasContent = true
 					} else {
 						// Transition from another state to thinking
 						// First, close any existing content block
@@ -142,6 +152,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, params.ResponseIndex), "delta.thinking", partTextResult.String())
 						output = output + fmt.Sprintf("data: %s\n\n\n", data)
 						params.ResponseType = 2 // Set state to thinking
+						params.HasContent = true
 					}
 				} else {
 					finishReasonResult := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason")
@@ -152,6 +163,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 							output = output + "event: content_block_delta\n"
 							data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
 							output = output + fmt.Sprintf("data: %s\n\n\n", data)
+							params.HasContent = true
 						} else {
 							// Transition from another state to text content
 							// First, close any existing content block
@@ -175,6 +187,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 								data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, params.ResponseIndex), "delta.text", partTextResult.String())
 								output = output + fmt.Sprintf("data: %s\n\n\n", data)
 								params.ResponseType = 1 // Set state to content
+								params.HasContent = true
 							}
 						}
 					}
@@ -216,7 +229,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq

 				// Create the tool use block with unique ID and function details
 				data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, params.ResponseIndex)
-				data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
 				data, _ = sjson.Set(data, "content_block.name", fcName)
 				output = output + fmt.Sprintf("data: %s\n\n\n", data)

@@ -226,6 +239,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 					output = output + fmt.Sprintf("data: %s\n\n\n", data)
 				}
 				params.ResponseType = 3
+				params.HasContent = true
 			}
 		}
 	}
@@ -265,6 +279,11 @@ func appendFinalEvents(params *Params, output *string, force bool) {
 		return
 	}

+	// Only send final events if we have actually output content
+	if !params.HasContent {
+		return
+	}
+
 	if params.ResponseType != 0 {
 		*output = *output + "event: content_block_stop\n"
 		*output = *output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, params.ResponseIndex)
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
-			var normalized int
+			var budget int

 			if v := tc.Get("thinkingBudget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}

@@ -82,7 +82,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget && normalized != 0 {
+			} else if setBudget && budget != 0 {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
@@ -94,7 +94,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
 			if t.Get("type").String() == "enabled" {
 				if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
-					budget := util.NormalizeThinkingBudget(modelName, int(b.Int()))
+					budget := int(b.Int())
 					out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 					out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 				}
@@ -102,15 +102,6 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		}
 	}

-	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
-	// This matches the official Gemini CLI behavior which always sends:
-	// { thinkingBudget: -1, includeThoughts: true }
-	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
-	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-	}
-
 	// Temperature/top_p/top_k/max_tokens
 	if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
 		out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
@@ -11,6 +11,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
@@ -24,6 +25,9 @@ type convertCliResponseToOpenAIChatParams struct {
 	FunctionIndex int
 }

+// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
+var functionCallIDCounter uint64
+
 // ConvertAntigravityResponseToOpenAI translates a single chunk of a streaming response from the
 // Gemini CLI API format to the OpenAI Chat Completions streaming format.
 // It processes various Gemini CLI event types and transforms them into OpenAI-compatible JSON responses.
@@ -146,7 +150,7 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq

 				functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
 				fcName := functionCallResult.Get("name").String()
-				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
 				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
 				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
 				if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
--- a/internal/translator/claude/gemini/claude_gemini_response.go
+++ b/internal/translator/claude/gemini/claude_gemini_response.go
@@ -331,9 +331,8 @@ func ConvertClaudeResponseToGeminiNonStream(_ context.Context, modelName string,
 	streamingEvents := make([][]byte, 0)

 	scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
-	// Use a smaller initial buffer (64KB) that can grow up to 20MB if needed
-	// This prevents allocating 20MB for every request regardless of size
-	scanner.Buffer(make([]byte, 64*1024), 20_971_520)
+	buffer := make([]byte, 52_428_800) // 50MB
+	scanner.Buffer(buffer, 52_428_800)
 	for scanner.Scan() {
 		line := scanner.Bytes()
 		// log.Debug(string(line))
--- a/internal/translator/claude/openai/responses/claude_openai-responses_response.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_response.go
@@ -445,8 +445,8 @@ func ConvertClaudeResponseToOpenAIResponsesNonStream(_ context.Context, _ string
 		// Use a simple scanner to iterate through raw bytes
 		// Note: extremely large responses may require increasing the buffer
 		scanner := bufio.NewScanner(bytes.NewReader(rawJSON))
-		buf := make([]byte, 20_971_520)
-		scanner.Buffer(buf, 20_971_520)
+		buf := make([]byte, 52_428_800) // 50MB
+		scanner.Buffer(buf, 52_428_800)
 		for scanner.Scan() {
 			line := scanner.Bytes()
 			if !bytes.HasPrefix(line, dataTag) {
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -165,7 +165,6 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
-				budget = util.NormalizeThinkingBudget(modelName, budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_response.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_response.go
@@ -12,6 +12,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	"github.com/tidwall/gjson"
@@ -25,8 +26,12 @@ type Params struct {
 	HasFirstResponse bool // Indicates if the initial message_start event has been sent
 	ResponseType     int  // Current response type: 0=none, 1=content, 2=thinking, 3=function
 	ResponseIndex    int  // Index counter for content blocks in the streaming response
+	HasContent       bool // Tracks whether any content (text, thinking, or tool use) has been output
 }

+// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
+var toolUseIDCounter uint64
+
 // ConvertGeminiCLIResponseToClaude performs sophisticated streaming response format conversion.
 // This function implements a complex state machine that translates backend client responses
 // into Claude Code-compatible Server-Sent Events (SSE) format. It manages different response types
@@ -53,9 +58,13 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
 	}

 	if bytes.Equal(rawJSON, []byte("[DONE]")) {
-		return []string{
-			"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
+		// Only send message_stop if we have actually output content
+		if (*param).(*Params).HasContent {
+			return []string{
+				"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
+			}
 		}
+		return []string{}
 	}

 	// Track whether tools are being used in this response chunk
@@ -103,7 +112,8 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
 					if (*param).(*Params).ResponseType == 2 {
 						sb.WriteString("event: content_block_delta\n")
 						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
-						sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
+						output = output + fmt.Sprintf("data: %s\n\n\n", data)
+						(*param).(*Params).HasContent = true
 					} else {
 						// Transition from another state to thinking
 						// First, close any existing content block
@@ -122,6 +132,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
 						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
 						sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
 						(*param).(*Params).ResponseType = 2 // Set state to thinking
+						(*param).(*Params).HasContent = true
 					}
 				} else {
 					// Process regular text content (user-visible output)
@@ -129,7 +140,8 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
 					if (*param).(*Params).ResponseType == 1 {
 						sb.WriteString("event: content_block_delta\n")
 						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
-						sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
+						output = output + fmt.Sprintf("data: %s\n\n\n", data)
+						(*param).(*Params).HasContent = true
 					} else {
 						// Transition from another state to text content
 						// First, close any existing content block
@@ -148,6 +160,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
 						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
 						sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
 						(*param).(*Params).ResponseType = 1 // Set state to content
+						(*param).(*Params).HasContent = true
 					}
 				}
 			} else if functionCallResult.Exists() {
@@ -180,7 +193,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque

 				// Create the tool use block with unique ID and function details
 				data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, (*param).(*Params).ResponseIndex)
-				data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
 				data, _ = sjson.Set(data, "content_block.name", fcName)
 				sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))

@@ -190,6 +203,7 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
 					sb.WriteString(fmt.Sprintf("data: %s\n\n\n", data))
 				}
 				(*param).(*Params).ResponseType = 3
+				(*param).(*Params).HasContent = true
 			}
 		}
 	}
@@ -198,28 +212,31 @@ func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalReque
 	// Process usage metadata and finish reason when present in the response
 	if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
 		if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
-			// Close the final content block
-			sb.WriteString("event: content_block_stop\n")
-			sb.WriteString(fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex))
-			sb.WriteString("\n\n\n")
+			// Only send final events if we have actually output content
+			if (*param).(*Params).HasContent {
+				// Close the final content block
+				output = output + "event: content_block_stop\n"
+				output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
+				output = output + "\n\n\n"

-			// Send the final message delta with usage information and stop reason
-			sb.WriteString("event: message_delta\n")
-			sb.WriteString(`data: `)
+				// Send the final message delta with usage information and stop reason
+				output = output + "event: message_delta\n"
+				output = output + `data: `

-			// Create the message delta template with appropriate stop reason
-			template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
-			// Set tool_use stop reason if tools were used in this response
-			if usedTool {
-				template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
+				// Create the message delta template with appropriate stop reason
+				template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
+				// Set tool_use stop reason if tools were used in this response
+				if usedTool {
+					template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
+				}
+
+				// Include thinking tokens in output token count if present
+				thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+				template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
+				template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
+
+				output = output + template + "\n\n\n"
 			}
-
-			// Include thinking tokens in output token count if present
-			thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
-			template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
-			template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
-
-			sb.WriteString(template + "\n\n\n")
 		}
 	}

--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
-			var normalized int
+			var budget int

 			if v := tc.Get("thinkingBudget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}

@@ -82,21 +82,12 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget && normalized != 0 {
+			} else if setBudget && budget != 0 {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
 	}

-	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
-	// This matches the official Gemini CLI behavior which always sends:
-	// { thinkingBudget: -1, includeThoughts: true }
-	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
-	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-	}
-
 	// Temperature/top_p/top_k
 	if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
 		out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
@@ -11,6 +11,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
@@ -24,6 +25,9 @@ type convertCliResponseToOpenAIChatParams struct {
 	FunctionIndex int
 }

+// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
+var functionCallIDCounter uint64
+
 // ConvertCliResponseToOpenAI translates a single chunk of a streaming response from the
 // Gemini CLI API format to the OpenAI Chat Completions streaming format.
 // It processes various Gemini CLI event types and transforms them into OpenAI-compatible JSON responses.
@@ -146,7 +150,7 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ

 				functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
 				fcName := functionCallResult.Get("name").String()
-				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
 				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
 				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
 				if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -158,7 +158,6 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
-				budget = util.NormalizeThinkingBudget(modelName, budget)
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 			}
--- a/internal/translator/gemini/claude/gemini_claude_response.go
+++ b/internal/translator/gemini/claude/gemini_claude_response.go
@@ -12,6 +12,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	"github.com/tidwall/gjson"
@@ -24,8 +25,12 @@ type Params struct {
 	HasFirstResponse bool
 	ResponseType     int
 	ResponseIndex    int
+	HasContent       bool // Tracks whether any content (text, thinking, or tool use) has been output
 }

+// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
+var toolUseIDCounter uint64
+
 // ConvertGeminiResponseToClaude performs sophisticated streaming response format conversion.
 // This function implements a complex state machine that translates backend client responses
 // into Claude-compatible Server-Sent Events (SSE) format. It manages different response types
@@ -53,9 +58,13 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
 	}

 	if bytes.Equal(rawJSON, []byte("[DONE]")) {
-		return []string{
-			"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
+		// Only send message_stop if we have actually output content
+		if (*param).(*Params).HasContent {
+			return []string{
+				"event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n\n",
+			}
 		}
+		return []string{}
 	}

 	// Track whether tools are being used in this response chunk
@@ -104,6 +113,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
 						output = output + "event: content_block_delta\n"
 						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
 						output = output + fmt.Sprintf("data: %s\n\n\n", data)
+						(*param).(*Params).HasContent = true
 					} else {
 						// Transition from another state to thinking
 						// First, close any existing content block
@@ -127,6 +137,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
 						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"thinking_delta","thinking":""}}`, (*param).(*Params).ResponseIndex), "delta.thinking", partTextResult.String())
 						output = output + fmt.Sprintf("data: %s\n\n\n", data)
 						(*param).(*Params).ResponseType = 2 // Set state to thinking
+						(*param).(*Params).HasContent = true
 					}
 				} else {
 					// Process regular text content (user-visible output)
@@ -135,6 +146,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
 						output = output + "event: content_block_delta\n"
 						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
 						output = output + fmt.Sprintf("data: %s\n\n\n", data)
+						(*param).(*Params).HasContent = true
 					} else {
 						// Transition from another state to text content
 						// First, close any existing content block
@@ -158,6 +170,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
 						data, _ := sjson.Set(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"text_delta","text":""}}`, (*param).(*Params).ResponseIndex), "delta.text", partTextResult.String())
 						output = output + fmt.Sprintf("data: %s\n\n\n", data)
 						(*param).(*Params).ResponseType = 1 // Set state to content
+						(*param).(*Params).HasContent = true
 					}
 				}
 			} else if functionCallResult.Exists() {
@@ -197,7 +210,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR

 				// Create the tool use block with unique ID and function details
 				data := fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, (*param).(*Params).ResponseIndex)
-				data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				data, _ = sjson.Set(data, "content_block.id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1)))
 				data, _ = sjson.Set(data, "content_block.name", fcName)
 				output = output + fmt.Sprintf("data: %s\n\n\n", data)

@@ -207,6 +220,7 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
 					output = output + fmt.Sprintf("data: %s\n\n\n", data)
 				}
 				(*param).(*Params).ResponseType = 3
+				(*param).(*Params).HasContent = true
 			}
 		}
 	}
@@ -214,23 +228,26 @@ func ConvertGeminiResponseToClaude(_ context.Context, _ string, originalRequestR
 	usageResult := gjson.GetBytes(rawJSON, "usageMetadata")
 	if usageResult.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
 		if candidatesTokenCountResult := usageResult.Get("candidatesTokenCount"); candidatesTokenCountResult.Exists() {
-			output = output + "event: content_block_stop\n"
-			output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
-			output = output + "\n\n\n"
+			// Only send final events if we have actually output content
+			if (*param).(*Params).HasContent {
+				output = output + "event: content_block_stop\n"
+				output = output + fmt.Sprintf(`data: {"type":"content_block_stop","index":%d}`, (*param).(*Params).ResponseIndex)
+				output = output + "\n\n\n"

-			output = output + "event: message_delta\n"
-			output = output + `data: `
+				output = output + "event: message_delta\n"
+				output = output + `data: `

-			template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
-			if usedTool {
-				template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
+				template := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
+				if usedTool {
+					template = `{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`
+				}
+
+				thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+				template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
+				template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
+
+				output = output + template + "\n\n\n"
 			}
-
-			thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
-			template, _ = sjson.Set(template, "usage.output_tokens", candidatesTokenCountResult.Int()+thoughtsTokenCount)
-			template, _ = sjson.Set(template, "usage.input_tokens", usageResult.Get("promptTokenCount").Int())
-
-			output = output + template + "\n\n\n"
 		}
 	}

--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
-			var normalized int
+			var budget int

 			if v := tc.Get("thinkingBudget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}

@@ -82,7 +82,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget && normalized != 0 {
+			} else if setBudget && budget != 0 {
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -11,6 +11,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	"github.com/tidwall/gjson"
@@ -23,6 +24,9 @@ type convertGeminiResponseToOpenAIChatParams struct {
 	FunctionIndex int
 }

+// functionCallIDCounter provides a process-wide unique counter for function call identifiers.
+var functionCallIDCounter uint64
+
 // ConvertGeminiResponseToOpenAI translates a single chunk of a streaming response from the
 // Gemini API format to the OpenAI Chat Completions streaming format.
 // It processes various Gemini event types and transforms them into OpenAI-compatible JSON responses.
@@ -148,7 +152,7 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR

 				functionCallTemplate := `{"id": "","index": 0,"type": "function","function": {"name": "","arguments": ""}}`
 				fcName := functionCallResult.Get("name").String()
-				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
 				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "index", functionCallIndex)
 				functionCallTemplate, _ = sjson.Set(functionCallTemplate, "function.name", fcName)
 				if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
@@ -281,7 +285,7 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
 				}
 				functionCallItemTemplate := `{"id": "","type": "function","function": {"name": "","arguments": ""}}`
 				fcName := functionCallResult.Get("name").String()
-				functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d", fcName, time.Now().UnixNano()))
+				functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "id", fmt.Sprintf("%s-%d-%d", fcName, time.Now().UnixNano(), atomic.AddUint64(&functionCallIDCounter, 1)))
 				functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.name", fcName)
 				if fcArgsResult := functionCallResult.Get("args"); fcArgsResult.Exists() {
 					functionCallItemTemplate, _ = sjson.Set(functionCallItemTemplate, "function.arguments", fcArgsResult.Raw)
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -400,16 +400,16 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "minimal":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096)
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -421,32 +421,22 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
-			var normalized int
+			var budget int
 			if v := tc.Get("thinking_budget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}
 			if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if setBudget {
-				if normalized != 0 {
+				if budget != 0 {
 					out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 				}
 			}
 		}
 	}

-	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
-	// This matches the official Gemini CLI behavior which always sends:
-	// { thinkingBudget: -1, includeThoughts: true }
-	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
-	if !gjson.Get(out, "generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		// log.Debugf("Applied default thinkingConfig for gemini-3-pro-preview (matches Gemini CLI): thinkingBudget=-1, include_thoughts=true")
-	}
-
 	result := []byte(out)
 	result = common.AttachDefaultSafetySettings(result, "safetySettings")
 	return result
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go
@@ -5,6 +5,7 @@ import (
 	"context"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	"github.com/tidwall/gjson"
@@ -37,6 +38,12 @@ type geminiToResponsesState struct {
 	FuncCallIDs map[int]string
 }

+// responseIDCounter provides a process-wide unique counter for synthesized response identifiers.
+var responseIDCounter uint64
+
+// funcCallIDCounter provides a process-wide unique counter for function call identifiers.
+var funcCallIDCounter uint64
+
 func emitEvent(event string, payload string) string {
 	return fmt.Sprintf("event: %s\ndata: %s", event, payload)
 }
@@ -205,7 +212,7 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string,
 					st.FuncArgsBuf[idx] = &strings.Builder{}
 				}
 				if st.FuncCallIDs[idx] == "" {
-					st.FuncCallIDs[idx] = fmt.Sprintf("call_%d", time.Now().UnixNano())
+					st.FuncCallIDs[idx] = fmt.Sprintf("call_%d_%d", time.Now().UnixNano(), atomic.AddUint64(&funcCallIDCounter, 1))
 				}
 				st.FuncNames[idx] = name

@@ -464,7 +471,7 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string
 	// id: prefer provider responseId, otherwise synthesize
 	id := root.Get("responseId").String()
 	if id == "" {
-		id = fmt.Sprintf("resp_%x", time.Now().UnixNano())
+		id = fmt.Sprintf("resp_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&responseIDCounter, 1))
 	}
 	// Normalize to response-style id (prefix resp_ if missing)
 	if !strings.HasPrefix(id, "resp_") {
@@ -575,7 +582,7 @@ func ConvertGeminiResponseToOpenAIResponsesNonStream(_ context.Context, _ string
 			if fc := p.Get("functionCall"); fc.Exists() {
 				name := fc.Get("name").String()
 				args := fc.Get("args")
-				callID := fmt.Sprintf("call_%x", time.Now().UnixNano())
+				callID := fmt.Sprintf("call_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&funcCallIDCounter, 1))
 				outputs = append(outputs, map[string]interface{}{
 					"id":     fmt.Sprintf("fc_%s", callID),
 					"type":   "function_call",
--- a/internal/translator/kiro/claude/kiro_claude.go
+++ b/internal/translator/kiro/claude/kiro_claude.go
@@ -1,5 +1,6 @@
 // Package claude provides translation between Kiro and Claude formats.
-// Since Kiro uses Claude-compatible format internally, translations are mostly pass-through.
+// Since Kiro executor generates Claude-compatible SSE format internally (with event: prefix),
+// translations are pass-through.
 package claude

 import (
@@ -14,6 +15,8 @@ func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bo
 }

 // ConvertKiroResponseToClaude converts Kiro streaming response to Claude format.
+// Kiro executor already generates complete SSE format with "event:" prefix,
+// so this is a simple pass-through.
 func ConvertKiroResponseToClaude(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string {
 	return []string{string(rawResponse)}
 }
--- a/internal/translator/kiro/openai/chat-completions/kiro_openai_response.go
+++ b/internal/translator/kiro/openai/chat-completions/kiro_openai_response.go
@@ -4,6 +4,7 @@ package chat_completions
 import (
 	"context"
 	"encoding/json"
+	"strings"
 	"time"

 	"github.com/google/uuid"
@@ -13,15 +14,58 @@ import (
 // ConvertKiroResponseToOpenAI converts Kiro streaming response to OpenAI SSE format.
 // Handles Claude SSE events: content_block_start, content_block_delta, input_json_delta,
 // content_block_stop, message_delta, and message_stop.
+// Input may be in SSE format: "event: xxx\ndata: {...}" or raw JSON.
 func ConvertKiroResponseToOpenAI(ctx context.Context, model string, originalRequest, request, rawResponse []byte, param *any) []string {
-	root := gjson.ParseBytes(rawResponse)
+	raw := string(rawResponse)
+	var results []string
+
+	// Handle SSE format: extract JSON from "data: " lines
+	// Input format: "event: message_start\ndata: {...}"
+	lines := strings.Split(raw, "\n")
+	for _, line := range lines {
+		line = strings.TrimSpace(line)
+		if strings.HasPrefix(line, "data: ") {
+			jsonPart := strings.TrimPrefix(line, "data: ")
+			chunks := convertClaudeEventToOpenAI(jsonPart, model)
+			results = append(results, chunks...)
+		} else if strings.HasPrefix(line, "{") {
+			// Raw JSON (backward compatibility)
+			chunks := convertClaudeEventToOpenAI(line, model)
+			results = append(results, chunks...)
+		}
+	}
+
+	return results
+}
+
+// convertClaudeEventToOpenAI converts a single Claude JSON event to OpenAI format
+func convertClaudeEventToOpenAI(jsonStr string, model string) []string {
+	root := gjson.Parse(jsonStr)
 	var results []string

 	eventType := root.Get("type").String()

 	switch eventType {
 	case "message_start":
-		// Initial message event - could emit initial chunk if needed
+		// Initial message event - emit initial chunk with role
+		response := map[string]interface{}{
+			"id":      "chatcmpl-" + uuid.New().String()[:24],
+			"object":  "chat.completion.chunk",
+			"created": time.Now().Unix(),
+			"model":   model,
+			"choices": []map[string]interface{}{
+				{
+					"index": 0,
+					"delta": map[string]interface{}{
+						"role":    "assistant",
+						"content": "",
+					},
+					"finish_reason": nil,
+				},
+			},
+		}
+		result, _ := json.Marshal(response)
+		results = append(results, string(result))
 		return results

 	case "content_block_start":
@@ -127,7 +171,7 @@ func ConvertKiroResponseToOpenAI(ctx context.Context, model string, originalRequ
 		return results

 	case "message_delta":
-		// Final message delta with stop_reason
+		// Final message delta with stop_reason and usage
 		stopReason := root.Get("delta.stop_reason").String()
 		if stopReason != "" {
 			finishReason := "stop"
@@ -152,6 +196,19 @@ func ConvertKiroResponseToOpenAI(ctx context.Context, model string, originalRequ
 					},
 				},
 			}
+
+			// Extract and include usage information from message_delta event
+			usage := root.Get("usage")
+			if usage.Exists() {
+				inputTokens := usage.Get("input_tokens").Int()
+				outputTokens := usage.Get("output_tokens").Int()
+				response["usage"] = map[string]interface{}{
+					"prompt_tokens":     inputTokens,
+					"completion_tokens": outputTokens,
+					"total_tokens":      inputTokens + outputTokens,
+				}
+			}
+
 			result, _ := json.Marshal(response)
 			results = append(results, string(result))
 		}
--- a/internal/translator/openai/openai/responses/openai_openai-responses_response.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_response.go
@@ -5,6 +5,7 @@ import (
 	"context"
 	"fmt"
 	"strings"
+	"sync/atomic"
 	"time"

 	"github.com/tidwall/gjson"
@@ -41,6 +42,9 @@ type oaiToResponsesState struct {
 	UsageSeen        bool
 }

+// responseIDCounter provides a process-wide unique counter for synthesized response identifiers.
+var responseIDCounter uint64
+
 func emitRespEvent(event string, payload string) string {
 	return fmt.Sprintf("event: %s\ndata: %s", event, payload)
 }
@@ -590,7 +594,7 @@ func ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(_ context.Co
 	// id: use provider id if present, otherwise synthesize
 	id := root.Get("id").String()
 	if id == "" {
-		id = fmt.Sprintf("resp_%x", time.Now().UnixNano())
+		id = fmt.Sprintf("resp_%x_%d", time.Now().UnixNano(), atomic.AddUint64(&responseIDCounter, 1))
 	}
 	resp, _ = sjson.Set(resp, "id", id)

--- a/internal/util/claude_thinking.go
+++ b/internal/util/claude_thinking.go
@@ -0,0 +1,46 @@
+package util
+
+import (
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload.
+// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget.
+// If budget is nil or the payload already has thinking config, it returns the payload unchanged.
+func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
+	if budget == nil {
+		return body
+	}
+	if gjson.GetBytes(body, "thinking").Exists() {
+		return body
+	}
+	if *budget <= 0 {
+		return body
+	}
+	updated := body
+	updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled")
+	updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget)
+	return updated
+}
+
+// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models.
+// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
+// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
+func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
+	budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
+	if !matched {
+		return nil, false
+	}
+	if include != nil && !*include {
+		return nil, true
+	}
+	if budget == nil {
+		return nil, true
+	}
+	normalized := NormalizeThinkingBudget(modelName, *budget)
+	if normalized <= 0 {
+		return nil, true
+	}
+	return &normalized, true
+}
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -1,8 +1,6 @@
 package util

 import (
-	"encoding/json"
-	"strconv"
 	"strings"

 	"github.com/tidwall/gjson"
@@ -15,80 +13,6 @@ const (
 	GeminiOriginalModelMetadataKey   = "gemini_original_model"
 )

-func ParseGeminiThinkingSuffix(model string) (string, *int, *bool, bool) {
-	if model == "" {
-		return model, nil, nil, false
-	}
-	lower := strings.ToLower(model)
-	if !strings.HasPrefix(lower, "gemini-") {
-		return model, nil, nil, false
-	}
-
-	if strings.HasSuffix(lower, "-nothinking") {
-		base := model[:len(model)-len("-nothinking")]
-		budgetValue := 0
-		if strings.HasPrefix(lower, "gemini-2.5-pro") {
-			budgetValue = 128
-		}
-		include := false
-		return base, &budgetValue, &include, true
-	}
-
-	// Handle "-reasoning" suffix: enables thinking with dynamic budget (-1)
-	// Maps: gemini-2.5-flash-reasoning -> gemini-2.5-flash with thinkingBudget=-1
-	if strings.HasSuffix(lower, "-reasoning") {
-		base := model[:len(model)-len("-reasoning")]
-		budgetValue := -1 // Dynamic budget
-		include := true
-		return base, &budgetValue, &include, true
-	}
-
-	idx := strings.LastIndex(lower, "-thinking-")
-	if idx == -1 {
-		return model, nil, nil, false
-	}
-
-	digits := model[idx+len("-thinking-"):]
-	if digits == "" {
-		return model, nil, nil, false
-	}
-	end := len(digits)
-	for i := 0; i < len(digits); i++ {
-		if digits[i] < '0' || digits[i] > '9' {
-			end = i
-			break
-		}
-	}
-	if end == 0 {
-		return model, nil, nil, false
-	}
-	valueStr := digits[:end]
-	value, err := strconv.Atoi(valueStr)
-	if err != nil {
-		return model, nil, nil, false
-	}
-	base := model[:idx]
-	budgetValue := value
-	return base, &budgetValue, nil, true
-}
-
-func NormalizeGeminiThinkingModel(modelName string) (string, map[string]any) {
-	baseModel, budget, include, matched := ParseGeminiThinkingSuffix(modelName)
-	if !matched {
-		return baseModel, nil
-	}
-	metadata := map[string]any{
-		GeminiOriginalModelMetadataKey: modelName,
-	}
-	if budget != nil {
-		metadata[GeminiThinkingBudgetMetadataKey] = *budget
-	}
-	if include != nil {
-		metadata[GeminiIncludeThoughtsMetadataKey] = *include
-	}
-	return baseModel, metadata
-}
-
 func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
 	if budget == nil && includeThoughts == nil {
 		return body
@@ -133,78 +57,45 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
 	return updated
 }

-func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
-	if len(metadata) == 0 {
-		return nil, nil, false
+// modelsWithDefaultThinking lists models that should have thinking enabled by default
+// when no explicit thinkingConfig is provided.
+var modelsWithDefaultThinking = map[string]bool{
+	"gemini-3-pro-preview": true,
+}
+
+// ModelHasDefaultThinking returns true if the model should have thinking enabled by default.
+func ModelHasDefaultThinking(model string) bool {
+	return modelsWithDefaultThinking[model]
+}
+
+// ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it.
+// For standard Gemini API format (generationConfig.thinkingConfig path).
+// Returns the modified body if thinkingConfig was added, otherwise returns the original.
+func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
+	if !ModelHasDefaultThinking(model) {
+		return body
 	}
-	var (
-		budgetPtr  *int
-		includePtr *bool
-		matched    bool
-	)
-	if rawBudget, ok := metadata[GeminiThinkingBudgetMetadataKey]; ok {
-		switch v := rawBudget.(type) {
-		case int:
-			budget := v
-			budgetPtr = &budget
-			matched = true
-		case int32:
-			budget := int(v)
-			budgetPtr = &budget
-			matched = true
-		case int64:
-			budget := int(v)
-			budgetPtr = &budget
-			matched = true
-		case float64:
-			budget := int(v)
-			budgetPtr = &budget
-			matched = true
-		case json.Number:
-			if val, err := v.Int64(); err == nil {
-				budget := int(val)
-				budgetPtr = &budget
-				matched = true
-			}
-		}
+	if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() {
+		return body
 	}
-	if rawInclude, ok := metadata[GeminiIncludeThoughtsMetadataKey]; ok {
-		switch v := rawInclude.(type) {
-		case bool:
-			include := v
-			includePtr = &include
-			matched = true
-		case string:
-			if parsed, err := strconv.ParseBool(v); err == nil {
-				include := parsed
-				includePtr = &include
-				matched = true
-			}
-		case json.Number:
-			if val, err := v.Int64(); err == nil {
-				include := val != 0
-				includePtr = &include
-				matched = true
-			}
-		case int:
-			include := v != 0
-			includePtr = &include
-			matched = true
-		case int32:
-			include := v != 0
-			includePtr = &include
-			matched = true
-		case int64:
-			include := v != 0
-			includePtr = &include
-			matched = true
-		case float64:
-			include := v != 0
-			includePtr = &include
-			matched = true
-		}
+	updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1)
+	updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true)
+	return updated
+}
+
+// ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it.
+// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
+// Returns the modified body if thinkingConfig was added, otherwise returns the original.
+func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
+	if !ModelHasDefaultThinking(model) {
+		return body
 	}
-	return budgetPtr, includePtr, matched
+	if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
+		return body
+	}
+	updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+	updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true)
+	return updated
 }

 // StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
@@ -223,6 +114,32 @@ func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
 	return updated
 }

+// NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini
+// request body (generationConfig.thinkingConfig.thinkingBudget path).
+func NormalizeGeminiThinkingBudget(model string, body []byte) []byte {
+	const budgetPath = "generationConfig.thinkingConfig.thinkingBudget"
+	budget := gjson.GetBytes(body, budgetPath)
+	if !budget.Exists() {
+		return body
+	}
+	normalized := NormalizeThinkingBudget(model, int(budget.Int()))
+	updated, _ := sjson.SetBytes(body, budgetPath, normalized)
+	return updated
+}
+
+// NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI
+// request body (request.generationConfig.thinkingConfig.thinkingBudget path).
+func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte {
+	const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget"
+	budget := gjson.GetBytes(body, budgetPath)
+	if !budget.Exists() {
+		return body
+	}
+	normalized := NormalizeThinkingBudget(model, int(budget.Int()))
+	updated, _ := sjson.SetBytes(body, budgetPath, normalized)
+	return updated
+}
+
 // ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel"
 // and converts it to "thinkingBudget".
 // "high" -> 32768
--- a/internal/util/thinking.go
+++ b/internal/util/thinking.go
@@ -1,6 +1,8 @@
 package util

 import (
+	"strings"
+
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 )

@@ -67,3 +69,39 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero
 	}
 	return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
 }
+
+// GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
+// Returns nil if the model has no thinking support or no levels defined.
+func GetModelThinkingLevels(model string) []string {
+	if model == "" {
+		return nil
+	}
+	info := registry.GetGlobalRegistry().GetModelInfo(model)
+	if info == nil || info.Thinking == nil {
+		return nil
+	}
+	return info.Thinking.Levels
+}
+
+// ModelUsesThinkingLevels reports whether the model uses discrete reasoning
+// effort levels instead of numeric budgets.
+func ModelUsesThinkingLevels(model string) bool {
+	levels := GetModelThinkingLevels(model)
+	return len(levels) > 0
+}
+
+// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort
+// level for the given model. Returns false when the level is not supported.
+func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
+	levels := GetModelThinkingLevels(model)
+	if len(levels) == 0 {
+		return "", false
+	}
+	loweredEffort := strings.ToLower(strings.TrimSpace(effort))
+	for _, lvl := range levels {
+		if strings.ToLower(lvl) == loweredEffort {
+			return lvl, true
+		}
+	}
+	return "", false
+}
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -0,0 +1,313 @@
+package util
+
+import (
+	"encoding/json"
+	"strconv"
+	"strings"
+)
+
+const (
+	ThinkingBudgetMetadataKey          = "thinking_budget"
+	ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts"
+	ReasoningEffortMetadataKey         = "reasoning_effort"
+	ThinkingOriginalModelMetadataKey   = "thinking_original_model"
+)
+
+// NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
+// the normalized base model with extracted metadata. Supported pattern:
+//   - "(<value>)" where value can be:
+//   - A numeric budget (e.g., "(8192)", "(16384)")
+//   - A reasoning effort level (e.g., "(high)", "(medium)", "(low)")
+//
+// Examples:
+//   - "claude-sonnet-4-5-20250929(16384)" → budget=16384
+//   - "gpt-5.1(high)" → reasoning_effort="high"
+//   - "gemini-2.5-pro(32768)" → budget=32768
+//
+// Note: Empty parentheses "()" are not supported and will be ignored.
+func NormalizeThinkingModel(modelName string) (string, map[string]any) {
+	if modelName == "" {
+		return modelName, nil
+	}
+
+	baseModel := modelName
+
+	var (
+		budgetOverride  *int
+		reasoningEffort *string
+		matched         bool
+	)
+
+	// Match "(<value>)" pattern at the end of the model name
+	if idx := strings.LastIndex(modelName, "("); idx != -1 {
+		if !strings.HasSuffix(modelName, ")") {
+			// Incomplete parenthesis, ignore
+			return baseModel, nil
+		}
+
+		value := modelName[idx+1 : len(modelName)-1] // Extract content between ( and )
+		if value == "" {
+			// Empty parentheses not supported
+			return baseModel, nil
+		}
+
+		candidateBase := modelName[:idx]
+
+		// Auto-detect: pure numeric → budget, string → reasoning effort level
+		if parsed, ok := parseIntPrefix(value); ok {
+			// Numeric value: treat as thinking budget
+			baseModel = candidateBase
+			budgetOverride = &parsed
+			matched = true
+		} else {
+			// String value: treat as reasoning effort level
+			baseModel = candidateBase
+			raw := strings.ToLower(strings.TrimSpace(value))
+			if raw != "" {
+				reasoningEffort = &raw
+				matched = true
+			}
+		}
+	}
+
+	if !matched {
+		return baseModel, nil
+	}
+
+	metadata := map[string]any{
+		ThinkingOriginalModelMetadataKey: modelName,
+	}
+	if budgetOverride != nil {
+		metadata[ThinkingBudgetMetadataKey] = *budgetOverride
+	}
+	if reasoningEffort != nil {
+		metadata[ReasoningEffortMetadataKey] = *reasoningEffort
+	}
+	return baseModel, metadata
+}
+
+// ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel.
+// It accepts both the new generic keys and legacy Gemini-specific keys.
+func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) {
+	if len(metadata) == 0 {
+		return nil, nil, nil, false
+	}
+
+	var (
+		budgetPtr  *int
+		includePtr *bool
+		effortPtr  *string
+		matched    bool
+	)
+
+	readBudget := func(key string) {
+		if budgetPtr != nil {
+			return
+		}
+		if raw, ok := metadata[key]; ok {
+			if v, okNumber := parseNumberToInt(raw); okNumber {
+				budget := v
+				budgetPtr = &budget
+				matched = true
+			}
+		}
+	}
+
+	readInclude := func(key string) {
+		if includePtr != nil {
+			return
+		}
+		if raw, ok := metadata[key]; ok {
+			switch v := raw.(type) {
+			case bool:
+				val := v
+				includePtr = &val
+				matched = true
+			case *bool:
+				if v != nil {
+					val := *v
+					includePtr = &val
+					matched = true
+				}
+			}
+		}
+	}
+
+	readEffort := func(key string) {
+		if effortPtr != nil {
+			return
+		}
+		if raw, ok := metadata[key]; ok {
+			if val, okStr := raw.(string); okStr && strings.TrimSpace(val) != "" {
+				normalized := strings.ToLower(strings.TrimSpace(val))
+				effortPtr = &normalized
+				matched = true
+			}
+		}
+	}
+
+	readBudget(ThinkingBudgetMetadataKey)
+	readBudget(GeminiThinkingBudgetMetadataKey)
+	readInclude(ThinkingIncludeThoughtsMetadataKey)
+	readInclude(GeminiIncludeThoughtsMetadataKey)
+	readEffort(ReasoningEffortMetadataKey)
+	readEffort("reasoning.effort")
+
+	return budgetPtr, includePtr, effortPtr, matched
+}
+
+// ResolveThinkingConfigFromMetadata derives thinking budget/include overrides,
+// converting reasoning effort strings into budgets when possible.
+func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) {
+	budget, include, effort, matched := ThinkingFromMetadata(metadata)
+	if !matched {
+		return nil, nil, false
+	}
+
+	if budget == nil && effort != nil {
+		if derived, ok := ThinkingEffortToBudget(model, *effort); ok {
+			budget = &derived
+		}
+	}
+	return budget, include, budget != nil || include != nil || effort != nil
+}
+
+// ReasoningEffortFromMetadata resolves a reasoning effort string from metadata,
+// inferring "auto" and "none" when budgets request dynamic or disabled thinking.
+func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
+	budget, include, effort, matched := ThinkingFromMetadata(metadata)
+	if !matched {
+		return "", false
+	}
+	if effort != nil && *effort != "" {
+		return strings.ToLower(strings.TrimSpace(*effort)), true
+	}
+	if budget != nil {
+		switch *budget {
+		case -1:
+			return "auto", true
+		case 0:
+			return "none", true
+		}
+	}
+	if include != nil && !*include {
+		return "none", true
+	}
+	return "", true
+}
+
+// ThinkingEffortToBudget maps reasoning effort levels to approximate budgets,
+// clamping the result to the model's supported range.
+func ThinkingEffortToBudget(model, effort string) (int, bool) {
+	if effort == "" {
+		return 0, false
+	}
+	normalized, ok := NormalizeReasoningEffortLevel(model, effort)
+	if !ok {
+		normalized = strings.ToLower(strings.TrimSpace(effort))
+	}
+	switch normalized {
+	case "none":
+		return 0, true
+	case "auto":
+		return NormalizeThinkingBudget(model, -1), true
+	case "minimal":
+		return NormalizeThinkingBudget(model, 512), true
+	case "low":
+		return NormalizeThinkingBudget(model, 1024), true
+	case "medium":
+		return NormalizeThinkingBudget(model, 8192), true
+	case "high":
+		return NormalizeThinkingBudget(model, 24576), true
+	case "xhigh":
+		return NormalizeThinkingBudget(model, 32768), true
+	default:
+		return 0, false
+	}
+}
+
+// ResolveOriginalModel returns the original model name stored in metadata (if present),
+// otherwise falls back to the provided model.
+func ResolveOriginalModel(model string, metadata map[string]any) string {
+	normalize := func(name string) string {
+		if name == "" {
+			return ""
+		}
+		if base, _ := NormalizeThinkingModel(name); base != "" {
+			return base
+		}
+		return strings.TrimSpace(name)
+	}
+
+	if metadata != nil {
+		if v, ok := metadata[ThinkingOriginalModelMetadataKey]; ok {
+			if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
+				if base := normalize(s); base != "" {
+					return base
+				}
+			}
+		}
+		if v, ok := metadata[GeminiOriginalModelMetadataKey]; ok {
+			if s, okStr := v.(string); okStr && strings.TrimSpace(s) != "" {
+				if base := normalize(s); base != "" {
+					return base
+				}
+			}
+		}
+	}
+	// Fallback: try to re-normalize the model name when metadata was dropped.
+	if base := normalize(model); base != "" {
+		return base
+	}
+	return model
+}
+
+func parseIntPrefix(value string) (int, bool) {
+	if value == "" {
+		return 0, false
+	}
+	digits := strings.TrimLeft(value, "-")
+	if digits == "" {
+		return 0, false
+	}
+	end := len(digits)
+	for i := 0; i < len(digits); i++ {
+		if digits[i] < '0' || digits[i] > '9' {
+			end = i
+			break
+		}
+	}
+	if end == 0 {
+		return 0, false
+	}
+	val, err := strconv.Atoi(digits[:end])
+	if err != nil {
+		return 0, false
+	}
+	return val, true
+}
+
+func parseNumberToInt(raw any) (int, bool) {
+	switch v := raw.(type) {
+	case int:
+		return v, true
+	case int32:
+		return int(v), true
+	case int64:
+		return int(v), true
+	case float64:
+		return int(v), true
+	case json.Number:
+		if val, err := v.Int64(); err == nil {
+			return int(val), true
+		}
+	case string:
+		if strings.TrimSpace(v) == "" {
+			return 0, false
+		}
+		if parsed, err := strconv.Atoi(strings.TrimSpace(v)); err == nil {
+			return parsed, true
+		}
+	}
+	return 0, false
+}
--- a/internal/watcher/watcher.go
+++ b/internal/watcher/watcher.go
@@ -1272,7 +1272,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 		}
 		for i := range cfg.KiroKey {
 			kk := cfg.KiroKey[i]
-			var accessToken, profileArn string
+			var accessToken, profileArn, refreshToken string

 			// Try to load from token file first
 			if kk.TokenFile != "" && kAuth != nil {
@@ -1282,6 +1282,7 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 				} else {
 					accessToken = tokenData.AccessToken
 					profileArn = tokenData.ProfileArn
+					refreshToken = tokenData.RefreshToken
 				}
 			}

@@ -1292,6 +1293,9 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 			if kk.ProfileArn != "" {
 				profileArn = kk.ProfileArn
 			}
+			if kk.RefreshToken != "" {
+				refreshToken = kk.RefreshToken
+			}

 			if accessToken == "" {
 				log.Warnf("kiro config[%d] missing access_token, skipping", i)
@@ -1313,6 +1317,9 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 			if kk.AgentTaskType != "" {
 				attrs["agent_task_type"] = kk.AgentTaskType
 			}
+			if refreshToken != "" {
+				attrs["refresh_token"] = refreshToken
+			}
 			proxyURL := strings.TrimSpace(kk.ProxyURL)
 			a := &coreauth.Auth{
 				ID:         id,
@@ -1324,6 +1331,14 @@ func (w *Watcher) SnapshotCoreAuths() []*coreauth.Auth {
 				CreatedAt:  now,
 				UpdatedAt:  now,
 			}
+
+			if refreshToken != "" {
+				if a.Metadata == nil {
+					a.Metadata = make(map[string]any)
+				}
+				a.Metadata["refresh_token"] = refreshToken
+			}
+
 			out = append(out, a)
 		}
 		for i := range cfg.OpenAICompatibility {
--- a/sdk/api/handlers/claude/code_handlers.go
+++ b/sdk/api/handlers/claude/code_handlers.go
@@ -271,6 +271,11 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
 				continue
 			}
 			if errMsg != nil {
+				status := http.StatusInternalServerError
+				if errMsg.StatusCode > 0 {
+					status = errMsg.StatusCode
+				}
+				c.Status(status)
 				// An error occurred: emit as a proper SSE error event
 				errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
 				_, _ = writer.WriteString("event: error\n")
@@ -278,6 +283,7 @@ func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.
 				_, _ = writer.Write(errorBytes)
 				_, _ = writer.WriteString("\n\n")
 				_ = writer.Flush()
+				flusher.Flush()
 			}
 			var execErr error
 			if errMsg != nil {
--- a/sdk/api/handlers/gemini/gemini_handlers.go
+++ b/sdk/api/handlers/gemini/gemini_handlers.go
@@ -48,8 +48,24 @@ func (h *GeminiAPIHandler) Models() []map[string]any {
 // GeminiModels handles the Gemini models listing endpoint.
 // It returns a JSON response containing available Gemini models and their specifications.
 func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) {
+	rawModels := h.Models()
+	normalizedModels := make([]map[string]any, 0, len(rawModels))
+	defaultMethods := []string{"generateContent"}
+	for _, model := range rawModels {
+		normalizedModel := make(map[string]any, len(model))
+		for k, v := range model {
+			normalizedModel[k] = v
+		}
+		if name, ok := normalizedModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
+			normalizedModel["name"] = "models/" + name
+		}
+		if _, ok := normalizedModel["supportedGenerationMethods"]; !ok {
+			normalizedModel["supportedGenerationMethods"] = defaultMethods
+		}
+		normalizedModels = append(normalizedModels, normalizedModel)
+	}
 	c.JSON(http.StatusOK, gin.H{
-		"models": h.Models(),
+		"models": normalizedModels,
 	})
 }

--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -343,18 +343,32 @@ func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string

 	providerName, extractedModelName, isDynamic := h.parseDynamicModel(resolvedModelName)

-	// First, normalize the model name to handle suffixes like "-thinking-128"
-	// This needs to happen before determining the provider for non-dynamic models.
-	normalizedModel, metadata = normalizeModelMetadata(resolvedModelName)
+	targetModelName := resolvedModelName
+	if isDynamic {
+		targetModelName = extractedModelName
+	}
+
+	// Normalize the model name to handle dynamic thinking suffixes before determining the provider.
+	normalizedModel, metadata = normalizeModelMetadata(targetModelName)

 	if isDynamic {
 		providers = []string{providerName}
-		// For dynamic models, the extractedModelName is already normalized by parseDynamicModel
-		// so we use it as the final normalizedModel.
-		normalizedModel = extractedModelName
 	} else {
 		// For non-dynamic models, use the normalizedModel to get the provider name.
 		providers = util.GetProviderName(normalizedModel)
+		if len(providers) == 0 && metadata != nil {
+			if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok {
+				if originalModel, okStr := originalRaw.(string); okStr {
+					originalModel = strings.TrimSpace(originalModel)
+					if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) {
+						if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 {
+							providers = altProviders
+							normalizedModel = originalModel
+						}
+					}
+				}
+			}
+		}
 	}

 	if len(providers) == 0 {
@@ -402,7 +416,7 @@ func cloneBytes(src []byte) []byte {
 }

 func normalizeModelMetadata(modelName string) (string, map[string]any) {
-	return util.NormalizeGeminiThinkingModel(modelName)
+	return util.NormalizeThinkingModel(modelName)
 }

 func cloneMetadata(src map[string]any) map[string]any {
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -788,7 +788,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) {
 							Created:     time.Now().Unix(),
 							OwnedBy:     compat.Name,
 							Type:        "openai-compatibility",
-							DisplayName: m.Name,
+							DisplayName: modelID,
 						})
 					}
 					// Register and return
Author	SHA1	Message	Date
Luis Pater	242b4d5754	Merge pull request #19 from router-for-me/plus v6.6.1	2025-12-11 21:35:16 +08:00
Luis Pater	4ce7c61a17	Merge branch 'main' into plus	2025-12-11 21:33:49 +08:00
Luis Pater	a74ee3f319	Merge pull request #481 from sususu98/fix/increase-buffer-size fix: increase buffer size for stream scanners to 50MB across multiple executors	2025-12-11 21:20:54 +08:00
Luis Pater	564bcbaa54	Merge pull request #487 from router-for-me/amp fix(amp): set status on claude stream errors	2025-12-11 21:18:19 +08:00
hkfires	88bdd25f06	fix(amp): set status on claude stream errors	2025-12-11 20:12:06 +08:00
hkfires	e79f65fd8e	refactor(thinking): use parentheses for metadata suffix	2025-12-11 18:39:07 +08:00
Luis Pater	2760989401	Merge pull request #485 from router-for-me/think Think	2025-12-11 18:27:00 +08:00
hkfires	facfe7c518	refactor(thinking): use bracket tags for thinking meta Align thinking suffix handling on a single bracket-style marker. NormalizeThinkingModel strips a terminal `[value]` segment from model identifiers and turns it into either a thinking budget (for numeric values) or a reasoning effort hint (for strings). Emission of `ThinkingIncludeThoughtsMetadataKey` is removed. Executor helpers and the example config are updated so their comments reference the new `[value]` suffix format instead of the legacy dash variants. BREAKING CHANGE: dash-based thinking suffixes (`-thinking`, `-thinking-N`, `-reasoning`, `-nothinking`) are no longer parsed for thinking metadata; only `[value]` annotations are recognized.	2025-12-11 18:17:28 +08:00
hkfires	6285459c08	fix(runtime): unify claude thinking config resolution	2025-12-11 17:20:44 +08:00
hkfires	21bbceca0c	docs(runtime): document reasoning effort precedence	2025-12-11 16:35:36 +08:00
hkfires	f6300c72b7	fix(runtime): validate thinking config in iflow and qwen	2025-12-11 16:21:50 +08:00
hkfires	007572b58e	fix(util): do not strip thinking suffix on registered models NormalizeThinkingModel now checks ModelSupportsThinking before removing "-thinking" or "-thinking-<ver>", avoiding accidental parsing of model names where the suffix is part of the official id (e.g., kimi-k2-thinking, qwen3-235b-a22b-thinking-2507). The registry adds ThinkingSupport metadata for several models and propagates it via ModelInfo (e.g., kimi-k2-thinking, deepseek-r1, qwen3-235b-a22b-thinking-2507, minimax-m2), enabling accurate detection of thinking-capable models and correcting base model inference.	2025-12-11 15:52:14 +08:00
hkfires	3a81ab22fd	fix(runtime): unify reasoning effort metadata overrides	2025-12-11 14:35:05 +08:00
hkfires	519da2e042	fix(runtime): validate reasoning effort levels	2025-12-11 12:36:54 +08:00
hkfires	169f4295d0	fix(util): align reasoning effort handling with registry	2025-12-11 12:20:12 +08:00
hkfires	d06d0eab2f	fix(util): centralize reasoning effort normalization	2025-12-11 12:14:51 +08:00
hkfires	3ffd120ae9	feat(runtime): add thinking config normalization	2025-12-11 11:51:33 +08:00
hkfires	a03d514095	feat(registry): add thinking metadata for models	2025-12-11 11:28:44 +08:00
Luis Pater	69fccf0015	Merge pull request #18 from Ravens2121/master Kiro Executor Stability and API Compatibility Improvements	2025-12-11 08:35:22 +08:00
Luis Pater	1da03bfe15	Merge pull request #479 from router-for-me/claude fix(claude): prevent final events when no content streamed	2025-12-11 08:18:59 +08:00
Ravens2121	6133bac226	feat(kiro): enhance Kiro executor stability and compatibility ## Changes Overview This commit includes multiple improvements to the Kiro executor for better stability, API compatibility, and code quality. ## Detailed Changes ### 1. Output Token Calculation Improvement (lines 317-330) - Replace simple len(content)/4 estimation with tiktoken-based calculation - Add fallback to character count estimation if tiktoken fails - Improves token counting accuracy for usage tracking ### 2. Stream Handler Panic Recovery (lines 528-533) - Add defer/recover block in streamToChannel goroutine - Prevents single request crashes from affecting the entire service ### 3. Struct Field Reordering (lines 670-673) - Reorder kiroToolResult struct fields: Content, Status, ToolUseID - Ensures consistency with API expectations ### 4. Message Merging Function (lines 778-780, 2356-2483) - Add mergeAdjacentMessages() to combine consecutive messages with same role - Add helper functions: mergeMessageContent(), blockToMap(), createMergedMessage() - Required by Kiro API which doesn't allow adjacent messages from same role ### 5. Empty Content Handling (lines 791-800) - Add default content for empty history messages - User messages with tool results: "Tool results provided." - User messages without tool results: "Continue" ### 6. Assistant Last Message Handling (lines 811-830) - Detect when last message is from assistant - Create synthetic "Continue" user message to satisfy Kiro API requirements - Kiro API requires currentMessage to be userInputMessage type ### 7. Duplicate Content Event Detection (lines 1650-1660) - Track lastContentEvent to detect duplicate streaming events - Skip redundant events to prevent duplicate content in responses - Based on AIClient-2-API implementation for Kiro ### 8. Streaming Token Calculation Enhancement (lines 1785-1817) - Add accumulatedContent buffer for streaming token calculation - Use tiktoken for accurate output token counting during streaming - Add fallback to character count estimation with proper logging ### 9. JSON Repair Enhancement (lines 2665-2818) - Implement conservative JSON repair strategy - First try to parse JSON directly - if valid, return unchanged - Add bracket balancing detection and repair - Only repair when necessary to avoid corrupting valid JSON - Validate repaired JSON before returning ### 10. HELIOS_CHK Filtering Removal (lines 2500-2504, 3004-3039) - Remove filterHeliosDebugInfo function - Remove heliosDebugPattern regex - HELIOS_CHK fields now handled by client-side processing ### 11. Comment Translation - Translate Chinese comments to English for code consistency - Affected areas: token calculation, buffer handling, message processing	2025-12-11 08:13:33 +08:00
Ravens2121	f302be5ce6	Merge branch 'router-for-me:main' into master	2025-12-11 05:40:10 +08:00
Ravens2121	cd4e84a360	feat(kiro): enhance request format, stream handling, and usage tracking ## English Description ### Request Format Fixes - Fix conversationState field order (chatTriggerType must be first) - Add conditional profileArn inclusion based on auth method - builder-id auth (AWS SSO) doesn't require profileArn - social auth (Google OAuth) requires profileArn ### Stream Processing Enhancements - Add headersLen boundary validation to prevent slice out of bounds - Handle incomplete tool use at EOF by flushing pending data - Separate message_delta and message_stop events for proper streaming - Add error logging for JSON unmarshal failures ### JSON Repair Improvements - Add escapeNewlinesInStrings() to handle control characters in JSON strings - Remove incorrect unquotedKeyPattern that broke valid JSON content - Fix handling of streaming fragments with embedded newlines/tabs ### Debug Info Filtering (Optional) - Add filterHeliosDebugInfo() to remove [HELIOS_CHK] blocks - Pattern matches internal state tracking from Kiro/Amazon Q - Currently disabled pending further testing ### Usage Tracking - Add usage information extraction in message_delta response - Include prompt_tokens, completion_tokens, total_tokens in OpenAI format --- ## 中文描述 ### 请求格式修复 - 修复 conversationState 字段顺序（chatTriggerType 必须在第一位） - 根据认证方式条件性包含 profileArn - builder-id 认证（AWS SSO）不需要 profileArn - social 认证（Google OAuth）需要 profileArn ### 流处理增强 - 添加 headersLen 边界验证，防止切片越界 - 在 EOF 时处理未完成的工具调用，刷新待处理数据 - 分离 message_delta 和 message_stop 事件以实现正确的流式传输 - 添加 JSON 反序列化失败的错误日志 ### JSON 修复改进 - 添加 escapeNewlinesInStrings() 处理 JSON 字符串中的控制字符 - 移除错误的 unquotedKeyPattern，该模式会破坏有效的 JSON 内容 - 修复包含嵌入换行符/制表符的流式片段处理 ### 调试信息过滤（可选） - 添加 filterHeliosDebugInfo() 移除 [HELIOS_CHK] 块 - 模式匹配来自 Kiro/Amazon Q 的内部状态跟踪信息 - 目前已禁用，等待进一步测试 ### 使用量跟踪 - 在 message_delta 响应中添加 usage 信息提取 - 以 OpenAI 格式包含 prompt_tokens、completion_tokens、total_tokens	2025-12-11 05:37:22 +08:00
Luis Pater	4360ed8a7b	Merge branch 'router-for-me:main' into main	2025-12-11 03:17:55 +08:00
Luis Pater	423ce97665	feat(util): implement dynamic thinking suffix normalization and refactor budget resolution logic - Added support for parsing and normalizing dynamic thinking model suffixes. - Centralized budget resolution across executors and payload helpers. - Retired legacy Gemini-specific thinking handlers in favor of unified logic. - Updated executors to use metadata-based thinking configuration. - Added `ResolveOriginalModel` utility for resolving normalized upstream models using request metadata. - Updated executors (Gemini, Codex, iFlow, OpenAI, Qwen) to incorporate upstream model resolution and substitute model values in payloads and request URLs. - Ensured fallbacks handle cases with missing or malformed metadata to derive models robustly. - Refactored upstream model resolution to dynamically incorporate metadata for selecting and normalizing models. - Improved handling of thinking configurations and model overrides in executors. - Removed hardcoded thinking model entries and migrated logic to metadata-based resolution. - Updated payload mutations to always include the resolved model.	2025-12-11 03:10:50 +08:00
Luis Pater	b27a175fef	Merge pull request #17 from Ravens2121/master 由AI进行更改修复了Kiro供应商的Claude协议与OpenAI协议。（对比AIClient-2-API项目进行变更）	2025-12-11 01:31:47 +08:00
Ravens	8d5f89ccfd	fix(kiro): fix translator format mismatch for OpenAI protocol Amp-Thread-ID: https://ampcode.com/threads/T-019b092b-f2de-72a1-b428-72511c0de628 Co-authored-by: Amp <amp@ampcode.com>	2025-12-11 01:15:00 +08:00
Ravens	084e2666cb	fix(kiro): add SSE event: prefix for Claude client compatibility Amp-Thread-ID: https://ampcode.com/threads/T-019b08fc-ff96-766e-a942-63dd35ed28c6 Co-authored-by: Amp <amp@ampcode.com>	2025-12-11 00:14:20 +08:00
Luis Pater	2eb2dbb266	Merge branch 'router-for-me:main' into main	2025-12-10 23:56:59 +08:00
Luis Pater	e717939edb	Fixed: #478 feat(antigravity): add support for inline image data in client responses	2025-12-10 23:55:53 +08:00
Ravens2121	7758a86d1e	Merge branch 'router-for-me:main' into main	2025-12-10 23:25:23 +08:00
sususu	76c563d161	fix(executor): increase buffer size for stream scanners to 50MB across multiple executors	2025-12-10 23:20:04 +08:00
hkfires	a89514951f	fix(claude): prevent final events when no content streamed	2025-12-10 22:19:55 +08:00
Luis Pater	9f72a875f8	Merge branch 'router-for-me:main' into main	2025-12-10 16:54:31 +08:00
Luis Pater	94d61c7b2b	fix(logging): update response aggregation logic to include all attempts	2025-12-10 16:53:48 +08:00
Luis Pater	f999650322	Merge branch 'router-for-me:main' into main	2025-12-10 16:04:18 +08:00
Luis Pater	1249b07eb8	feat(responses): add unique identifiers for responses, function calls, and tool uses	2025-12-10 16:02:54 +08:00
Luis Pater	38319b0483	Merge branch 'router-for-me:main' into main	2025-12-10 15:29:19 +08:00
Luis Pater	6b37f33d31	feat(antigravity): add unique identifier for tool use blocks in response	2025-12-10 15:27:57 +08:00
Luis Pater	af543238aa	Merge pull request #16 from fuko2935/fix/remove-unstable-kiro-auto fix(registry): remove unstable kiro-auto model	2025-12-10 00:23:31 +08:00
Luis Pater	2de27c560b	Merge pull request #15 from StriveMario/feature/fix-kiro-refreshtoken fix kiro cannot refresh the token	2025-12-10 00:22:36 +08:00
Luis Pater	773ed6cc64	Merge branch 'router-for-me:main' into main	2025-12-10 00:17:43 +08:00
fuko2935	a594338bc5	fix(registry): remove unstable kiro-auto model - Removes kiro-auto from static model registry - Removes kiro-auto mapping from executor - Fixes compatibility issues reported in #7 Fixes #7	2025-12-09 19:14:40 +03:00
Luis Pater	f25f419e5a	fix(antigravity): remove references to `autopush` endpoint and update fallback logic	2025-12-10 00:13:20 +08:00
Luis Pater	1fd1ccca17	Merge branch 'router-for-me:main' into main	2025-12-09 21:13:08 +08:00
Luis Pater	b7e382008f	Merge pull request #465 from router-for-me/think Move thinking budget normalization from translators to executor	2025-12-09 21:10:33 +08:00
hkfires	70d6b95097	feat(amp): add /news.rss proxy route	2025-12-09 21:05:06 +08:00
hkfires	9b202b6c1c	fix(executor): centralize default thinking config	2025-12-09 21:05:06 +08:00
hkfires	6a66b6801a	feat(executor): enforce minimum thinking budget for antigravity models	2025-12-09 21:05:06 +08:00
hkfires	5b6d201408	refactor(translator): remove thinking budget normalization across all translators	2025-12-09 21:05:06 +08:00
hkfires	5ec9b5e5a9	feat(executor): normalize thinking budget across all Gemini executors	2025-12-09 21:05:06 +08:00
Luis Pater	5db3b58717	Merge pull request #470 from router-for-me/agry fix(gemini): normalize model listing output	2025-12-09 21:00:29 +08:00
Mario	1fa5514d56	fix kiro cannot refresh the token	2025-12-09 20:13:16 +08:00
hkfires	347769b3e3	fix(openai-compat): use model id for auth model display	2025-12-09 18:09:14 +08:00
hkfires	3cfe7008a2	fix(registry): update gpt 5.1 model names	2025-12-09 17:55:21 +08:00
hkfires	da23ddb061	fix(gemini): normalize model listing output	2025-12-09 17:34:15 +08:00
hkfires	e5312fb5a2	feat(antigravity): support canonical names for antigravity models	2025-12-09 16:54:13 +08:00
Ravens2121	1770c491db	Merge branch 'router-for-me:main' into main	2025-12-07 22:25:42 +08:00
Your Name	2bf9e08b31	style(kiro): convert Chinese comments to English in base64 image handling	2025-12-07 21:51:24 +08:00
Ravens2121	f56bfaa689	Merge branch 'router-for-me:main' into main	2025-12-07 21:35:16 +08:00
Your Name	5d716dc796	fix(kiro): 修复 base64 图片格式转换问题	2025-12-07 21:34:44 +08:00