Merge branch 'router-for-me:main' into main

feat(runtime): add model alias support and enhance payload rule matching
- Introduced `payloadModelAliases` and `payloadModelCandidates` functions so payload rules can match a model by its configured aliases as well as by its own name.
- Updated rule matching logic to handle multiple model candidates.
- Refactored variable naming in executor to improve code clarity and consistency.
2026-03-09 23:33:24 +00:00 · 2026-01-17 05:17:38 +08:00 · 2026-01-17 05:05:24 +08:00 · 2026-01-17 04:44:09 +08:00 · 2026-01-17 04:12:29 +08:00 · 2026-01-16 20:01:35 +08:00
20 changed files with 683 additions and 47 deletions
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -71,7 +71,7 @@ type Config struct {
 	// WebsocketAuth enables or disables authentication for the WebSocket API.
 	WebsocketAuth bool `yaml:"ws-auth" json:"ws-auth"`

-	// CodexInstructionsEnabled controls whether custom Codex instructions are injected.
+	// CodexInstructionsEnabled controls whether official Codex instructions are injected.
 	// When false (default), CodexInstructionsForModel returns immediately without modification.
 	// When true, the original instruction injection logic is used.
 	CodexInstructionsEnabled bool `yaml:"codex-instructions-enabled" json:"codex-instructions-enabled"`
--- a/internal/config/oauth_model_alias_migration.go
+++ b/internal/config/oauth_model_alias_migration.go
@@ -122,6 +122,23 @@ func migrateFromOldField(configFile string, root *yaml.Node, rootMap *yaml.Node,
 		newAliases[channel] = converted
 	}

+	// For antigravity channel, supplement missing default aliases
+	if antigravityEntries, exists := newAliases["antigravity"]; exists {
+		// Build a set of already configured model names (upstream names)
+		configuredModels := make(map[string]bool, len(antigravityEntries))
+		for _, entry := range antigravityEntries {
+			configuredModels[entry.Name] = true
+		}
+
+		// Add missing default aliases
+		for _, defaultAlias := range defaultAntigravityAliases() {
+			if !configuredModels[defaultAlias.Name] {
+				antigravityEntries = append(antigravityEntries, defaultAlias)
+			}
+		}
+		newAliases["antigravity"] = antigravityEntries
+	}
+
 	// Build new node
 	newNode := buildOAuthModelAliasNode(newAliases)

--- a/internal/config/oauth_model_alias_migration_test.go
+++ b/internal/config/oauth_model_alias_migration_test.go
@@ -114,6 +114,23 @@ func TestMigrateOAuthModelAlias_ConvertsAntigravityModels(t *testing.T) {
 	if !strings.Contains(content, "gemini-3-pro-high") {
 		t.Fatal("expected gemini-3-pro-preview to be converted to gemini-3-pro-high")
 	}
+
+	// Verify missing default aliases were supplemented
+	if !strings.Contains(content, "gemini-3-pro-image") {
+		t.Fatal("expected missing default alias gemini-3-pro-image to be added")
+	}
+	if !strings.Contains(content, "gemini-3-flash") {
+		t.Fatal("expected missing default alias gemini-3-flash to be added")
+	}
+	if !strings.Contains(content, "claude-sonnet-4-5") {
+		t.Fatal("expected missing default alias claude-sonnet-4-5 to be added")
+	}
+	if !strings.Contains(content, "claude-sonnet-4-5-thinking") {
+		t.Fatal("expected missing default alias claude-sonnet-4-5-thinking to be added")
+	}
+	if !strings.Contains(content, "claude-opus-4-5-thinking") {
+		t.Fatal("expected missing default alias claude-opus-4-5-thinking to be added")
+	}
 }

 func TestMigrateOAuthModelAlias_AddsDefaultIfNeitherExists(t *testing.T) {
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -847,6 +847,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 		{
 			ID:                  "gpt-5-mini",
@@ -858,6 +859,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5 Mini via GitHub Copilot",
 			ContextLength:       128000,
 			MaxCompletionTokens: 16384,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 		{
 			ID:                  "gpt-5-codex",
@@ -869,6 +871,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5 Codex via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
 		},
 		{
 			ID:                  "gpt-5.1",
@@ -880,6 +883,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.1 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 		{
 			ID:                  "gpt-5.1-codex",
@@ -891,6 +895,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.1 Codex via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
 		},
 		{
 			ID:                  "gpt-5.1-codex-mini",
@@ -902,6 +907,19 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.1 Codex Mini via GitHub Copilot",
 			ContextLength:       128000,
 			MaxCompletionTokens: 16384,
+			SupportedEndpoints:  []string{"/responses"},
+		},
+		{
+			ID:                  "gpt-5.1-codex-max",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "github-copilot",
+			Type:                "github-copilot",
+			DisplayName:         "GPT-5.1 Codex Max",
+			Description:         "OpenAI GPT-5.1 Codex Max via GitHub Copilot",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
 		},
 		{
 			ID:                  "gpt-5.2",
@@ -913,6 +931,19 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "OpenAI GPT-5.2 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
+		},
+		{
+			ID:                  "gpt-5.2-codex",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "github-copilot",
+			Type:                "github-copilot",
+			DisplayName:         "GPT-5.2 Codex",
+			Description:         "OpenAI GPT-5.2 Codex via GitHub Copilot",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32768,
+			SupportedEndpoints:  []string{"/responses"},
 		},
 		{
 			ID:                  "claude-haiku-4.5",
@@ -924,6 +955,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Haiku 4.5 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "claude-opus-4.1",
@@ -935,6 +967,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Opus 4.1 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "claude-opus-4.5",
@@ -946,6 +979,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Opus 4.5 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "claude-sonnet-4",
@@ -957,6 +991,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Sonnet 4 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "claude-sonnet-4.5",
@@ -968,6 +1003,7 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			Description:         "Anthropic Claude Sonnet 4.5 via GitHub Copilot",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			SupportedEndpoints:  []string{"/chat/completions"},
 		},
 		{
 			ID:                  "gemini-2.5-pro",
@@ -981,13 +1017,24 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			MaxCompletionTokens: 65536,
 		},
 		{
-			ID:                  "gemini-3-pro",
+			ID:                  "gemini-3-pro-preview",
 			Object:              "model",
 			Created:             now,
 			OwnedBy:             "github-copilot",
 			Type:                "github-copilot",
-			DisplayName:         "Gemini 3 Pro",
-			Description:         "Google Gemini 3 Pro via GitHub Copilot",
+			DisplayName:         "Gemini 3 Pro (Preview)",
+			Description:         "Google Gemini 3 Pro Preview via GitHub Copilot",
+			ContextLength:       1048576,
+			MaxCompletionTokens: 65536,
+		},
+		{
+			ID:                  "gemini-3-flash-preview",
+			Object:              "model",
+			Created:             now,
+			OwnedBy:             "github-copilot",
+			Type:                "github-copilot",
+			DisplayName:         "Gemini 3 Flash (Preview)",
+			Description:         "Google Gemini 3 Flash Preview via GitHub Copilot",
 			ContextLength:       1048576,
 			MaxCompletionTokens: 65536,
 		},
@@ -1003,15 +1050,16 @@ func GetGitHubCopilotModels() []*ModelInfo {
 			MaxCompletionTokens: 16384,
 		},
 		{
-			ID:                  "raptor-mini",
+			ID:                  "oswe-vscode-prime",
 			Object:              "model",
 			Created:             now,
 			OwnedBy:             "github-copilot",
 			Type:                "github-copilot",
-			DisplayName:         "Raptor Mini",
-			Description:         "Raptor Mini via GitHub Copilot",
+			DisplayName:         "Raptor mini (Preview)",
+			Description:         "Raptor mini via GitHub Copilot",
 			ContextLength:       128000,
 			MaxCompletionTokens: 16384,
+			SupportedEndpoints:  []string{"/chat/completions", "/responses"},
 		},
 	}
 }
@@ -1020,6 +1068,18 @@ func GetGitHubCopilotModels() []*ModelInfo {
 func GetKiroModels() []*ModelInfo {
 	return []*ModelInfo{
 		// --- Base Models ---
+		{
+			ID:                  "kiro-auto",
+			Object:              "model",
+			Created:             1732752000,
+			OwnedBy:             "aws",
+			Type:                "kiro",
+			DisplayName:         "Kiro Auto",
+			Description:         "Automatic model selection by Kiro",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true},
+		},
 		{
 			ID:                  "kiro-claude-opus-4-5",
 			Object:              "model",
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -47,6 +47,8 @@ type ModelInfo struct {
 	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
 	// SupportedParameters lists supported parameters
 	SupportedParameters []string `json:"supported_parameters,omitempty"`
+	// SupportedEndpoints lists supported API endpoints (e.g., "/chat/completions", "/responses").
+	SupportedEndpoints []string `json:"supported_endpoints,omitempty"`

 	// Thinking holds provider-specific reasoning/thinking budget capabilities.
 	// This is optional and currently used for Gemini thinking budget normalization.
@@ -476,6 +478,9 @@ func cloneModelInfo(model *ModelInfo) *ModelInfo {
 	if len(model.SupportedParameters) > 0 {
 		copyModel.SupportedParameters = append([]string(nil), model.SupportedParameters...)
 	}
+	if len(model.SupportedEndpoints) > 0 {
+		copyModel.SupportedEndpoints = append([]string(nil), model.SupportedEndpoints...)
+	}
 	return &copyModel
 }

@@ -988,6 +993,9 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string)
 		if len(model.SupportedParameters) > 0 {
 			result["supported_parameters"] = model.SupportedParameters
 		}
+		if len(model.SupportedEndpoints) > 0 {
+			result["supported_endpoints"] = model.SupportedEndpoints
+		}
 		return result

 	case "claude", "kiro", "antigravity":
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -517,8 +517,8 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte {
 		}
 		if usageResult := responseNode.Get("usageMetadata"); usageResult.Exists() {
 			usageRaw = usageResult.Raw
-		} else if usageResult := root.Get("usageMetadata"); usageResult.Exists() {
-			usageRaw = usageResult.Raw
+		} else if usageMetadataResult := root.Get("usageMetadata"); usageMetadataResult.Exists() {
+			usageRaw = usageMetadataResult.Raw
 		}

 		if partsResult := responseNode.Get("candidates.0.content.parts"); partsResult.IsArray() {
@@ -642,7 +642,6 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 			err = errReq
 			return nil, err
 		}
-
 		httpResp, errDo := httpClient.Do(httpReq)
 		if errDo != nil {
 			recordAPIResponseError(ctx, e.cfg, errDo)
@@ -1004,10 +1003,10 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 			case "chat_20706", "chat_23310", "gemini-2.5-flash-thinking", "gemini-3-pro-low", "gemini-2.5-pro":
 				continue
 			}
-			cfg := modelConfig[modelID]
+			modelCfg := modelConfig[modelID]
 			modelName := modelID
-			if cfg != nil && cfg.Name != "" {
-				modelName = cfg.Name
+			if modelCfg != nil && modelCfg.Name != "" {
+				modelName = modelCfg.Name
 			}
 			modelInfo := &registry.ModelInfo{
 				ID:          modelID,
@@ -1021,12 +1020,12 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 				Type:        antigravityAuthType,
 			}
 			// Look up Thinking support from static config using upstream model name.
-			if cfg != nil {
-				if cfg.Thinking != nil {
-					modelInfo.Thinking = cfg.Thinking
+			if modelCfg != nil {
+				if modelCfg.Thinking != nil {
+					modelInfo.Thinking = modelCfg.Thinking
 				}
-				if cfg.MaxCompletionTokens > 0 {
-					modelInfo.MaxCompletionTokens = cfg.MaxCompletionTokens
+				if modelCfg.MaxCompletionTokens > 0 {
+					modelInfo.MaxCompletionTokens = modelCfg.MaxCompletionTokens
 				}
 			}
 			models = append(models, modelInfo)
@@ -1221,7 +1220,7 @@ func (e *AntigravityExecutor) buildRequest(ctx context.Context, auth *cliproxyau
 		payload = []byte(strJSON)
 	}

-	if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-preview") {
+	if strings.Contains(modelName, "claude") || strings.Contains(modelName, "gemini-3-pro-high") {
 		systemInstructionPartsResult := gjson.GetBytes(payload, "request.systemInstruction.parts")
 		payload, _ = sjson.SetBytes(payload, "request.systemInstruction.role", "user")
 		payload, _ = sjson.SetBytes(payload, "request.systemInstruction.parts.0.text", systemInstruction)
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -106,6 +106,10 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
+	body, _ = sjson.DeleteBytes(body, "safety_identifier")
+	if !gjson.GetBytes(body, "instructions").Exists() {
+		body, _ = sjson.SetBytes(body, "instructions", "")
+	}

 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -212,7 +216,11 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
+	body, _ = sjson.DeleteBytes(body, "safety_identifier")
 	body, _ = sjson.SetBytes(body, "model", baseModel)
+	if !gjson.GetBytes(body, "instructions").Exists() {
+		body, _ = sjson.SetBytes(body, "instructions", "")
+	}

 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -316,7 +324,11 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	body, _ = sjson.SetBytes(body, "model", baseModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
+	body, _ = sjson.DeleteBytes(body, "safety_identifier")
 	body, _ = sjson.SetBytes(body, "stream", false)
+	if !gjson.GetBytes(body, "instructions").Exists() {
+		body, _ = sjson.SetBytes(body, "instructions", "")
+	}

 	enc, err := tokenizerForCodexModel(baseModel)
 	if err != nil {
--- a/internal/runtime/executor/github_copilot_executor.go
+++ b/internal/runtime/executor/github_copilot_executor.go
@@ -23,6 +23,7 @@ import (
 const (
 	githubCopilotBaseURL       = "https://api.githubcopilot.com"
 	githubCopilotChatPath      = "/chat/completions"
+	githubCopilotResponsesPath = "/responses"
 	githubCopilotAuthType      = "github-copilot"
 	githubCopilotTokenCacheTTL = 25 * time.Minute
 	// tokenExpiryBuffer is the time before expiry when we should refresh the token.
@@ -106,7 +107,11 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
+	useResponses := useGitHubCopilotResponsesEndpoint(from)
 	to := sdktranslator.FromString("openai")
+	if useResponses {
+		to = sdktranslator.FromString("openai-response")
+	}
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
@@ -117,7 +122,11 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "stream", false)

-	url := githubCopilotBaseURL + githubCopilotChatPath
+	path := githubCopilotChatPath
+	if useResponses {
+		path = githubCopilotResponsesPath
+	}
+	url := githubCopilotBaseURL + path
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
 	if err != nil {
 		return resp, err
@@ -172,6 +181,9 @@ func (e *GitHubCopilotExecutor) Execute(ctx context.Context, auth *cliproxyauth.
 	appendAPIResponseChunk(ctx, e.cfg, data)

 	detail := parseOpenAIUsage(data)
+	if useResponses && detail.TotalTokens == 0 {
+		detail = parseOpenAIResponsesUsage(data)
+	}
 	if detail.TotalTokens > 0 {
 		reporter.publish(ctx, detail)
 	}
@@ -194,7 +206,11 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	defer reporter.trackFailure(ctx, &err)

 	from := opts.SourceFormat
+	useResponses := useGitHubCopilotResponsesEndpoint(from)
 	to := sdktranslator.FromString("openai")
+	if useResponses {
+		to = sdktranslator.FromString("openai-response")
+	}
 	originalPayload := bytes.Clone(req.Payload)
 	if len(opts.OriginalRequest) > 0 {
 		originalPayload = bytes.Clone(opts.OriginalRequest)
@@ -205,9 +221,15 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 	body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	// Enable stream options for usage stats in stream
-	body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
+	if !useResponses {
+		body, _ = sjson.SetBytes(body, "stream_options.include_usage", true)
+	}

-	url := githubCopilotBaseURL + githubCopilotChatPath
+	path := githubCopilotChatPath
+	if useResponses {
+		path = githubCopilotResponsesPath
+	}
+	url := githubCopilotBaseURL + path
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
 	if err != nil {
 		return nil, err
@@ -283,6 +305,10 @@ func (e *GitHubCopilotExecutor) ExecuteStream(ctx context.Context, auth *cliprox
 				}
 				if detail, ok := parseOpenAIStreamUsage(line); ok {
 					reporter.publish(ctx, detail)
+				} else if useResponses {
+					if detail, ok := parseOpenAIResponsesStreamUsage(line); ok {
+						reporter.publish(ctx, detail)
+					}
 				}
 			}

@@ -393,6 +419,10 @@ func (e *GitHubCopilotExecutor) normalizeModel(_ string, body []byte) []byte {
 	return body
 }

+// useGitHubCopilotResponsesEndpoint reports whether sourceFormat is the
+// "openai-response" format, i.e. whether the request should be sent to the
+// Copilot responses path rather than the chat-completions path.
+func useGitHubCopilotResponsesEndpoint(sourceFormat sdktranslator.Format) bool {
+	const responsesFormat = "openai-response"
+	return sourceFormat.String() == responsesFormat
+}
+
 // isHTTPSuccess checks if the status code indicates success (2xx).
 func isHTTPSuccess(statusCode int) bool {
 	return statusCode >= 200 && statusCode < 300
--- a/internal/runtime/executor/kiro_executor.go
+++ b/internal/runtime/executor/kiro_executor.go
@@ -53,6 +53,7 @@ const (
 	kiroIDEUserAgent     = "aws-sdk-js/1.0.18 ua/2.1 os/darwin#25.0.0 lang/js md/nodejs#20.16.0 api/codewhispererstreaming#1.0.18 m/E KiroIDE-0.2.13-66c23a8c5d15afabec89ef9954ef52a119f10d369df04d548fc6c1eac694b0d1"
 	kiroIDEAmzUserAgent  = "aws-sdk-js/1.0.18 KiroIDE-0.2.13-66c23a8c5d15afabec89ef9954ef52a119f10d369df04d548fc6c1eac694b0d1"
 	kiroIDEAgentModeSpec = "spec"
+	kiroAgentModeVibe    = "vibe"
 )

 // Real-time usage estimation configuration
@@ -98,7 +99,7 @@ var kiroEndpointConfigs = []kiroEndpointConfig{
 		Name:      "CodeWhisperer",
 	},
 	{
-		URL:       "https://q.us-east-1.amazonaws.com/",
+		URL:       "https://q.us-east-1.amazonaws.com/generateAssistantResponse",
 		Origin:    "CLI",
 		AmzTarget: "AmazonQDeveloperStreamingService.SendMessage",
 		Name:      "AmazonQ",
@@ -232,7 +233,9 @@ func (e *KiroExecutor) PrepareRequest(req *http.Request, auth *cliproxyauth.Auth
 	} else {
 		req.Header.Set("User-Agent", kiroUserAgent)
 		req.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
+		req.Header.Set("x-amzn-kiro-agent-mode", kiroAgentModeVibe)
 	}
+	req.Header.Set("x-amzn-codewhisperer-optout", "true")
 	req.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
 	req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
 	req.Header.Set("Authorization", "Bearer "+accessToken)
@@ -350,7 +353,9 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
 			} else {
 				httpReq.Header.Set("User-Agent", kiroUserAgent)
 				httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
+				httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroAgentModeVibe)
 			}
+			httpReq.Header.Set("x-amzn-codewhisperer-optout", "true")
 			httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
 			httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())

@@ -683,7 +688,9 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
 			} else {
 				httpReq.Header.Set("User-Agent", kiroUserAgent)
 				httpReq.Header.Set("X-Amz-User-Agent", kiroFullUserAgent)
+				httpReq.Header.Set("x-amzn-kiro-agent-mode", kiroAgentModeVibe)
 			}
+			httpReq.Header.Set("x-amzn-codewhisperer-optout", "true")
 			httpReq.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
 			httpReq.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())

--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -25,6 +25,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	if model == "" {
 		return payload
 	}
+	candidates := payloadModelCandidates(cfg, model, protocol)
 	out := payload
 	source := original
 	if len(source) == 0 {
@@ -34,7 +35,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	// Apply default rules: first write wins per field across all matching rules.
 	for i := range rules.Default {
 		rule := &rules.Default[i]
-		if !payloadRuleMatchesModel(rule, model, protocol) {
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
@@ -59,7 +60,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	// Apply default raw rules: first write wins per field across all matching rules.
 	for i := range rules.DefaultRaw {
 		rule := &rules.DefaultRaw[i]
-		if !payloadRuleMatchesModel(rule, model, protocol) {
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
@@ -88,7 +89,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	// Apply override rules: last write wins per field across all matching rules.
 	for i := range rules.Override {
 		rule := &rules.Override[i]
-		if !payloadRuleMatchesModel(rule, model, protocol) {
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
@@ -106,7 +107,7 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	// Apply override raw rules: last write wins per field across all matching rules.
 	for i := range rules.OverrideRaw {
 		rule := &rules.OverrideRaw[i]
-		if !payloadRuleMatchesModel(rule, model, protocol) {
+		if !payloadRuleMatchesModels(rule, protocol, candidates) {
 			continue
 		}
 		for path, value := range rule.Params {
@@ -128,6 +129,18 @@ func applyPayloadConfigWithRoot(cfg *config.Config, model, protocol, root string
 	return out
 }

+// payloadRuleMatchesModels reports whether rule matches at least one of the
+// given model names for protocol. A nil rule or an empty models list never
+// matches.
+func payloadRuleMatchesModels(rule *config.PayloadRule, protocol string, models []string) bool {
+	if rule == nil {
+		return false
+	}
+	// An empty list simply skips the loop and falls through to false.
+	for i := range models {
+		if payloadRuleMatchesModel(rule, models[i], protocol) {
+			return true
+		}
+	}
+	return false
+}
+
 func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) bool {
 	if rule == nil {
 		return false
@@ -150,6 +163,65 @@ func payloadRuleMatchesModel(rule *config.PayloadRule, model, protocol string) b
 	return false
 }

+// payloadModelCandidates returns the model names that payload rules are
+// matched against: the trimmed model itself, followed by every alias that
+// payloadModelAliases reports for it. Blank aliases and case-insensitive
+// duplicates are dropped. It returns nil when model is blank.
+func payloadModelCandidates(cfg *config.Config, model, protocol string) []string {
+	name := strings.TrimSpace(model)
+	if name == "" {
+		return nil
+	}
+	result := []string{name}
+	if cfg == nil {
+		return result
+	}
+	extra := payloadModelAliases(cfg, name, protocol)
+	if len(extra) == 0 {
+		return result
+	}
+	// Track lower-cased names so an alias equal to the model (or to an
+	// earlier alias) is listed only once.
+	known := map[string]struct{}{strings.ToLower(name): {}}
+	for _, raw := range extra {
+		candidate := strings.TrimSpace(raw)
+		if candidate == "" {
+			continue
+		}
+		folded := strings.ToLower(candidate)
+		if _, dup := known[folded]; dup {
+			continue
+		}
+		known[folded] = struct{}{}
+		result = append(result, candidate)
+	}
+	return result
+}
+
+// payloadModelAliases collects the aliases configured in cfg.OAuthModelAlias
+// for model, using the trimmed, lower-cased protocol as the channel key.
+// Entry names are compared case-insensitively after trimming, and entries
+// with a blank alias are skipped. It returns nil when cfg is nil, when model
+// or protocol is blank, or when the channel has no entries.
+func payloadModelAliases(cfg *config.Config, model, protocol string) []string {
+	if cfg == nil {
+		return nil
+	}
+	target := strings.TrimSpace(model)
+	if target == "" {
+		return nil
+	}
+	key := strings.ToLower(strings.TrimSpace(protocol))
+	if key == "" {
+		return nil
+	}
+	configured := cfg.OAuthModelAlias[key]
+	if len(configured) == 0 {
+		return nil
+	}
+	found := make([]string, 0, 2)
+	for _, item := range configured {
+		if !strings.EqualFold(strings.TrimSpace(item.Name), target) {
+			continue
+		}
+		if trimmed := strings.TrimSpace(item.Alias); trimmed != "" {
+			found = append(found, trimmed)
+		}
+	}
+	return found
+}
+
 // buildPayloadPath combines an optional root path with a relative parameter path.
 // When root is empty, the parameter path is used as-is. When root is non-empty,
 // the parameter path is treated as relative to root.
--- a/internal/runtime/executor/usage_helpers.go
+++ b/internal/runtime/executor/usage_helpers.go
@@ -236,6 +236,44 @@ func parseOpenAIStreamUsage(line []byte) (usage.Detail, bool) {
 	return detail, true
 }

+// parseOpenAIResponsesUsageDetail converts a Responses-style usage object
+// (input_tokens / output_tokens / total_tokens) into a usage.Detail. When
+// total_tokens is zero or absent, the total is computed as input + output.
+// Cached and reasoning token counts are copied from the *_details
+// sub-objects when those fields are present.
+func parseOpenAIResponsesUsageDetail(usageNode gjson.Result) usage.Detail {
+	var detail usage.Detail
+	detail.InputTokens = usageNode.Get("input_tokens").Int()
+	detail.OutputTokens = usageNode.Get("output_tokens").Int()
+	if total := usageNode.Get("total_tokens").Int(); total != 0 {
+		detail.TotalTokens = total
+	} else {
+		detail.TotalTokens = detail.InputTokens + detail.OutputTokens
+	}
+	cachedNode := usageNode.Get("input_tokens_details.cached_tokens")
+	if cachedNode.Exists() {
+		detail.CachedTokens = cachedNode.Int()
+	}
+	reasoningNode := usageNode.Get("output_tokens_details.reasoning_tokens")
+	if reasoningNode.Exists() {
+		detail.ReasoningTokens = reasoningNode.Int()
+	}
+	return detail
+}
+
+// parseOpenAIResponsesUsage reads the top-level "usage" object of a complete
+// (non-streaming) response body and converts it with
+// parseOpenAIResponsesUsageDetail. It returns a zero usage.Detail when the
+// body has no "usage" field.
+func parseOpenAIResponsesUsage(data []byte) usage.Detail {
+	if node := gjson.ParseBytes(data).Get("usage"); node.Exists() {
+		return parseOpenAIResponsesUsageDetail(node)
+	}
+	return usage.Detail{}
+}
+
+// parseOpenAIResponsesStreamUsage extracts token usage from a single streamed
+// line of a Responses-style response.
+//
+// The line is first reduced to its JSON payload via jsonPayload. Usage is
+// taken from the top-level "usage" field when present. Otherwise it falls
+// back to "response.usage", which is where Responses stream events such as
+// response.completed carry it. The nested value must be an object: earlier
+// events can include "usage": null, and reporting that as a hit would publish
+// an all-zero usage record.
+//
+// The boolean result is false when the line has no valid JSON payload or no
+// usage could be located.
+func parseOpenAIResponsesStreamUsage(line []byte) (usage.Detail, bool) {
+	payload := jsonPayload(line)
+	if len(payload) == 0 || !gjson.ValidBytes(payload) {
+		return usage.Detail{}, false
+	}
+	if usageNode := gjson.GetBytes(payload, "usage"); usageNode.Exists() {
+		return parseOpenAIResponsesUsageDetail(usageNode), true
+	}
+	nested := gjson.GetBytes(payload, "response.usage")
+	if !nested.IsObject() {
+		return usage.Detail{}, false
+	}
+	return parseOpenAIResponsesUsageDetail(nested), true
+}
+
 func parseClaudeUsage(data []byte) usage.Detail {
 	usageNode := gjson.ParseBytes(data).Get("usage")
 	if !usageNode.Exists() {
--- a/internal/translator/codex/claude/codex_claude_request.go
+++ b/internal/translator/codex/claude/codex_claude_request.go
@@ -52,7 +52,7 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	systemsResult := rootResult.Get("system")
 	if systemsResult.IsArray() {
 		systemResults := systemsResult.Array()
-		message := `{"type":"message","role":"user","content":[]}`
+		message := `{"type":"message","role":"developer","content":[]}`
 		for i := 0; i < len(systemResults); i++ {
 			systemResult := systemResults[i]
 			systemTypeResult := systemResult.Get("type")
@@ -245,21 +245,23 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	template, _ = sjson.Set(template, "include", []string{"reasoning.encrypted_content"})

 	// Add a first message to ignore system instructions and ensure proper execution.
-	inputResult := gjson.Get(template, "input")
-	if inputResult.Exists() && inputResult.IsArray() {
-		inputResults := inputResult.Array()
-		newInput := "[]"
-		for i := 0; i < len(inputResults); i++ {
-			if i == 0 {
-				firstText := inputResults[i].Get("content.0.text")
-				firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"
-				if firstText.Exists() && firstText.String() != firstInstructions {
-					newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`)
+	if misc.GetCodexInstructionsEnabled() {
+		inputResult := gjson.Get(template, "input")
+		if inputResult.Exists() && inputResult.IsArray() {
+			inputResults := inputResult.Array()
+			newInput := "[]"
+			for i := 0; i < len(inputResults); i++ {
+				if i == 0 {
+					firstText := inputResults[i].Get("content.0.text")
+					firstInstructions := "EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"
+					if firstText.Exists() && firstText.String() != firstInstructions {
+						newInput, _ = sjson.SetRaw(newInput, "-1", `{"type":"message","role":"user","content":[{"type":"input_text","text":"EXECUTE ACCORDING TO THE FOLLOWING INSTRUCTIONS!!!"}]}`)
+					}
 				}
+				newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw)
 			}
-			newInput, _ = sjson.SetRaw(newInput, "-1", inputResults[i].Raw)
+			template, _ = sjson.SetRaw(template, "input", newInput)
 		}
-		template, _ = sjson.SetRaw(template, "input", newInput)
 	}

 	return []byte(template)
--- a/internal/translator/codex/gemini/codex_gemini_request.go
+++ b/internal/translator/codex/gemini/codex_gemini_request.go
@@ -95,7 +95,7 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 	// System instruction -> as a user message with input_text parts
 	sysParts := root.Get("system_instruction.parts")
 	if sysParts.IsArray() {
-		msg := `{"type":"message","role":"user","content":[]}`
+		msg := `{"type":"message","role":"developer","content":[]}`
 		arr := sysParts.Array()
 		for i := 0; i < len(arr); i++ {
 			p := arr[i]
--- a/internal/translator/codex/openai/chat-completions/codex_openai_request.go
+++ b/internal/translator/codex/openai/chat-completions/codex_openai_request.go
@@ -33,7 +33,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 	rawJSON := bytes.Clone(inputRawJSON)
 	userAgent := misc.ExtractCodexUserAgent(rawJSON)
 	// Start with empty JSON object
-	out := `{}`
+	out := `{"instructions":""}`

 	// Stream must be set to true
 	out, _ = sjson.Set(out, "stream", stream)
@@ -98,7 +98,9 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 	// Extract system instructions from first system message (string or text object)
 	messages := gjson.GetBytes(rawJSON, "messages")
 	_, instructions := misc.CodexInstructionsForModel(modelName, "", userAgent)
-	out, _ = sjson.Set(out, "instructions", instructions)
+	if misc.GetCodexInstructionsEnabled() {
+		out, _ = sjson.Set(out, "instructions", instructions)
+	}
 	// if messages.IsArray() {
 	// 	arr := messages.Array()
 	// 	for i := 0; i < len(arr); i++ {
@@ -141,7 +143,7 @@ func ConvertOpenAIRequestToCodex(modelName string, inputRawJSON []byte, stream b
 				msg := `{}`
 				msg, _ = sjson.Set(msg, "type", "message")
 				if role == "system" {
-					msg, _ = sjson.Set(msg, "role", "user")
+					msg, _ = sjson.Set(msg, "role", "developer")
 				} else {
 					msg, _ = sjson.Set(msg, "role", role)
 				}
--- a/internal/translator/codex/openai/responses/codex_openai-responses_request.go
+++ b/internal/translator/codex/openai/responses/codex_openai-responses_request.go
@@ -74,6 +74,11 @@ func ConvertOpenAIResponsesRequestToCodex(modelName string, inputRawJSON []byte,
 	}

 	if hasOfficialInstructions {
+		newInput := "[]"
+		for _, item := range inputResults {
+			newInput, _ = sjson.SetRaw(newInput, "-1", item.Raw)
+		}
+		rawJSON, _ = sjson.SetRawBytes(rawJSON, "input", []byte(newInput))
 		return rawJSON
 	}
 	// log.Debugf("instructions not matched, %s\n", originalInstructions)
--- a/sdk/api/handlers/gemini/gemini_handlers.go
+++ b/sdk/api/handlers/gemini/gemini_handlers.go
@@ -56,8 +56,12 @@ func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) {
 		for k, v := range model {
 			normalizedModel[k] = v
 		}
-		if name, ok := normalizedModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
-			normalizedModel["name"] = "models/" + name
+		if name, ok := normalizedModel["name"].(string); ok && name != "" {
+			if !strings.HasPrefix(name, "models/") {
+				normalizedModel["name"] = "models/" + name
+			}
+			normalizedModel["displayName"] = name
+			normalizedModel["description"] = name
 		}
 		if _, ok := normalizedModel["supportedGenerationMethods"]; !ok {
 			normalizedModel["supportedGenerationMethods"] = defaultMethods
--- a/sdk/api/handlers/openai/endpoint_compat.go
+++ b/sdk/api/handlers/openai/endpoint_compat.go
@@ -0,0 +1,37 @@
+package openai
+
+import "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+
+const (
+	openAIChatEndpoint      = "/chat/completions"
+	openAIResponsesEndpoint = "/responses"
+)
+
+// resolveEndpointOverride reports whether a request for modelName that arrived on
+// requestedEndpoint should be rerouted to the other OpenAI-style endpoint.
+//
+// It returns ("", false) when modelName is empty, when the global registry has no
+// entry for the model or the entry lists no supported endpoints, when
+// requestedEndpoint is itself supported, or when neither direction applies.
+// Otherwise it returns the alternative endpoint and true: chat completions fall
+// back to responses, and responses fall back to chat completions.
+func resolveEndpointOverride(modelName, requestedEndpoint string) (string, bool) {
+	if modelName == "" {
+		return "", false
+	}
+	info := registry.GetGlobalRegistry().GetModelInfo(modelName)
+	if info == nil || len(info.SupportedEndpoints) == 0 {
+		return "", false
+	}
+	// The requested endpoint is supported as-is, so no override is needed.
+	if endpointListContains(info.SupportedEndpoints, requestedEndpoint) {
+		return "", false
+	}
+	if requestedEndpoint == openAIChatEndpoint && endpointListContains(info.SupportedEndpoints, openAIResponsesEndpoint) {
+		return openAIResponsesEndpoint, true
+	}
+	if requestedEndpoint == openAIResponsesEndpoint && endpointListContains(info.SupportedEndpoints, openAIChatEndpoint) {
+		return openAIChatEndpoint, true
+	}
+	return "", false
+}
+
+// endpointListContains reports whether value occurs in items, using exact
+// (case-sensitive) string equality.
+func endpointListContains(items []string, value string) bool {
+	for _, item := range items {
+		if item == value {
+			return true
+		}
+	}
+	return false
+}
--- a/sdk/api/handlers/openai/openai_handlers.go
+++ b/sdk/api/handlers/openai/openai_handlers.go
@@ -17,6 +17,7 @@ import (
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	codexconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/codex/openai/chat-completions"
 	responsesconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/openai/responses"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
 	"github.com/tidwall/gjson"
@@ -112,6 +113,23 @@ func (h *OpenAIAPIHandler) ChatCompletions(c *gin.Context) {
 	streamResult := gjson.GetBytes(rawJSON, "stream")
 	stream := streamResult.Type == gjson.True

+	modelName := gjson.GetBytes(rawJSON, "model").String()
+	if overrideEndpoint, ok := resolveEndpointOverride(modelName, openAIChatEndpoint); ok && overrideEndpoint == openAIResponsesEndpoint {
+		originalChat := rawJSON
+		if shouldTreatAsResponsesFormat(rawJSON) {
+			// Already responses-style payload; no conversion needed.
+		} else {
+			rawJSON = codexconverter.ConvertOpenAIRequestToCodex(modelName, rawJSON, stream)
+		}
+		stream = gjson.GetBytes(rawJSON, "stream").Bool()
+		if stream {
+			h.handleStreamingResponseViaResponses(c, rawJSON, originalChat)
+		} else {
+			h.handleNonStreamingResponseViaResponses(c, rawJSON, originalChat)
+		}
+		return
+	}
+
 	// Some clients send OpenAI Responses-format payloads to /v1/chat/completions.
 	// Convert them to Chat Completions so downstream translators preserve tool metadata.
 	if shouldTreatAsResponsesFormat(rawJSON) {
@@ -245,6 +263,76 @@ func convertCompletionsRequestToChatCompletions(rawJSON []byte) []byte {
 	return []byte(out)
 }

+// convertResponsesObjectToChatCompletion turns a non-streaming Responses payload into
+// a chat-completion body. The payload is first passed through
+// wrapResponsesPayloadAsCompleted and then handed to
+// codexconverter.ConvertCodexResponseToOpenAINonStream together with the original chat
+// request and the Responses request that was sent upstream.
+//
+// It returns nil when responsesPayload is empty, when wrapping yields an empty result,
+// or when the converter returns an empty string.
+func convertResponsesObjectToChatCompletion(ctx context.Context, modelName string, originalChatJSON, responsesRequestJSON, responsesPayload []byte) []byte {
+	if len(responsesPayload) == 0 {
+		return nil
+	}
+	wrapped := wrapResponsesPayloadAsCompleted(responsesPayload)
+	if len(wrapped) == 0 {
+		return nil
+	}
+	// The converter takes a pointer to an opaque param value; a fresh one is used for
+	// this single call.
+	var param any
+	converted := codexconverter.ConvertCodexResponseToOpenAINonStream(ctx, modelName, originalChatJSON, responsesRequestJSON, wrapped, &param)
+	if converted == "" {
+		return nil
+	}
+	return []byte(converted)
+}
+
+// wrapResponsesPayloadAsCompleted embeds a bare Responses object in a
+// {"type":"response.completed","response":...} envelope.
+//
+// The payload is returned unchanged when it already has a top-level "type" field or
+// when its "object" field is not "response".
+// NOTE(review): presumably the downstream converter expects event-shaped input;
+// confirm against ConvertCodexResponseToOpenAINonStream.
+func wrapResponsesPayloadAsCompleted(payload []byte) []byte {
+	if gjson.GetBytes(payload, "type").Exists() {
+		return payload
+	}
+	if gjson.GetBytes(payload, "object").String() != "response" {
+		return payload
+	}
+	wrapped := `{"type":"response.completed","response":{}}`
+	// The sjson error is discarded here.
+	wrapped, _ = sjson.SetRaw(wrapped, "response", string(payload))
+	return []byte(wrapped)
+}
+
+// writeConvertedResponsesChunk converts one upstream chunk with
+// codexconverter.ConvertCodexResponseToOpenAI and writes every non-empty output to
+// c.Writer as a "data: ...\n\n" frame. param is forwarded to the converter on each
+// call. The function does not flush the writer, and write errors are ignored.
+func writeConvertedResponsesChunk(c *gin.Context, ctx context.Context, modelName string, originalChatJSON, responsesRequestJSON, chunk []byte, param *any) {
+	outputs := codexconverter.ConvertCodexResponseToOpenAI(ctx, modelName, originalChatJSON, responsesRequestJSON, chunk, param)
+	for _, out := range outputs {
+		if out == "" {
+			continue
+		}
+		_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", out)
+	}
+}
+
+// forwardResponsesAsChatStream hands the data and error channels to h.ForwardStream
+// with callbacks that emit chat-completions style SSE frames:
+//   - WriteChunk converts each chunk with codexconverter.ConvertCodexResponseToOpenAI
+//     and writes every non-empty output as a "data: ..." frame.
+//   - WriteTerminalError writes an error body built by handlers.BuildErrorResponseBody
+//     as a "data: ..." frame.
+//   - WriteDone writes the "data: [DONE]" frame.
+func (h *OpenAIAPIHandler) forwardResponsesAsChatStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage, ctx context.Context, modelName string, originalChatJSON, responsesRequestJSON []byte, param *any) {
+	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
+		WriteChunk: func(chunk []byte) {
+			outputs := codexconverter.ConvertCodexResponseToOpenAI(ctx, modelName, originalChatJSON, responsesRequestJSON, chunk, param)
+			for _, out := range outputs {
+				if out == "" {
+					continue
+				}
+				_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", out)
+			}
+		},
+		WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
+			if errMsg == nil {
+				return
+			}
+			// Default to 500 unless the error carries a positive status code.
+			status := http.StatusInternalServerError
+			if errMsg.StatusCode > 0 {
+				status = errMsg.StatusCode
+			}
+			// Default to the standard status text unless the error has its own message.
+			errText := http.StatusText(status)
+			if errMsg.Error != nil && errMsg.Error.Error() != "" {
+				errText = errMsg.Error.Error()
+			}
+			body := handlers.BuildErrorResponseBody(status, errText)
+			_, _ = fmt.Fprintf(c.Writer, "data: %s\n\n", string(body))
+		},
+		WriteDone: func() {
+			_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
+		},
+	})
+}
+
 // convertChatCompletionsResponseToCompletions converts chat completions API response back to completions format.
 // This ensures the completions endpoint returns data in the expected format.
 //
@@ -435,6 +523,30 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []
 	cliCancel()
 }

+// handleNonStreamingResponseViaResponses serves a non-streaming chat-completions
+// request by executing rawJSON with OpenaiResponse and converting the result back to
+// chat-completion format. originalChatJSON is passed to the converter alongside rawJSON.
+//
+// An execution error is written with h.WriteErrorResponse. A failed conversion (nil
+// result) is reported as a 500 error. The model name is read from the "model" field
+// of rawJSON.
+func (h *OpenAIAPIHandler) handleNonStreamingResponseViaResponses(c *gin.Context, rawJSON []byte, originalChatJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	modelName := gjson.GetBytes(rawJSON, "model").String()
+	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, OpenaiResponse, modelName, rawJSON, h.GetAlt(c))
+	if errMsg != nil {
+		h.WriteErrorResponse(c, errMsg)
+		cliCancel(errMsg.Error)
+		return
+	}
+	converted := convertResponsesObjectToChatCompletion(cliCtx, modelName, originalChatJSON, rawJSON, resp)
+	if converted == nil {
+		h.WriteErrorResponse(c, &interfaces.ErrorMessage{
+			StatusCode: http.StatusInternalServerError,
+			Error:      fmt.Errorf("failed to convert response to chat completion format"),
+		})
+		cliCancel(fmt.Errorf("response conversion failed"))
+		return
+	}
+	_, _ = c.Writer.Write(converted)
+	cliCancel()
+}
+
 // handleStreamingResponse handles streaming responses for Gemini models.
 // It establishes a streaming connection with the backend service and forwards
 // the response chunks to the client in real-time using Server-Sent Events.
@@ -509,6 +621,67 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt
 	}
 }

+// handleStreamingResponseViaResponses serves a streaming chat-completions request by
+// executing rawJSON with OpenaiResponse and translating the upstream chunks into
+// chat-completions SSE frames. originalChatJSON is passed to the converter alongside
+// rawJSON.
+//
+// SSE headers are set only after the first receive on the data channel; an error
+// received before that is written with h.WriteErrorResponse instead. If the writer
+// does not implement http.Flusher, a 500 JSON error is returned.
+func (h *OpenAIAPIHandler) handleStreamingResponseViaResponses(c *gin.Context, rawJSON []byte, originalChatJSON []byte) {
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	modelName := gjson.GetBytes(rawJSON, "model").String()
+	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, OpenaiResponse, modelName, rawJSON, h.GetAlt(c))
+	// param is shared between the first-chunk conversion below and the forwarding loop.
+	var param any
+
+	setSSEHeaders := func() {
+		c.Header("Content-Type", "text/event-stream")
+		c.Header("Cache-Control", "no-cache")
+		c.Header("Connection", "keep-alive")
+		c.Header("Access-Control-Allow-Origin", "*")
+	}
+
+	// Peek for first usable chunk
+	for {
+		select {
+		case <-c.Request.Context().Done():
+			cliCancel(c.Request.Context().Err())
+			return
+		case errMsg, ok := <-errChan:
+			if !ok {
+				// A nil channel never becomes ready in select, so this disables the
+				// case once the error channel is closed.
+				errChan = nil
+				continue
+			}
+			h.WriteErrorResponse(c, errMsg)
+			if errMsg != nil {
+				cliCancel(errMsg.Error)
+			} else {
+				cliCancel(nil)
+			}
+			return
+		case chunk, ok := <-dataChan:
+			if !ok {
+				// The data channel closed before any chunk arrived: send only the
+				// terminating frame.
+				setSSEHeaders()
+				_, _ = fmt.Fprintf(c.Writer, "data: [DONE]\n\n")
+				flusher.Flush()
+				cliCancel(nil)
+				return
+			}
+
+			setSSEHeaders()
+			writeConvertedResponsesChunk(c, cliCtx, modelName, originalChatJSON, rawJSON, chunk, &param)
+			flusher.Flush()
+
+			// The remaining chunks and errors are handled by the forwarding helper.
+			h.forwardResponsesAsChatStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan, cliCtx, modelName, originalChatJSON, rawJSON, &param)
+			return
+		}
+	}
+}
+
 // handleCompletionsNonStreamingResponse handles non-streaming completions responses.
 // It converts completions request to chat completions format, sends to backend,
 // then converts the response back to completions format before sending to client.
--- a/sdk/api/handlers/openai/openai_responses_handlers.go
+++ b/sdk/api/handlers/openai/openai_responses_handlers.go
@@ -16,6 +16,7 @@ import (
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	responsesconverter "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/openai/openai/responses"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
 	"github.com/tidwall/gjson"
 )
@@ -83,7 +84,21 @@ func (h *OpenAIResponsesAPIHandler) Responses(c *gin.Context) {

 	// Check if the client requested a streaming response.
 	streamResult := gjson.GetBytes(rawJSON, "stream")
-	if streamResult.Type == gjson.True {
+	stream := streamResult.Type == gjson.True
+
+	modelName := gjson.GetBytes(rawJSON, "model").String()
+	if overrideEndpoint, ok := resolveEndpointOverride(modelName, openAIResponsesEndpoint); ok && overrideEndpoint == openAIChatEndpoint {
+		chatJSON := responsesconverter.ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName, rawJSON, stream)
+		stream = gjson.GetBytes(chatJSON, "stream").Bool()
+		if stream {
+			h.handleStreamingResponseViaChat(c, rawJSON, chatJSON)
+		} else {
+			h.handleNonStreamingResponseViaChat(c, rawJSON, chatJSON)
+		}
+		return
+	}
+
+	if stream {
 		h.handleStreamingResponse(c, rawJSON)
 	} else {
 		h.handleNonStreamingResponse(c, rawJSON)
@@ -116,6 +131,31 @@ func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponse(c *gin.Context, r
 	cliCancel()
 }

+// handleNonStreamingResponseViaChat serves a non-streaming Responses request by
+// executing the already-converted chatJSON with OpenAI and converting the result back
+// to Responses format.
+//
+// originalResponsesJSON is supplied for both request arguments of the converter. An
+// execution error is written with h.WriteErrorResponse. An empty conversion result is
+// reported as a 500 error. The model name is read from the "model" field of chatJSON.
+func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponseViaChat(c *gin.Context, originalResponsesJSON, chatJSON []byte) {
+	c.Header("Content-Type", "application/json")
+
+	modelName := gjson.GetBytes(chatJSON, "model").String()
+	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+	// The alt argument is passed as an empty string here.
+	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, OpenAI, modelName, chatJSON, "")
+	if errMsg != nil {
+		h.WriteErrorResponse(c, errMsg)
+		cliCancel(errMsg.Error)
+		return
+	}
+	var param any
+	converted := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponsesNonStream(cliCtx, modelName, originalResponsesJSON, originalResponsesJSON, resp, &param)
+	if converted == "" {
+		h.WriteErrorResponse(c, &interfaces.ErrorMessage{
+			StatusCode: http.StatusInternalServerError,
+			Error:      fmt.Errorf("failed to convert chat completion response to responses format"),
+		})
+		cliCancel(fmt.Errorf("response conversion failed"))
+		return
+	}
+	_, _ = c.Writer.Write([]byte(converted))
+	cliCancel()
+}
+
 // handleStreamingResponse handles streaming responses for Gemini models.
 // It establishes a streaming connection with the backend service and forwards
 // the response chunks to the client in real-time using Server-Sent Events.
@@ -196,6 +236,116 @@ func (h *OpenAIResponsesAPIHandler) handleStreamingResponse(c *gin.Context, rawJ
 	}
 }

+// handleStreamingResponseViaChat serves a streaming Responses request by executing the
+// already-converted chatJSON with OpenAI and translating the upstream chunks into
+// Responses-style SSE output.
+//
+// SSE headers are set only after the first receive on the data channel; an error
+// received before that is written with h.WriteErrorResponse instead. If the writer
+// does not implement http.Flusher, a 500 JSON error is returned.
+func (h *OpenAIResponsesAPIHandler) handleStreamingResponseViaChat(c *gin.Context, originalResponsesJSON, chatJSON []byte) {
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, handlers.ErrorResponse{
+			Error: handlers.ErrorDetail{
+				Message: "Streaming not supported",
+				Type:    "server_error",
+			},
+		})
+		return
+	}
+
+	modelName := gjson.GetBytes(chatJSON, "model").String()
+	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, OpenAI, modelName, chatJSON, "")
+	// param is shared between the first-chunk conversion below and the forwarding loop.
+	var param any
+
+	setSSEHeaders := func() {
+		c.Header("Content-Type", "text/event-stream")
+		c.Header("Cache-Control", "no-cache")
+		c.Header("Connection", "keep-alive")
+		c.Header("Access-Control-Allow-Origin", "*")
+	}
+
+	// Wait for the first event: client cancellation, an upstream error, or data.
+	for {
+		select {
+		case <-c.Request.Context().Done():
+			cliCancel(c.Request.Context().Err())
+			return
+		case errMsg, ok := <-errChan:
+			if !ok {
+				// A nil channel never becomes ready in select, so this disables the
+				// case once the error channel is closed.
+				errChan = nil
+				continue
+			}
+			h.WriteErrorResponse(c, errMsg)
+			if errMsg != nil {
+				cliCancel(errMsg.Error)
+			} else {
+				cliCancel(nil)
+			}
+			return
+		case chunk, ok := <-dataChan:
+			if !ok {
+				// The data channel closed before any chunk arrived: only a single
+				// newline is written.
+				setSSEHeaders()
+				_, _ = c.Writer.Write([]byte("\n"))
+				flusher.Flush()
+				cliCancel(nil)
+				return
+			}
+
+			setSSEHeaders()
+			writeChatAsResponsesChunk(c, cliCtx, modelName, originalResponsesJSON, chunk, &param)
+			flusher.Flush()
+
+			// The remaining chunks and errors are handled by the forwarding helper.
+			h.forwardChatAsResponsesStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan, cliCtx, modelName, originalResponsesJSON, &param)
+			return
+		}
+	}
+}
+
+// writeChatAsResponsesChunk converts one chat-completions chunk with
+// responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponses and writes
+// every non-empty output to c.Writer followed by a newline. An output that starts
+// with "event:" is additionally preceded by a newline.
+//
+// originalResponsesJSON is supplied for both request arguments of the converter. The
+// function does not flush the writer, and write errors are ignored.
+func writeChatAsResponsesChunk(c *gin.Context, ctx context.Context, modelName string, originalResponsesJSON, chunk []byte, param *any) {
+	outputs := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx, modelName, originalResponsesJSON, originalResponsesJSON, chunk, param)
+	for _, out := range outputs {
+		if out == "" {
+			continue
+		}
+		if bytes.HasPrefix([]byte(out), []byte("event:")) {
+			_, _ = c.Writer.Write([]byte("\n"))
+		}
+		_, _ = c.Writer.Write([]byte(out))
+		_, _ = c.Writer.Write([]byte("\n"))
+	}
+}
+
+// forwardChatAsResponsesStream hands the data and error channels to h.ForwardStream
+// with callbacks that emit Responses-style SSE output:
+//   - WriteChunk converts each chunk with
+//     responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponses and
+//     writes every non-empty output followed by a newline, with an extra leading
+//     newline before outputs that start with "event:".
+//   - WriteTerminalError writes an "event: error" frame whose data is built by
+//     handlers.BuildErrorResponseBody.
+//   - WriteDone writes a single newline.
+func (h *OpenAIResponsesAPIHandler) forwardChatAsResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage, ctx context.Context, modelName string, originalResponsesJSON []byte, param *any) {
+	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
+		WriteChunk: func(chunk []byte) {
+			outputs := responsesconverter.ConvertOpenAIChatCompletionsResponseToOpenAIResponses(ctx, modelName, originalResponsesJSON, originalResponsesJSON, chunk, param)
+			for _, out := range outputs {
+				if out == "" {
+					continue
+				}
+				if bytes.HasPrefix([]byte(out), []byte("event:")) {
+					_, _ = c.Writer.Write([]byte("\n"))
+				}
+				_, _ = c.Writer.Write([]byte(out))
+				_, _ = c.Writer.Write([]byte("\n"))
+			}
+		},
+		WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
+			if errMsg == nil {
+				return
+			}
+			// Default to 500 unless the error carries a positive status code.
+			status := http.StatusInternalServerError
+			if errMsg.StatusCode > 0 {
+				status = errMsg.StatusCode
+			}
+			// Default to the standard status text unless the error has its own message.
+			errText := http.StatusText(status)
+			if errMsg.Error != nil && errMsg.Error.Error() != "" {
+				errText = errMsg.Error.Error()
+			}
+			body := handlers.BuildErrorResponseBody(status, errText)
+			_, _ = fmt.Fprintf(c.Writer, "\nevent: error\ndata: %s\n\n", string(body))
+		},
+		WriteDone: func() {
+			_, _ = c.Writer.Write([]byte("\n"))
+		},
+	})
+}
+
 func (h *OpenAIResponsesAPIHandler) forwardResponsesStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
 	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
 		WriteChunk: func(chunk []byte) {
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -1222,6 +1222,9 @@ func rewriteModelInfoName(name, oldID, newID string) string {
 	if strings.EqualFold(oldID, newID) {
 		return name
 	}
+	if strings.EqualFold(trimmed, oldID) {
+		return newID
+	}
 	if strings.HasSuffix(trimmed, "/"+oldID) {
 		prefix := strings.TrimSuffix(trimmed, oldID)
 		return prefix + newID
Author	SHA1	Message	Date
Luis Pater	015a3e8a83	Merge branch 'router-for-me:main' into main	2026-01-17 05:17:38 +08:00
Luis Pater	bc7167e9fe	feat(runtime): add model alias support and enhance payload rule matching - Introduced `payloadModelAliases` and `payloadModelCandidates` functions to support model aliases for improved flexibility. - Updated rule matching logic to handle multiple model candidates. - Refactored variable naming in executor to improve code clarity and consistency.	2026-01-17 05:05:24 +08:00
Luis Pater	384578a88c	feat(cliproxy, gemini): improve ID matching logic and enrich normalized model output - Enhanced ID matching in `cliproxy` by adding additional conditions to better handle ID equality cases. - Updated `gemini` handlers to include `displayName` and `description` in normalized models for enriched metadata.	2026-01-17 04:44:09 +08:00
Luis Pater	65b4e1ec6c	feat(codex): enable instruction toggling and update role terminology - Added conditional logic for Codex instruction injection based on configuration. - Updated role terminology from "user" to "developer" for better alignment with context.	2026-01-17 04:12:29 +08:00
Luis Pater	06afa29f2d	Merge branch 'router-for-me:main' into main	2026-01-16 20:01:35 +08:00
Luis Pater	6600d58ba2	feat(codex): enhance input transformation and remove unused `safety_identifier` field - Added logic to transform `inputResults` into structured JSON for improved processing. - Removed redundant `safety_identifier` field in executor payload to streamline requests.	2026-01-16 19:59:01 +08:00
Luis Pater	25e9be3ced	Merge pull request #103 from ChrAlpha/feat/add-gpt-5.2-codex-copilot feat(openai): responses API support for GitHub Copilot provider	2026-01-16 18:33:53 +08:00
Luis Pater	ccb2aaf2fe	Merge branch 'router-for-me:main' into main	2026-01-16 18:29:56 +08:00
Luis Pater	961c6f67da	Merge pull request #100 from novadev94/fix/readd_kiro_auto fix(kiro): re-add kiro-auto to registry	2026-01-16 18:29:43 +08:00
Luis Pater	dc4305f75a	Merge pull request #107 from zccing/main fix(kiro): correct Amazon Q endpoint URL path	2026-01-16 18:28:45 +08:00
Chén Mù	4dc7af5a5d	Merge pull request #1054 from router-for-me/codex fix(codex): ensure instructions field exists	2026-01-16 15:40:12 +08:00
hkfires	902bea24b4	fix(codex): ensure instructions field exists	2026-01-16 15:38:10 +08:00
Cc	778cf4af9e	feat(kiro): add agent-mode and optout headers for non-IDC auth - Add x-amzn-kiro-agent-mode: vibe for non-IDC auth (Social, Builder ID) IDC auth continues to use "spec" mode - Add x-amzn-codewhisperer-optout: true for all auth types This opts out of data sharing for service improvement (privacy) These changes align with other Kiro implementations (kiro.rs, KiroGate, kiro-gateway, AIClient-2-API) and make requests more similar to real Kiro IDE clients. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>	2026-01-16 14:21:38 +08:00
hkfires	c3ef46f409	feat(config): supplement missing default aliases during antigravity migration	2026-01-16 13:37:46 +08:00
Cc	4721c58d9c	fix(kiro): correct Amazon Q endpoint URL path The Q endpoint was using `/` which caused all requests to fail with 400 or UnknownOperationException. Changed to `/generateAssistantResponse` which is the correct path for the Q endpoint. This fix restores the Q endpoint failover functionality. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>	2026-01-16 13:22:43 +08:00
Luis Pater	aa0b63e214	refactor(config): clarify Codex instruction toggle documentation	2026-01-16 12:50:09 +08:00
ChrAlpha	18daa023cb	fix(openai): improve error handling for response conversion failures	2026-01-15 19:13:54 +08:00
ChrAlpha	8950d92682	feat(openai): implement endpoint resolution and response handling for Chat and Responses models	2026-01-15 18:30:01 +08:00
ChrAlpha	0ffcce3ec8	feat(registry): add supported endpoints for GitHub Copilot models Enhance model definitions by including supported API endpoints for each model. This allows for better integration and usage tracking with the GitHub Copilot API.	2026-01-15 16:32:28 +08:00
ChrAlpha	f4fcfc5867	feat(registry): add GPT-5.2-Codex model to GitHub Copilot provider Add gpt-5.2-codex model definition to GetGitHubCopilotModels() function, enabling access to OpenAI GPT-5.2 Codex through the GitHub Copilot API.	2026-01-15 14:14:09 +08:00
Nova	f82f70df5c	fix(kiro): re-add kiro-auto to registry Reference: https://github.com/router-for-me/CLIProxyAPIPlus/pull/16 Revert: `a594338bc5`	2026-01-15 03:26:22 +07:00