Merge branch 'router-for-me:main' into main

2026-03-08 06:43:41 +00:00 · 2026-01-13 03:15:46 +08:00
parent e0194d8511 43652d044c
commit 9b33fbf1cd
15 changed files with 209 additions and 59 deletions
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -82,6 +82,9 @@ routing:
 # When true, enable authentication for the WebSocket API (/v1/ws).
 ws-auth: false

+# When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
+nonstream-keepalive-interval: 0
+
 # Streaming behavior (SSE keep-alives + safe bootstrap retries).
 # streaming:
 #   keepalive-seconds: 15   # Default: 0 (disabled). <= 0 disables keep-alives.
@@ -217,6 +220,7 @@ ws-auth: false
 # These mappings rename model IDs for both model listing and request routing.
 # Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot.
 # NOTE: Mappings do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
+# You can repeat the same name with different aliases to expose multiple client model names.
 # oauth-model-mappings:
 #   gemini-cli:
 #     - name: "gemini-2.5-pro"          # original model name under this channel
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -567,7 +567,7 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {

 // SanitizeOAuthModelMappings normalizes and deduplicates global OAuth model name mappings.
 // It trims whitespace, normalizes channel keys to lower-case, drops empty entries,
-// and ensures (From, To) pairs are unique within each channel.
+// allows multiple aliases per upstream name, and ensures aliases are unique within each channel.
 func (cfg *Config) SanitizeOAuthModelMappings() {
 	if cfg == nil || len(cfg.OAuthModelMappings) == 0 {
 		return
@@ -578,7 +578,6 @@ func (cfg *Config) SanitizeOAuthModelMappings() {
 		if channel == "" || len(mappings) == 0 {
 			continue
 		}
-		seenName := make(map[string]struct{}, len(mappings))
 		seenAlias := make(map[string]struct{}, len(mappings))
 		clean := make([]ModelNameMapping, 0, len(mappings))
 		for _, mapping := range mappings {
@@ -590,15 +589,10 @@ func (cfg *Config) SanitizeOAuthModelMappings() {
 			if strings.EqualFold(name, alias) {
 				continue
 			}
-			nameKey := strings.ToLower(name)
 			aliasKey := strings.ToLower(alias)
-			if _, ok := seenName[nameKey]; ok {
-				continue
-			}
 			if _, ok := seenAlias[aliasKey]; ok {
 				continue
 			}
-			seenName[nameKey] = struct{}{}
 			seenAlias[aliasKey] = struct{}{}
 			clean = append(clean, ModelNameMapping{Name: name, Alias: alias, Fork: mapping.Fork})
 		}
--- a/internal/config/oauth_model_mappings_test.go
+++ b/internal/config/oauth_model_mappings_test.go
@@ -25,3 +25,32 @@ func TestSanitizeOAuthModelMappings_PreservesForkFlag(t *testing.T) {
 		t.Fatalf("expected second mapping to be gpt-6->g6 fork=false, got name=%q alias=%q fork=%v", mappings[1].Name, mappings[1].Alias, mappings[1].Fork)
 	}
 }
+
+func TestSanitizeOAuthModelMappings_AllowsMultipleAliasesForSameName(t *testing.T) {
+	cfg := &Config{
+		OAuthModelMappings: map[string][]ModelNameMapping{
+			"antigravity": {
+				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
+				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
+				{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
+			},
+		},
+	}
+
+	cfg.SanitizeOAuthModelMappings()
+
+	mappings := cfg.OAuthModelMappings["antigravity"]
+	expected := []ModelNameMapping{
+		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101", Fork: true},
+		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5-20251101-thinking", Fork: true},
+		{Name: "gemini-claude-opus-4-5-thinking", Alias: "claude-opus-4-5", Fork: true},
+	}
+	if len(mappings) != len(expected) {
+		t.Fatalf("expected %d sanitized mappings, got %d", len(expected), len(mappings))
+	}
+	for i, exp := range expected {
+		if mappings[i].Name != exp.Name || mappings[i].Alias != exp.Alias || mappings[i].Fork != exp.Fork {
+			t.Fatalf("expected mapping %d to be name=%q alias=%q fork=%v, got name=%q alias=%q fork=%v", i, exp.Name, exp.Alias, exp.Fork, mappings[i].Name, mappings[i].Alias, mappings[i].Fork)
+		}
+	}
+}
--- a/internal/config/sdk_config.go
+++ b/internal/config/sdk_config.go
@@ -25,6 +25,10 @@ type SDKConfig struct {

 	// Streaming configures server-side streaming behavior (keep-alives and safe bootstrap retries).
 	Streaming StreamingConfig `yaml:"streaming" json:"streaming"`
+
+	// NonStreamKeepAliveInterval controls how often blank lines are emitted for non-streaming responses.
+	// <= 0 disables keep-alives. Value is in seconds.
+	NonStreamKeepAliveInterval int `yaml:"nonstream-keepalive-interval,omitempty" json:"nonstream-keepalive-interval,omitempty"`
 }

 // StreamingConfig holds server streaming behavior configuration.
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -106,6 +106,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	body, _ = sjson.SetBytes(body, "model", model)
 	body, _ = sjson.SetBytes(body, "stream", true)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
+	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")

 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
 	httpReq, err := e.cacheHelper(ctx, from, url, req, body)
@@ -214,6 +215,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	}
 	body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
+	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
 	body, _ = sjson.SetBytes(body, "model", model)

 	url := strings.TrimSuffix(baseURL, "/") + "/responses"
@@ -316,6 +318,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false)
 	body, _ = sjson.SetBytes(body, "model", model)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
+	body, _ = sjson.DeleteBytes(body, "prompt_cache_retention")
 	body, _ = sjson.SetBytes(body, "stream", false)

 	enc, err := tokenizerForCodexModel(model)
--- a/internal/watcher/diff/config_diff.go
+++ b/internal/watcher/diff/config_diff.go
@@ -54,6 +54,9 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 	if oldCfg.ForceModelPrefix != newCfg.ForceModelPrefix {
 		changes = append(changes, fmt.Sprintf("force-model-prefix: %t -> %t", oldCfg.ForceModelPrefix, newCfg.ForceModelPrefix))
 	}
+	if oldCfg.NonStreamKeepAliveInterval != newCfg.NonStreamKeepAliveInterval {
+		changes = append(changes, fmt.Sprintf("nonstream-keepalive-interval: %d -> %d", oldCfg.NonStreamKeepAliveInterval, newCfg.NonStreamKeepAliveInterval))
+	}

 	// Quota-exceeded behavior
 	if oldCfg.QuotaExceeded.SwitchProject != newCfg.QuotaExceeded.SwitchProject {
--- a/internal/watcher/diff/config_diff_test.go
+++ b/internal/watcher/diff/config_diff_test.go
@@ -231,10 +231,11 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
 		AmpCode:                config.AmpCode{UpstreamAPIKey: "keep", RestrictManagementToLocalhost: false},
 		RemoteManagement:       config.RemoteManagement{DisableControlPanel: false, PanelGitHubRepository: "old/repo", SecretKey: "keep"},
 		SDKConfig: sdkconfig.SDKConfig{
-			RequestLog:       false,
-			ProxyURL:         "http://old-proxy",
-			APIKeys:          []string{"key-1"},
-			ForceModelPrefix: false,
+			RequestLog:                 false,
+			ProxyURL:                   "http://old-proxy",
+			APIKeys:                    []string{"key-1"},
+			ForceModelPrefix:           false,
+			NonStreamKeepAliveInterval: 0,
 		},
 	}
 	newCfg := &config.Config{
@@ -267,10 +268,11 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
 			SecretKey:             "",
 		},
 		SDKConfig: sdkconfig.SDKConfig{
-			RequestLog:       true,
-			ProxyURL:         "http://new-proxy",
-			APIKeys:          []string{" key-1 ", "key-2"},
-			ForceModelPrefix: true,
+			RequestLog:                 true,
+			ProxyURL:                   "http://new-proxy",
+			APIKeys:                    []string{" key-1 ", "key-2"},
+			ForceModelPrefix:           true,
+			NonStreamKeepAliveInterval: 5,
 		},
 	}

@@ -285,6 +287,7 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
 	expectContains(t, details, "proxy-url: http://old-proxy -> http://new-proxy")
 	expectContains(t, details, "ws-auth: false -> true")
 	expectContains(t, details, "force-model-prefix: false -> true")
+	expectContains(t, details, "nonstream-keepalive-interval: 0 -> 5")
 	expectContains(t, details, "quota-exceeded.switch-project: false -> true")
 	expectContains(t, details, "quota-exceeded.switch-preview-model: false -> true")
 	expectContains(t, details, "api-keys count: 1 -> 2")
--- a/sdk/api/handlers/claude/code_handlers.go
+++ b/sdk/api/handlers/claude/code_handlers.go
@@ -146,10 +146,12 @@ func (h *ClaudeCodeAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSO
 	c.Header("Content-Type", "application/json")
 	alt := h.GetAlt(c)
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+	stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)

 	modelName := gjson.GetBytes(rawJSON, "model").String()

 	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
+	stopKeepAlive()
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
 		cliCancel(errMsg.Error)
@@ -159,13 +161,18 @@ func (h *ClaudeCodeAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSO
 	// Decompress gzipped responses - Claude API sometimes returns gzip without Content-Encoding header
 	// This fixes title generation and other non-streaming responses that arrive compressed
 	if len(resp) >= 2 && resp[0] == 0x1f && resp[1] == 0x8b {
-		gzReader, err := gzip.NewReader(bytes.NewReader(resp))
-		if err != nil {
-			log.Warnf("failed to decompress gzipped Claude response: %v", err)
+		gzReader, errGzip := gzip.NewReader(bytes.NewReader(resp))
+		if errGzip != nil {
+			log.Warnf("failed to decompress gzipped Claude response: %v", errGzip)
 		} else {
-			defer gzReader.Close()
-			if decompressed, err := io.ReadAll(gzReader); err != nil {
-				log.Warnf("failed to read decompressed Claude response: %v", err)
+			defer func() {
+				if errClose := gzReader.Close(); errClose != nil {
+					log.Warnf("failed to close Claude gzip reader: %v", errClose)
+				}
+			}()
+			decompressed, errRead := io.ReadAll(gzReader)
+			if errRead != nil {
+				log.Warnf("failed to read decompressed Claude response: %v", errRead)
 			} else {
 				resp = decompressed
 			}
--- a/sdk/api/handlers/gemini/gemini_handlers.go
+++ b/sdk/api/handlers/gemini/gemini_handlers.go
@@ -336,7 +336,9 @@ func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName strin
 	c.Header("Content-Type", "application/json")
 	alt := h.GetAlt(c)
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+	stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
 	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, alt)
+	stopKeepAlive()
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
 		cliCancel(errMsg.Error)
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -9,6 +9,7 @@ import (
 	"fmt"
 	"net/http"
 	"strings"
+	"sync"
 	"time"

 	"github.com/gin-gonic/gin"
@@ -113,6 +114,19 @@ func StreamingKeepAliveInterval(cfg *config.SDKConfig) time.Duration {
 	return time.Duration(seconds) * time.Second
 }

+// NonStreamingKeepAliveInterval returns the keep-alive interval for non-streaming responses.
+// Returning 0 disables keep-alives (default when unset).
+func NonStreamingKeepAliveInterval(cfg *config.SDKConfig) time.Duration {
+	seconds := 0
+	if cfg != nil {
+		seconds = cfg.NonStreamKeepAliveInterval
+	}
+	if seconds <= 0 {
+		return 0
+	}
+	return time.Duration(seconds) * time.Second
+}
+
 // StreamingBootstrapRetries returns how many times a streaming request may be retried before any bytes are sent.
 func StreamingBootstrapRetries(cfg *config.SDKConfig) int {
 	retries := defaultStreamingBootstrapRetries
@@ -294,6 +308,53 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
 	}
 }

+// StartNonStreamingKeepAlive emits blank lines every 5 seconds while waiting for a non-streaming response.
+// It returns a stop function that must be called before writing the final response.
+func (h *BaseAPIHandler) StartNonStreamingKeepAlive(c *gin.Context, ctx context.Context) func() {
+	if h == nil || c == nil {
+		return func() {}
+	}
+	interval := NonStreamingKeepAliveInterval(h.Cfg)
+	if interval <= 0 {
+		return func() {}
+	}
+	flusher, ok := c.Writer.(http.Flusher)
+	if !ok {
+		return func() {}
+	}
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	stopChan := make(chan struct{})
+	var stopOnce sync.Once
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		ticker := time.NewTicker(interval)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-stopChan:
+				return
+			case <-ctx.Done():
+				return
+			case <-ticker.C:
+				_, _ = c.Writer.Write([]byte("\n"))
+				flusher.Flush()
+			}
+		}
+	}()
+
+	return func() {
+		stopOnce.Do(func() {
+			close(stopChan)
+		})
+		wg.Wait()
+	}
+}
+
 // appendAPIResponse preserves any previously captured API response and appends new data.
 func appendAPIResponse(c *gin.Context, data []byte) {
 	if c == nil || len(data) == 0 {
--- a/sdk/api/handlers/handlers_stream_bootstrap_test.go
+++ b/sdk/api/handlers/handlers_stream_bootstrap_test.go
@@ -56,6 +56,14 @@ func (e *failOnceStreamExecutor) CountTokens(context.Context, *coreauth.Auth, co
 	return coreexecutor.Response{}, &coreauth.Error{Code: "not_implemented", Message: "CountTokens not implemented"}
 }

+func (e *failOnceStreamExecutor) HttpRequest(ctx context.Context, auth *coreauth.Auth, req *http.Request) (*http.Response, error) {
+	return nil, &coreauth.Error{
+		Code:       "not_implemented",
+		Message:    "HttpRequest not implemented",
+		HTTPStatus: http.StatusNotImplemented,
+	}
+}
+
 func (e *failOnceStreamExecutor) Calls() int {
 	e.mu.Lock()
 	defer e.mu.Unlock()
--- a/sdk/api/handlers/openai/openai_handlers.go
+++ b/sdk/api/handlers/openai/openai_handlers.go
@@ -524,7 +524,9 @@ func (h *OpenAIAPIHandler) handleCompletionsNonStreamingResponse(c *gin.Context,

 	modelName := gjson.GetBytes(chatCompletionsJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
+	stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)
 	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, chatCompletionsJSON, "")
+	stopKeepAlive()
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
 		cliCancel(errMsg.Error)
--- a/sdk/api/handlers/openai/openai_responses_handlers.go
+++ b/sdk/api/handlers/openai/openai_responses_handlers.go
@@ -103,20 +103,17 @@ func (h *OpenAIResponsesAPIHandler) handleNonStreamingResponse(c *gin.Context, r

 	modelName := gjson.GetBytes(rawJSON, "model").String()
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
-	defer func() {
-		cliCancel()
-	}()
+	stopKeepAlive := h.StartNonStreamingKeepAlive(c, cliCtx)

 	resp, errMsg := h.ExecuteWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
+	stopKeepAlive()
 	if errMsg != nil {
 		h.WriteErrorResponse(c, errMsg)
+		cliCancel(errMsg.Error)
 		return
 	}
 	_, _ = c.Writer.Write(resp)
-	return
-
-	// no legacy fallback
-
+	cliCancel()
 }

 // handleStreamingResponse handles streaming responses for Gemini models.
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -1247,7 +1247,7 @@ func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, mode
 		fork  bool
 	}

-	forward := make(map[string]mappingEntry, len(mappings))
+	forward := make(map[string][]mappingEntry, len(mappings))
 	for i := range mappings {
 		name := strings.TrimSpace(mappings[i].Name)
 		alias := strings.TrimSpace(mappings[i].Alias)
@@ -1258,14 +1258,12 @@ func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, mode
 			continue
 		}
 		key := strings.ToLower(name)
-		if _, exists := forward[key]; exists {
-			continue
-		}
-		forward[key] = mappingEntry{alias: alias, fork: mappings[i].Fork}
+		forward[key] = append(forward[key], mappingEntry{alias: alias, fork: mappings[i].Fork})
 	}
 	if len(forward) == 0 {
 		return models
 	}
+
 	out := make([]*ModelInfo, 0, len(models))
 	seen := make(map[string]struct{}, len(models))
 	for _, model := range models {
@@ -1277,17 +1275,8 @@ func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, mode
 			continue
 		}
 		key := strings.ToLower(id)
-		entry, ok := forward[key]
-		if !ok {
-			if _, exists := seen[key]; exists {
-				continue
-			}
-			seen[key] = struct{}{}
-			out = append(out, model)
-			continue
-		}
-		mappedID := strings.TrimSpace(entry.alias)
-		if mappedID == "" {
+		entries := forward[key]
+		if len(entries) == 0 {
 			if _, exists := seen[key]; exists {
 				continue
 			}
@@ -1296,11 +1285,29 @@ func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, mode
 			continue
 		}

-		if entry.fork {
+		keepOriginal := false
+		for _, entry := range entries {
+			if entry.fork {
+				keepOriginal = true
+				break
+			}
+		}
+		if keepOriginal {
 			if _, exists := seen[key]; !exists {
 				seen[key] = struct{}{}
 				out = append(out, model)
 			}
+		}
+
+		addedAlias := false
+		for _, entry := range entries {
+			mappedID := strings.TrimSpace(entry.alias)
+			if mappedID == "" {
+				continue
+			}
+			if strings.EqualFold(mappedID, id) {
+				continue
+			}
 			aliasKey := strings.ToLower(mappedID)
 			if _, exists := seen[aliasKey]; exists {
 				continue
@@ -1312,24 +1319,16 @@ func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, mode
 				clone.Name = rewriteModelInfoName(clone.Name, id, mappedID)
 			}
 			out = append(out, &clone)
-			continue
+			addedAlias = true
 		}

-		uniqueKey := strings.ToLower(mappedID)
-		if _, exists := seen[uniqueKey]; exists {
-			continue
-		}
-		seen[uniqueKey] = struct{}{}
-		if mappedID == id {
+		if !keepOriginal && !addedAlias {
+			if _, exists := seen[key]; exists {
+				continue
+			}
+			seen[key] = struct{}{}
 			out = append(out, model)
-			continue
 		}
-		clone := *model
-		clone.ID = mappedID
-		if clone.Name != "" {
-			clone.Name = rewriteModelInfoName(clone.Name, id, mappedID)
-		}
-		out = append(out, &clone)
 	}
 	return out
 }
--- a/sdk/cliproxy/service_oauth_model_mappings_test.go
+++ b/sdk/cliproxy/service_oauth_model_mappings_test.go
@@ -56,3 +56,37 @@ func TestApplyOAuthModelMappings_ForkAddsAlias(t *testing.T) {
 		t.Fatalf("expected forked model name %q, got %q", "models/g5", out[1].Name)
 	}
 }
+
+func TestApplyOAuthModelMappings_ForkAddsMultipleAliases(t *testing.T) {
+	cfg := &config.Config{
+		OAuthModelMappings: map[string][]config.ModelNameMapping{
+			"codex": {
+				{Name: "gpt-5", Alias: "g5", Fork: true},
+				{Name: "gpt-5", Alias: "g5-2", Fork: true},
+			},
+		},
+	}
+	models := []*ModelInfo{
+		{ID: "gpt-5", Name: "models/gpt-5"},
+	}
+
+	out := applyOAuthModelMappings(cfg, "codex", "oauth", models)
+	if len(out) != 3 {
+		t.Fatalf("expected 3 models, got %d", len(out))
+	}
+	if out[0].ID != "gpt-5" {
+		t.Fatalf("expected first model id %q, got %q", "gpt-5", out[0].ID)
+	}
+	if out[1].ID != "g5" {
+		t.Fatalf("expected second model id %q, got %q", "g5", out[1].ID)
+	}
+	if out[1].Name != "models/g5" {
+		t.Fatalf("expected forked model name %q, got %q", "models/g5", out[1].Name)
+	}
+	if out[2].ID != "g5-2" {
+		t.Fatalf("expected third model id %q, got %q", "g5-2", out[2].ID)
+	}
+	if out[2].Name != "models/g5-2" {
+		t.Fatalf("expected forked model name %q, got %q", "models/g5-2", out[2].Name)
+	}
+}