From 9102ff031d0cc013f13c22fad825883674753947 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Mon, 25 Aug 2025 23:27:30 +0800 Subject: [PATCH] Refactor API handlers to implement retry mechanism with configurable limits and improved error handling - Introduced retry counter with a configurable `RequestRetry` limit in all handlers. - Enhanced error handling with specific HTTP status codes for switching clients. - Standardized response forwarding for non-retriable errors. - Improved logging for quota and client switch scenarios. --- README.md | 32 +++++++++-------- README_CN.md | 5 +++ config.example.yaml | 3 ++ internal/api/handlers/claude/code_handlers.go | 18 +++++++--- .../handlers/gemini/gemini-cli_handlers.go | 33 ++++++++++++++---- .../api/handlers/gemini/gemini_handlers.go | 34 ++++++++++++++----- .../api/handlers/openai/openai_handlers.go | 32 +++++++++++++---- internal/api/server.go | 16 +++++++-- internal/config/config.go | 3 ++ internal/logging/request_logger.go | 9 +++++ internal/watcher/watcher.go | 6 ++++ 11 files changed, 148 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 264cce1d..cb2b0ace 100644 --- a/README.md +++ b/README.md @@ -238,20 +238,21 @@ The server uses a YAML configuration file (`config.yaml`) located in the project ### Configuration Options -| Parameter | Type | Default | Description | -|---------------------------------------|----------|--------------------|----------------------------------------------------------------------------------------------| -| `port` | integer | 8317 | The port number on which the server will listen | -| `auth-dir` | string | "~/.cli-proxy-api" | Directory where authentication tokens are stored. 
Supports using `~` for home directory | -| `proxy-url` | string | "" | Proxy url, support socks5/http/https protocol, example: socks5://user:pass@192.168.1.1:1080/ | -| `quota-exceeded` | object | {} | Configuration for handling quota exceeded | -| `quota-exceeded.switch-project` | boolean | true | Whether to automatically switch to another project when a quota is exceeded | -| `quota-exceeded.switch-preview-model` | boolean | true | Whether to automatically switch to a preview model when a quota is exceeded | -| `debug` | boolean | false | Enable debug mode for verbose logging | -| `api-keys` | string[] | [] | List of API keys that can be used to authenticate requests | -| `generative-language-api-key` | string[] | [] | List of Generative Language API keys | -| `claude-api-key` | object | {} | List of Claude API keys | -| `claude-api-key.api-key` | string | "" | Claude API key | -| `claude-api-key.base-url` | string | "" | Custom Claude API endpoint, if you use the third party API endpoint | +| Parameter | Type | Default | Description | +|---------------------------------------|----------|--------------------|-----------------------------------------------------------------------------------------------| +| `port` | integer | 8317 | The port number on which the server will listen | +| `auth-dir` | string | "~/.cli-proxy-api" | Directory where authentication tokens are stored. 
Supports using `~` for home directory | +| `proxy-url` | string | "" | Proxy url, support socks5/http/https protocol, example: socks5://user:pass@192.168.1.1:1080/ | +| `request-retry` | integer | 0 | Number of retries for a request whose response HTTP status is 403, 408, 500, 502, 503, or 504 | +| `quota-exceeded` | object | {} | Configuration for handling quota exceeded | +| `quota-exceeded.switch-project` | boolean | true | Whether to automatically switch to another project when a quota is exceeded | +| `quota-exceeded.switch-preview-model` | boolean | true | Whether to automatically switch to a preview model when a quota is exceeded | +| `debug` | boolean | false | Enable debug mode for verbose logging | +| `api-keys` | string[] | [] | List of API keys that can be used to authenticate requests | +| `generative-language-api-key` | string[] | [] | List of Generative Language API keys | +| `claude-api-key` | object | {} | List of Claude API keys | +| `claude-api-key.api-key` | string | "" | Claude API key | +| `claude-api-key.base-url` | string | "" | Custom Claude API endpoint, if you use the third party API endpoint | ### Example Configuration File @@ -268,6 +269,9 @@ debug: false # Proxy url, support socks5/http/https protocol, example: socks5://user:pass@192.168.1.1:1080/ proxy-url: "" +# Number of retries for a request whose response HTTP status is 403, 408, 500, 502, 503, or 504 +request-retry: 3 + # Quota exceeded behavior quota-exceeded: switch-project: true # Whether to automatically switch to another project when a quota is exceeded diff --git a/README_CN.md b/README_CN.md index fb561c5b..4adce5ea 100644 --- a/README_CN.md +++ b/README_CN.md @@ -240,6 +240,7 @@ console.log(await claudeResponse.json()); | `port` | integer | 8317 | 服务器监听的端口号 | | `auth-dir` | string | "~/.cli-proxy-api" | 存储身份验证令牌的目录。支持使用 `~` 表示主目录 | | `proxy-url` | string | "" | 代理 URL,支持 socks5/http/https 协议,示例:socks5://user:pass@192.168.1.1:1080/ | +| `request-retry` | integer | 0 | 
请求失败重试次数,如果响应的http代码为403、408、500、502、503、504则会自动重试 | | `quota-exceeded` | object | {} | 处理配额超限的配置 | | `quota-exceeded.switch-project` | boolean | true | 当配额超限时是否自动切换到另一个项目 | | `quota-exceeded.switch-preview-model` | boolean | true | 当配额超限时是否自动切换到预览模型 | @@ -265,6 +266,10 @@ debug: false # 代理 URL,支持 socks5/http/https 协议,示例:socks5://user:pass@192.168.1.1:1080/ proxy-url: "" +# 请求失败重试次数,如果响应的http代码为403、408、500、502、503、504则会自动重试 +request-retry: 3 + + # 配额超限行为 quota-exceeded: switch-project: true # 当配额超限时是否自动切换到另一个项目 diff --git a/config.example.yaml b/config.example.yaml index 36710fbd..48d88df6 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -4,6 +4,9 @@ auth-dir: "~/.cli-proxy-api" debug: true proxy-url: "" +# Request retry times, if the response http code is 403, 408, 500, 502, 503, 504, it will retry +request-retry: 3 + # Quota exceeded behavior quota-exceeded: switch-project: true diff --git a/internal/api/handlers/claude/code_handlers.go b/internal/api/handlers/claude/code_handlers.go index 4b59cb70..7260f251 100644 --- a/internal/api/handlers/claude/code_handlers.go +++ b/internal/api/handlers/claude/code_handlers.go @@ -122,11 +122,11 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [ cliClient.GetRequestMutex().Unlock() } }() - + retryCount := 0 // Main client rotation loop with quota management // This loop implements a sophisticated load balancing and failover mechanism outLoop: - for { + for retryCount <= h.Cfg.RequestRetry { var errorResponse *interfaces.ErrorMessage cliClient, errorResponse = h.GetClient(modelName) if errorResponse != nil { @@ -170,9 +170,17 @@ outLoop: if okError { // Special handling for quota exceeded errors // If configured, attempt to switch to a different project/client - if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject { - continue outLoop // Restart the client selection process - } else { + switch errInfo.StatusCode { + case 429: + if h.Cfg.QuotaExceeded.SwitchProject { 
+ log.Debugf("quota exceeded, switch client") + continue outLoop // Restart the client selection process + } + case 403, 408, 500, 502, 503, 504: + log.Debugf("http status code %d, switch client", errInfo.StatusCode) + retryCount++ + continue outLoop + default: // Forward other errors directly to the client c.Status(errInfo.StatusCode) _, _ = fmt.Fprint(c.Writer, errInfo.Error.Error()) diff --git a/internal/api/handlers/gemini/gemini-cli_handlers.go b/internal/api/handlers/gemini/gemini-cli_handlers.go index 82ef3392..9aaad1ea 100644 --- a/internal/api/handlers/gemini/gemini-cli_handlers.go +++ b/internal/api/handlers/gemini/gemini-cli_handlers.go @@ -167,8 +167,9 @@ func (h *GeminiCLIAPIHandler) handleInternalStreamGenerateContent(c *gin.Context } }() + retryCount := 0 outLoop: - for { + for retryCount <= h.Cfg.RequestRetry { var errorResponse *interfaces.ErrorMessage cliClient, errorResponse = h.GetClient(modelName) if errorResponse != nil { @@ -205,9 +206,18 @@ outLoop: // Handle errors from the backend. 
case err, okError := <-errChan: if okError { - if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject { + switch err.StatusCode { + case 429: + if h.Cfg.QuotaExceeded.SwitchProject { + log.Debugf("quota exceeded, switch client") + continue outLoop // Restart the client selection process + } + case 403, 408, 500, 502, 503, 504: + log.Debugf("http status code %d, switch client", err.StatusCode) + retryCount++ continue outLoop - } else { + default: + // Forward other errors directly to the client c.Status(err.StatusCode) _, _ = fmt.Fprint(c.Writer, err.Error.Error()) flusher.Flush() @@ -238,7 +248,8 @@ func (h *GeminiCLIAPIHandler) handleInternalGenerateContent(c *gin.Context, rawJ } }() - for { + retryCount := 0 + for retryCount <= h.Cfg.RequestRetry { var errorResponse *interfaces.ErrorMessage cliClient, errorResponse = h.GetClient(modelName) if errorResponse != nil { @@ -250,12 +261,20 @@ func (h *GeminiCLIAPIHandler) handleInternalGenerateContent(c *gin.Context, rawJ resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, "") if err != nil { - if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject { + switch err.StatusCode { + case 429: + if h.Cfg.QuotaExceeded.SwitchProject { + log.Debugf("quota exceeded, switch client") + continue // Restart the client selection process + } + case 403, 408, 500, 502, 503, 504: + log.Debugf("http status code %d, switch client", err.StatusCode) + retryCount++ continue - } else { + default: + // Forward other errors directly to the client c.Status(err.StatusCode) _, _ = c.Writer.Write([]byte(err.Error.Error())) - // log.Debugf("code: %d, error: %s", err.StatusCode, err.Error.Error()) cliCancel(err.Error) } break diff --git a/internal/api/handlers/gemini/gemini_handlers.go b/internal/api/handlers/gemini/gemini_handlers.go index 258987a0..759f372f 100644 --- a/internal/api/handlers/gemini/gemini_handlers.go +++ b/internal/api/handlers/gemini/gemini_handlers.go @@ -270,8 +270,9 @@ func (h *GeminiAPIHandler) 
handleStreamGenerateContent(c *gin.Context, modelName } }() + retryCount := 0 outLoop: - for { + for retryCount <= h.Cfg.RequestRetry { var errorResponse *interfaces.ErrorMessage cliClient, errorResponse = h.GetClient(modelName) if errorResponse != nil { @@ -311,11 +312,18 @@ outLoop: // Handle errors from the backend. case err, okError := <-errChan: if okError { - if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject { - log.Debugf("quota exceeded, switch client") + switch err.StatusCode { + case 429: + if h.Cfg.QuotaExceeded.SwitchProject { + log.Debugf("quota exceeded, switch client") + continue outLoop // Restart the client selection process + } + case 403, 408, 500, 502, 503, 504: + log.Debugf("http status code %d, switch client", err.StatusCode) + retryCount++ continue outLoop - } else { - // log.Debugf("error code :%d, error: %v", err.StatusCode, err.Error.Error()) + default: + // Forward other errors directly to the client c.Status(err.StatusCode) _, _ = fmt.Fprint(c.Writer, err.Error.Error()) flusher.Flush() @@ -402,7 +410,8 @@ func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName strin } }() - for { + retryCount := 0 + for retryCount <= h.Cfg.RequestRetry { var errorResponse *interfaces.ErrorMessage cliClient, errorResponse = h.GetClient(modelName) if errorResponse != nil { @@ -414,9 +423,18 @@ func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName strin resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, alt) if err != nil { - if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject { + switch err.StatusCode { + case 429: + if h.Cfg.QuotaExceeded.SwitchProject { + log.Debugf("quota exceeded, switch client") + continue // Restart the client selection process + } + case 403, 408, 500, 502, 503, 504: + log.Debugf("http status code %d, switch client", err.StatusCode) + retryCount++ continue - } else { + default: + // Forward other errors directly to the client c.Status(err.StatusCode) _, _ = 
c.Writer.Write([]byte(err.Error.Error())) cliCancel(err.Error) diff --git a/internal/api/handlers/openai/openai_handlers.go b/internal/api/handlers/openai/openai_handlers.go index 485b6827..aebe25b9 100644 --- a/internal/api/handlers/openai/openai_handlers.go +++ b/internal/api/handlers/openai/openai_handlers.go @@ -183,7 +183,8 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON [] } }() - for { + retryCount := 0 + for retryCount <= h.Cfg.RequestRetry { var errorResponse *interfaces.ErrorMessage cliClient, errorResponse = h.GetClient(modelName) if errorResponse != nil { @@ -195,9 +196,18 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON [] resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, "") if err != nil { - if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject { + switch err.StatusCode { + case 429: + if h.Cfg.QuotaExceeded.SwitchProject { + log.Debugf("quota exceeded, switch client") + continue // Restart the client selection process + } + case 403, 408, 500, 502, 503, 504: + log.Debugf("http status code %d, switch client", err.StatusCode) + retryCount++ continue - } else { + default: + // Forward other errors directly to the client c.Status(err.StatusCode) _, _ = c.Writer.Write([]byte(err.Error.Error())) cliCancel(err.Error) @@ -247,8 +257,9 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt } }() + retryCount := 0 outLoop: - for { + for retryCount <= h.Cfg.RequestRetry { var errorResponse *interfaces.ErrorMessage cliClient, errorResponse = h.GetClient(modelName) if errorResponse != nil { @@ -286,9 +297,18 @@ outLoop: // Handle errors from the backend. 
case err, okError := <-errChan: if okError { - if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject { + switch err.StatusCode { + case 429: + if h.Cfg.QuotaExceeded.SwitchProject { + log.Debugf("quota exceeded, switch client") + continue outLoop // Restart the client selection process + } + case 403, 408, 500, 502, 503, 504: + log.Debugf("http status code %d, switch client", err.StatusCode) + retryCount++ continue outLoop - } else { + default: + // Forward other errors directly to the client c.Status(err.StatusCode) _, _ = fmt.Fprint(c.Writer, err.Error.Error()) flusher.Flush() diff --git a/internal/api/server.go b/internal/api/server.go index 8d3579f9..374a25ea 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -37,6 +37,9 @@ type Server struct { // cfg holds the current server configuration. cfg *config.Config + + // requestLogger is the request logger instance for dynamic configuration updates. + requestLogger *logging.FileRequestLogger } // NewServer creates and initializes a new API server instance. 
@@ -69,9 +72,10 @@ func NewServer(cfg *config.Config, cliClients []interfaces.Client) *Server { // Create server instance s := &Server{ - engine: engine, - handlers: handlers.NewBaseAPIHandlers(cliClients, cfg), - cfg: cfg, + engine: engine, + handlers: handlers.NewBaseAPIHandlers(cliClients, cfg), + cfg: cfg, + requestLogger: requestLogger, } // Setup routes @@ -189,6 +193,12 @@ func corsMiddleware() gin.HandlerFunc { // - clients: The new slice of AI service clients // - cfg: The new application configuration func (s *Server) UpdateClients(clients []interfaces.Client, cfg *config.Config) { + // Update request logger enabled state if it has changed + if s.requestLogger != nil && s.cfg.RequestLog != cfg.RequestLog { + s.requestLogger.SetEnabled(cfg.RequestLog) + log.Debugf("request logging updated from %t to %t", s.cfg.RequestLog, cfg.RequestLog) + } + s.cfg = cfg s.handlers.UpdateClients(clients, cfg) log.Infof("server clients and configuration updated: %d clients", len(clients)) diff --git a/internal/config/config.go b/internal/config/config.go index d3a7cd8b..82f2461d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -37,6 +37,9 @@ type Config struct { // RequestLog enables or disables detailed request logging functionality. RequestLog bool `yaml:"request-log"` + // RequestRetry defines the retry times when the request failed. + RequestRetry int `yaml:"request-retry"` + ClaudeKey []ClaudeKey `yaml:"claude-api-key"` } diff --git a/internal/logging/request_logger.go b/internal/logging/request_logger.go index 444c33f3..4779c62a 100644 --- a/internal/logging/request_logger.go +++ b/internal/logging/request_logger.go @@ -115,6 +115,15 @@ func (l *FileRequestLogger) IsEnabled() bool { return l.enabled } +// SetEnabled updates the request logging enabled state. +// This method allows dynamic enabling/disabling of request logging. 
+// +// Parameters: +// - enabled: Whether request logging should be enabled +func (l *FileRequestLogger) SetEnabled(enabled bool) { + l.enabled = enabled +} + // LogRequest logs a complete non-streaming request/response cycle to a file. // // Parameters: diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index a5ab2ed9..21eb79b2 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -169,6 +169,12 @@ func (w *Watcher) reloadConfig() { if oldConfig.ProxyURL != newConfig.ProxyURL { log.Debugf(" proxy-url: %s -> %s", oldConfig.ProxyURL, newConfig.ProxyURL) } + if oldConfig.RequestLog != newConfig.RequestLog { + log.Debugf(" request-log: %t -> %t", oldConfig.RequestLog, newConfig.RequestLog) + } + if oldConfig.RequestRetry != newConfig.RequestRetry { + log.Debugf(" request-retry: %d -> %d", oldConfig.RequestRetry, newConfig.RequestRetry) + } if len(oldConfig.APIKeys) != len(newConfig.APIKeys) { log.Debugf(" api-keys count: %d -> %d", len(oldConfig.APIKeys), len(newConfig.APIKeys)) }