From 9102ff031d0cc013f13c22fad825883674753947 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Mon, 25 Aug 2025 23:27:30 +0800
Subject: [PATCH] Refactor API handlers to implement retry mechanism with
 configurable limits and improved error handling

- Introduced a retry counter with a configurable `RequestRetry` limit in all handlers.
- Enhanced error handling with specific HTTP status codes for switching clients.
- Standardized response forwarding for non-retriable errors.
- Improved logging for quota and client switch scenarios.
---
 README.md                                     | 32 +++++++++--------
 README_CN.md                                  |  5 +++
 config.example.yaml                           |  3 ++
 internal/api/handlers/claude/code_handlers.go | 18 +++++++---
 .../handlers/gemini/gemini-cli_handlers.go    | 33 ++++++++++++++----
 .../api/handlers/gemini/gemini_handlers.go    | 34 ++++++++++++++-----
 .../api/handlers/openai/openai_handlers.go    | 32 +++++++++++++----
 internal/api/server.go                        | 16 +++++++--
 internal/config/config.go                     |  3 ++
 internal/logging/request_logger.go            |  9 +++++
 internal/watcher/watcher.go                   |  6 ++++
 11 files changed, 148 insertions(+), 43 deletions(-)

diff --git a/README.md b/README.md
index 264cce1d..cb2b0ace 100644
--- a/README.md
+++ b/README.md
@@ -238,20 +238,21 @@ The server uses a YAML configuration file (`config.yaml`) located in the project
 
 ### Configuration Options
 
-| Parameter                             | Type     | Default            | Description                                                                                  |
-|---------------------------------------|----------|--------------------|----------------------------------------------------------------------------------------------|
-| `port`                                | integer  | 8317               | The port number on which the server will listen                                              |
-| `auth-dir`                            | string   | "~/.cli-proxy-api" | Directory where authentication tokens are stored. Supports using `~` for home directory      |
-| `proxy-url`                           | string   | ""                 | Proxy url, support socks5/http/https protocol, example: socks5://user:pass@192.168.1.1:1080/ |
-| `quota-exceeded`                      | object   | {}                 | Configuration for handling quota exceeded                                                    |
-| `quota-exceeded.switch-project`       | boolean  | true               | Whether to automatically switch to another project when a quota is exceeded                  |
-| `quota-exceeded.switch-preview-model` | boolean  | true               | Whether to automatically switch to a preview model when a quota is exceeded                  |
-| `debug`                               | boolean  | false              | Enable debug mode for verbose logging                                                        |
-| `api-keys`                            | string[] | []                 | List of API keys that can be used to authenticate requests                                   |
-| `generative-language-api-key`         | string[] | []                 | List of Generative Language API keys                                                         |
-| `claude-api-key`                      | object   | {}                 | List of Claude API keys                                                                      |
-| `claude-api-key.api-key`              | string   | ""                 | Claude API key                                                                               |
-| `claude-api-key.base-url`             | string   | ""                 | Custom Claude API endpoint, if you use the third party API endpoint                          |
+| Parameter                             | Type     | Default            | Description                                                                                   |
+|---------------------------------------|----------|--------------------|-----------------------------------------------------------------------------------------------|
+| `port`                                | integer  | 8317               | The port number on which the server will listen                                               |
+| `auth-dir`                            | string   | "~/.cli-proxy-api" | Directory where authentication tokens are stored. Supports using `~` for home directory       |
+| `proxy-url`                           | string   | ""                 | Proxy url, support socks5/http/https protocol, example: socks5://user:pass@192.168.1.1:1080/  |
+| `request-retry`                       | integer  | 0                  | Number of retries when a request fails with HTTP status 403, 408, 500, 502, 503, or 504       |
+| `quota-exceeded`                      | object   | {}                 | Configuration for handling quota exceeded                                                     |
+| `quota-exceeded.switch-project`       | boolean  | true               | Whether to automatically switch to another project when a quota is exceeded                   |
+| `quota-exceeded.switch-preview-model` | boolean  | true               | Whether to automatically switch to a preview model when a quota is exceeded                   |
+| `debug`                               | boolean  | false              | Enable debug mode for verbose logging                                                         |
+| `api-keys`                            | string[] | []                 | List of API keys that can be used to authenticate requests                                    |
+| `generative-language-api-key`         | string[] | []                 | List of Generative Language API keys                                                          |
+| `claude-api-key`                      | object   | {}                 | List of Claude API keys                                                                       |
+| `claude-api-key.api-key`              | string   | ""                 | Claude API key                                                                                |
+| `claude-api-key.base-url`             | string   | ""                 | Custom Claude API endpoint, if you use the third party API endpoint                           |
 
 ### Example Configuration File
 
@@ -268,6 +269,9 @@ debug: false
 # Proxy url, support socks5/http/https protocol, example: socks5://user:pass@192.168.1.1:1080/
 proxy-url: ""
 
+# Number of retries when a request fails with HTTP status 403, 408, 500, 502, 503, or 504
+request-retry: 3
+
 # Quota exceeded behavior
 quota-exceeded:
    switch-project: true # Whether to automatically switch to another project when a quota is exceeded
diff --git a/README_CN.md b/README_CN.md
index fb561c5b..4adce5ea 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -240,6 +240,7 @@ console.log(await claudeResponse.json());
 | `port`                                | integer  | 8317               | 服务器监听的端口号                                                              |
 | `auth-dir`                            | string   | "~/.cli-proxy-api" | 存储身份验证令牌的目录。支持使用 `~` 表示主目录                                             |
 | `proxy-url`                           | string   | ""                 | 代理 URL，支持 socks5/http/https 协议，示例：socks5://user:pass@192.168.1.1:1080/ |
+| `request-retry`                       | integer  | 0                  | 请求失败重试次数，如果响应的http代码为403、408、500、502、503、504则会自动重试                     |
 | `quota-exceeded`                      | object   | {}                 | 处理配额超限的配置                                                              |
 | `quota-exceeded.switch-project`       | boolean  | true               | 当配额超限时是否自动切换到另一个项目                                                     |
 | `quota-exceeded.switch-preview-model` | boolean  | true               | 当配额超限时是否自动切换到预览模型                                                      |
@@ -265,6 +266,10 @@ debug: false
 # 代理 URL，支持 socks5/http/https 协议，示例：socks5://user:pass@192.168.1.1:1080/
 proxy-url: ""
 
+# 请求失败重试次数，如果响应的http代码为403、408、500、502、503、504则会自动重试
+request-retry: 3
+
+
 # 配额超限行为
 quota-exceeded:
    switch-project: true # 当配额超限时是否自动切换到另一个项目
diff --git a/config.example.yaml b/config.example.yaml
index 36710fbd..48d88df6 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -4,6 +4,9 @@ auth-dir: "~/.cli-proxy-api"
 debug: true
 proxy-url: ""
 
+# Number of retries when a request fails with HTTP status 403, 408, 500, 502, 503, or 504
+request-retry: 3
+
 # Quota exceeded behavior
 quota-exceeded:
   switch-project: true
diff --git a/internal/api/handlers/claude/code_handlers.go b/internal/api/handlers/claude/code_handlers.go
index 4b59cb70..7260f251 100644
--- a/internal/api/handlers/claude/code_handlers.go
+++ b/internal/api/handlers/claude/code_handlers.go
@@ -122,11 +122,11 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [
 			cliClient.GetRequestMutex().Unlock()
 		}
 	}()
-
+	retryCount := 0
 	// Main client rotation loop with quota management
 	// This loop implements a sophisticated load balancing and failover mechanism
 outLoop:
-	for {
+	for retryCount <= h.Cfg.RequestRetry {
 		var errorResponse *interfaces.ErrorMessage
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
@@ -170,9 +170,17 @@ outLoop:
 				if okError {
 					// Special handling for quota exceeded errors
 					// If configured, attempt to switch to a different project/client
-					if errInfo.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						continue outLoop // Restart the client selection process
-					} else {
+					switch errInfo.StatusCode {
+					case 429:
+						if h.Cfg.QuotaExceeded.SwitchProject {
+							log.Debugf("quota exceeded, switch client")
+							continue outLoop // Restart the client selection process
+						}
+					case 403, 408, 500, 502, 503, 504:
+						log.Debugf("http status code %d, switch client", errInfo.StatusCode)
+						retryCount++
+						continue outLoop
+					default:
 						// Forward other errors directly to the client
 						c.Status(errInfo.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, errInfo.Error.Error())
diff --git a/internal/api/handlers/gemini/gemini-cli_handlers.go b/internal/api/handlers/gemini/gemini-cli_handlers.go
index 82ef3392..9aaad1ea 100644
--- a/internal/api/handlers/gemini/gemini-cli_handlers.go
+++ b/internal/api/handlers/gemini/gemini-cli_handlers.go
@@ -167,8 +167,9 @@ func (h *GeminiCLIAPIHandler) handleInternalStreamGenerateContent(c *gin.Context
 		}
 	}()
 
+	retryCount := 0
 outLoop:
-	for {
+	for retryCount <= h.Cfg.RequestRetry {
 		var errorResponse *interfaces.ErrorMessage
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
@@ -205,9 +206,18 @@ outLoop:
 			// Handle errors from the backend.
 			case err, okError := <-errChan:
 				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+					switch err.StatusCode {
+					case 429:
+						if h.Cfg.QuotaExceeded.SwitchProject {
+							log.Debugf("quota exceeded, switch client")
+							continue outLoop // Restart the client selection process
+						}
+					case 403, 408, 500, 502, 503, 504:
+						log.Debugf("http status code %d, switch client", err.StatusCode)
+						retryCount++
 						continue outLoop
-					} else {
+					default:
+						// Forward other errors directly to the client
 						c.Status(err.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
 						flusher.Flush()
@@ -238,7 +248,8 @@ func (h *GeminiCLIAPIHandler) handleInternalGenerateContent(c *gin.Context, rawJ
 		}
 	}()
 
-	for {
+	retryCount := 0
+	for retryCount <= h.Cfg.RequestRetry {
 		var errorResponse *interfaces.ErrorMessage
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
@@ -250,12 +261,20 @@ func (h *GeminiCLIAPIHandler) handleInternalGenerateContent(c *gin.Context, rawJ
 
 		resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, "")
 		if err != nil {
-			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+			switch err.StatusCode {
+			case 429:
+				if h.Cfg.QuotaExceeded.SwitchProject {
+					log.Debugf("quota exceeded, switch client")
+					continue // Restart the client selection process
+				}
+			case 403, 408, 500, 502, 503, 504:
+				log.Debugf("http status code %d, switch client", err.StatusCode)
+				retryCount++
 				continue
-			} else {
+			default:
+				// Forward other errors directly to the client
 				c.Status(err.StatusCode)
 				_, _ = c.Writer.Write([]byte(err.Error.Error()))
-				// log.Debugf("code: %d, error: %s", err.StatusCode, err.Error.Error())
 				cliCancel(err.Error)
 			}
 			break
diff --git a/internal/api/handlers/gemini/gemini_handlers.go b/internal/api/handlers/gemini/gemini_handlers.go
index 258987a0..759f372f 100644
--- a/internal/api/handlers/gemini/gemini_handlers.go
+++ b/internal/api/handlers/gemini/gemini_handlers.go
@@ -270,8 +270,9 @@ func (h *GeminiAPIHandler) handleStreamGenerateContent(c *gin.Context, modelName
 		}
 	}()
 
+	retryCount := 0
 outLoop:
-	for {
+	for retryCount <= h.Cfg.RequestRetry {
 		var errorResponse *interfaces.ErrorMessage
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
@@ -311,11 +312,18 @@ outLoop:
 			// Handle errors from the backend.
 			case err, okError := <-errChan:
 				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
-						log.Debugf("quota exceeded, switch client")
+					switch err.StatusCode {
+					case 429:
+						if h.Cfg.QuotaExceeded.SwitchProject {
+							log.Debugf("quota exceeded, switch client")
+							continue outLoop // Restart the client selection process
+						}
+					case 403, 408, 500, 502, 503, 504:
+						log.Debugf("http status code %d, switch client", err.StatusCode)
+						retryCount++
 						continue outLoop
-					} else {
-						// log.Debugf("error code :%d, error: %v", err.StatusCode, err.Error.Error())
+					default:
+						// Forward other errors directly to the client
 						c.Status(err.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
 						flusher.Flush()
@@ -402,7 +410,8 @@ func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName strin
 		}
 	}()
 
-	for {
+	retryCount := 0
+	for retryCount <= h.Cfg.RequestRetry {
 		var errorResponse *interfaces.ErrorMessage
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
@@ -414,9 +423,18 @@ func (h *GeminiAPIHandler) handleGenerateContent(c *gin.Context, modelName strin
 
 		resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, alt)
 		if err != nil {
-			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+			switch err.StatusCode {
+			case 429:
+				if h.Cfg.QuotaExceeded.SwitchProject {
+					log.Debugf("quota exceeded, switch client")
+					continue // Restart the client selection process
+				}
+			case 403, 408, 500, 502, 503, 504:
+				log.Debugf("http status code %d, switch client", err.StatusCode)
+				retryCount++
 				continue
-			} else {
+			default:
+				// Forward other errors directly to the client
 				c.Status(err.StatusCode)
 				_, _ = c.Writer.Write([]byte(err.Error.Error()))
 				cliCancel(err.Error)
diff --git a/internal/api/handlers/openai/openai_handlers.go b/internal/api/handlers/openai/openai_handlers.go
index 485b6827..aebe25b9 100644
--- a/internal/api/handlers/openai/openai_handlers.go
+++ b/internal/api/handlers/openai/openai_handlers.go
@@ -183,7 +183,8 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []
 		}
 	}()
 
-	for {
+	retryCount := 0
+	for retryCount <= h.Cfg.RequestRetry {
 		var errorResponse *interfaces.ErrorMessage
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
@@ -195,9 +196,18 @@ func (h *OpenAIAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSON []
 
 		resp, err := cliClient.SendRawMessage(cliCtx, modelName, rawJSON, "")
 		if err != nil {
-			if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+			switch err.StatusCode {
+			case 429:
+				if h.Cfg.QuotaExceeded.SwitchProject {
+					log.Debugf("quota exceeded, switch client")
+					continue // Restart the client selection process
+				}
+			case 403, 408, 500, 502, 503, 504:
+				log.Debugf("http status code %d, switch client", err.StatusCode)
+				retryCount++
 				continue
-			} else {
+			default:
+				// Forward other errors directly to the client
 				c.Status(err.StatusCode)
 				_, _ = c.Writer.Write([]byte(err.Error.Error()))
 				cliCancel(err.Error)
@@ -247,8 +257,9 @@ func (h *OpenAIAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byt
 		}
 	}()
 
+	retryCount := 0
 outLoop:
-	for {
+	for retryCount <= h.Cfg.RequestRetry {
 		var errorResponse *interfaces.ErrorMessage
 		cliClient, errorResponse = h.GetClient(modelName)
 		if errorResponse != nil {
@@ -286,9 +297,18 @@ outLoop:
 			// Handle errors from the backend.
 			case err, okError := <-errChan:
 				if okError {
-					if err.StatusCode == 429 && h.Cfg.QuotaExceeded.SwitchProject {
+					switch err.StatusCode {
+					case 429:
+						if h.Cfg.QuotaExceeded.SwitchProject {
+							log.Debugf("quota exceeded, switch client")
+							continue outLoop // Restart the client selection process
+						}
+					case 403, 408, 500, 502, 503, 504:
+						log.Debugf("http status code %d, switch client", err.StatusCode)
+						retryCount++
 						continue outLoop
-					} else {
+					default:
+						// Forward other errors directly to the client
 						c.Status(err.StatusCode)
 						_, _ = fmt.Fprint(c.Writer, err.Error.Error())
 						flusher.Flush()
diff --git a/internal/api/server.go b/internal/api/server.go
index 8d3579f9..374a25ea 100644
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -37,6 +37,9 @@ type Server struct {
 
 	// cfg holds the current server configuration.
 	cfg *config.Config
+
+	// requestLogger is the request logger instance for dynamic configuration updates.
+	requestLogger *logging.FileRequestLogger
 }
 
 // NewServer creates and initializes a new API server instance.
@@ -69,9 +72,10 @@ func NewServer(cfg *config.Config, cliClients []interfaces.Client) *Server {
 
 	// Create server instance
 	s := &Server{
-		engine:   engine,
-		handlers: handlers.NewBaseAPIHandlers(cliClients, cfg),
-		cfg:      cfg,
+		engine:        engine,
+		handlers:      handlers.NewBaseAPIHandlers(cliClients, cfg),
+		cfg:           cfg,
+		requestLogger: requestLogger,
 	}
 
 	// Setup routes
@@ -189,6 +193,12 @@ func corsMiddleware() gin.HandlerFunc {
 //   - clients: The new slice of AI service clients
 //   - cfg: The new application configuration
 func (s *Server) UpdateClients(clients []interfaces.Client, cfg *config.Config) {
+	// Update request logger enabled state if it has changed
+	if s.requestLogger != nil && s.cfg.RequestLog != cfg.RequestLog {
+		s.requestLogger.SetEnabled(cfg.RequestLog)
+		log.Debugf("request logging updated from %t to %t", s.cfg.RequestLog, cfg.RequestLog)
+	}
+
 	s.cfg = cfg
 	s.handlers.UpdateClients(clients, cfg)
 	log.Infof("server clients and configuration updated: %d clients", len(clients))
diff --git a/internal/config/config.go b/internal/config/config.go
index d3a7cd8b..82f2461d 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -37,6 +37,9 @@ type Config struct {
 	// RequestLog enables or disables detailed request logging functionality.
 	RequestLog bool `yaml:"request-log"`
 
+	// RequestRetry is the maximum number of retries for a failed request.
+	RequestRetry int `yaml:"request-retry"`
+
 	ClaudeKey []ClaudeKey `yaml:"claude-api-key"`
 }
 
diff --git a/internal/logging/request_logger.go b/internal/logging/request_logger.go
index 444c33f3..4779c62a 100644
--- a/internal/logging/request_logger.go
+++ b/internal/logging/request_logger.go
@@ -115,6 +115,15 @@ func (l *FileRequestLogger) IsEnabled() bool {
 	return l.enabled
 }
 
+// SetEnabled updates the request logging enabled state.
+// This method allows dynamic enabling/disabling of request logging.
+//
+// Parameters:
+//   - enabled: Whether request logging should be enabled
+func (l *FileRequestLogger) SetEnabled(enabled bool) {
+	l.enabled = enabled
+}
+
 // LogRequest logs a complete non-streaming request/response cycle to a file.
 //
 // Parameters:
diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go
index a5ab2ed9..21eb79b2 100644
--- a/internal/watcher/watcher.go
+++ b/internal/watcher/watcher.go
@@ -169,6 +169,12 @@ func (w *Watcher) reloadConfig() {
 		if oldConfig.ProxyURL != newConfig.ProxyURL {
 			log.Debugf("  proxy-url: %s -> %s", oldConfig.ProxyURL, newConfig.ProxyURL)
 		}
+		if oldConfig.RequestLog != newConfig.RequestLog {
+			log.Debugf("  request-log: %t -> %t", oldConfig.RequestLog, newConfig.RequestLog)
+		}
+		if oldConfig.RequestRetry != newConfig.RequestRetry {
+			log.Debugf("  request-retry: %d -> %d", oldConfig.RequestRetry, newConfig.RequestRetry)
+		}
 		if len(oldConfig.APIKeys) != len(newConfig.APIKeys) {
 			log.Debugf("  api-keys count: %d -> %d", len(oldConfig.APIKeys), len(newConfig.APIKeys))
 		}