Compare commits

...

19 Commits

Author SHA1 Message Date
Luis Pater
188de4ff2a Merge branch 'router-for-me:main' into main 2025-12-15 00:31:29 +08:00
Luis Pater
5a75ef8ffd Merge pull request #536 from AoaoMH/feature/auth-model-check
feat: using Client Model Infos;
2025-12-15 00:29:33 +08:00
Test
07279f8746 feat: using Client Model Infos; 2025-12-15 00:13:05 +08:00
Luis Pater
71f788b13a fix(registry): remove unused ThinkingSupport from DeepSeek-R1 model 2025-12-14 21:30:17 +08:00
Luis Pater
59c62dc580 fix(registry): correct DeepSeek-V3.2 experimental model ID 2025-12-14 21:27:43 +08:00
Luis Pater
8fb1f114bc Merge pull request #25 from Ravens2121/master
fix(kiro): Always parse thinking tags from Kiro API responses
2025-12-14 20:29:14 +08:00
Luis Pater
6a4cff6699 Merge branch 'router-for-me:main' into main 2025-12-14 17:28:28 +08:00
Luis Pater
d5310a3300 Merge pull request #531 from AoaoMH/feature/auth-model-check
feat: add API endpoint to query models for auth credentials
2025-12-14 16:46:43 +08:00
Ravens2121
de0ea3ac49 fix(kiro): Always parse thinking tags from Kiro API responses
Amp-Thread-ID: https://ampcode.com/threads/T-019b1c00-17b4-713d-a8cc-813b71181934
Co-authored-by: Amp <amp@ampcode.com>
2025-12-14 16:46:17 +08:00
Ravens2121
12116b018d Merge branch 'router-for-me:main' into master 2025-12-14 16:42:30 +08:00
Ravens2121
c3ed3b40ea feat(kiro): Add token usage cross-validation and simplify thinking mode handling 2025-12-14 16:40:33 +08:00
Luis Pater
b80c2aabb0 Merge branch 'router-for-me:main' into main 2025-12-14 16:19:29 +08:00
Luis Pater
f0a3eb574e fix(registry): update DeepSeek model definitions with new IDs and descriptions 2025-12-14 16:17:11 +08:00
Test
bb15855443 feat: add API endpoint to query models for auth credentials 2025-12-14 15:16:26 +08:00
Luis Pater
14ce6aebd1 Merge pull request #449 from sususu98/fix/gemini-cli-429-retry-delay-parsing
fix(gemini-cli): enhance 429 retry delay parsing
2025-12-14 14:04:14 +08:00
Luis Pater
2fe83723f2 Merge pull request #515 from teeverc/fix/response-rewriter-streaming-flush
fix(amp): flush response buffer after each streaming chunk write
2025-12-14 13:26:05 +08:00
teeverc
cd8c86c6fb refactor: only flush stream response on successful write 2025-12-13 13:32:54 -08:00
teeverc
52d5fd1a67 fix: streaming for amp cli 2025-12-13 13:17:53 -08:00
sususu
07d21463ca fix(gemini-cli): enhance 429 retry delay parsing
Add fallback parsing for quota reset delay when RetryInfo is not present:
- Try ErrorInfo.metadata.quotaResetDelay (e.g., "373.801628ms")
- Parse from error.message "Your quota will reset after Xs."

This ensures proper cooldown timing for rate-limited requests.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 09:34:39 +08:00
9 changed files with 187 additions and 47 deletions

View File

@@ -30,6 +30,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
sdkAuth "github.com/router-for-me/CLIProxyAPI/v6/sdk/auth"
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
@@ -293,6 +294,54 @@ func (h *Handler) ListAuthFiles(c *gin.Context) {
c.JSON(200, gin.H{"files": files})
}
// GetAuthFileModels responds with the models registered for one auth entry.
//
// The entry is selected by the required "name" query parameter, which is
// compared against both the FileName and the ID of every auth returned by the
// auth manager. When no manager is configured, nothing matches, or the match
// carries an empty ID, the raw "name" value itself is used as the registry
// client ID.
//
// Responses:
//   - 400 {"error": "name is required"} when "name" is missing or empty
//   - 200 {"models": [...]} otherwise; each element always has "id" and
//     carries "display_name", "type" and "owned_by" only when non-empty
func (h *Handler) GetAuthFileModels(c *gin.Context) {
	name := c.Query("name")
	if name == "" {
		c.JSON(400, gin.H{"error": "name is required"})
		return
	}

	// Start from the fallback (the query value) and upgrade it to the
	// manager-assigned ID if the auth manager knows this entry.
	clientID := name
	if h.authManager != nil {
		for _, candidate := range h.authManager.List() {
			if candidate.FileName != name && candidate.ID != name {
				continue
			}
			// Only the first match is considered; an empty ID keeps the fallback.
			if candidate.ID != "" {
				clientID = candidate.ID
			}
			break
		}
	}

	// Look up what this client registered in the global model registry.
	infos := registry.GetGlobalRegistry().GetModelsForClient(clientID)

	payload := make([]gin.H, 0, len(infos))
	for i := range infos {
		info := infos[i]
		item := gin.H{"id": info.ID}
		// Optional attributes are omitted rather than emitted as empty strings.
		if info.DisplayName != "" {
			item["display_name"] = info.DisplayName
		}
		if info.Type != "" {
			item["type"] = info.Type
		}
		if info.OwnedBy != "" {
			item["owned_by"] = info.OwnedBy
		}
		payload = append(payload, item)
	}

	c.JSON(200, gin.H{"models": payload})
}
// List auth files from disk when the auth manager is unavailable.
func (h *Handler) listAuthFilesFromDisk(c *gin.Context) {
entries, err := os.ReadDir(h.cfg.AuthDir)

View File

@@ -95,7 +95,13 @@ func (rw *ResponseRewriter) Write(data []byte) (int, error) {
}
if rw.isStreaming {
return rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
n, err := rw.ResponseWriter.Write(rw.rewriteStreamChunk(data))
if err == nil {
if flusher, ok := rw.ResponseWriter.(http.Flusher); ok {
flusher.Flush()
}
}
return n, err
}
return rw.body.Write(data)
}

View File

@@ -586,6 +586,7 @@ func (s *Server) registerManagementRoutes() {
mgmt.DELETE("/oauth-excluded-models", s.mgmt.DeleteOAuthExcludedModels)
mgmt.GET("/auth-files", s.mgmt.ListAuthFiles)
mgmt.GET("/auth-files/models", s.mgmt.GetAuthFileModels)
mgmt.GET("/auth-files/download", s.mgmt.DownloadAuthFile)
mgmt.POST("/auth-files", s.mgmt.UploadAuthFile)
mgmt.DELETE("/auth-files", s.mgmt.DeleteAuthFile)

View File

@@ -648,10 +648,11 @@ func GetIFlowModels() []*ModelInfo {
{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Chat", Created: 1764576000},
{ID: "deepseek-v3.2-reasoner", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2 Reasoner", Created: 1764576000},
{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},

View File

@@ -90,6 +90,9 @@ type ModelRegistry struct {
models map[string]*ModelRegistration
// clientModels maps client ID to the models it provides
clientModels map[string][]string
// clientModelInfos maps client ID to a map of model ID -> ModelInfo
// This preserves the original model info provided by each client
clientModelInfos map[string]map[string]*ModelInfo
// clientProviders maps client ID to its provider identifier
clientProviders map[string]string
// mutex ensures thread-safe access to the registry
@@ -104,10 +107,11 @@ var registryOnce sync.Once
func GetGlobalRegistry() *ModelRegistry {
registryOnce.Do(func() {
globalRegistry = &ModelRegistry{
models: make(map[string]*ModelRegistration),
clientModels: make(map[string][]string),
clientProviders: make(map[string]string),
mutex: &sync.RWMutex{},
models: make(map[string]*ModelRegistration),
clientModels: make(map[string][]string),
clientModelInfos: make(map[string]map[string]*ModelInfo),
clientProviders: make(map[string]string),
mutex: &sync.RWMutex{},
}
})
return globalRegistry
@@ -144,6 +148,7 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
// No models supplied; unregister existing client state if present.
r.unregisterClientInternal(clientID)
delete(r.clientModels, clientID)
delete(r.clientModelInfos, clientID)
delete(r.clientProviders, clientID)
misc.LogCredentialSeparator()
return
@@ -152,7 +157,7 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
now := time.Now()
oldModels, hadExisting := r.clientModels[clientID]
oldProvider, _ := r.clientProviders[clientID]
oldProvider := r.clientProviders[clientID]
providerChanged := oldProvider != provider
if !hadExisting {
// Pure addition path.
@@ -161,6 +166,12 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
r.addModelRegistration(modelID, provider, model, now)
}
r.clientModels[clientID] = append([]string(nil), rawModelIDs...)
// Store client's own model infos
clientInfos := make(map[string]*ModelInfo, len(newModels))
for id, m := range newModels {
clientInfos[id] = cloneModelInfo(m)
}
r.clientModelInfos[clientID] = clientInfos
if provider != "" {
r.clientProviders[clientID] = provider
} else {
@@ -287,6 +298,12 @@ func (r *ModelRegistry) RegisterClient(clientID, clientProvider string, models [
if len(rawModelIDs) > 0 {
r.clientModels[clientID] = append([]string(nil), rawModelIDs...)
}
// Update client's own model infos
clientInfos := make(map[string]*ModelInfo, len(newModels))
for id, m := range newModels {
clientInfos[id] = cloneModelInfo(m)
}
r.clientModelInfos[clientID] = clientInfos
if provider != "" {
r.clientProviders[clientID] = provider
} else {
@@ -436,6 +453,7 @@ func (r *ModelRegistry) unregisterClientInternal(clientID string) {
}
delete(r.clientModels, clientID)
delete(r.clientModelInfos, clientID)
if hasProvider {
delete(r.clientProviders, clientID)
}
@@ -885,3 +903,44 @@ func (r *ModelRegistry) GetFirstAvailableModel(handlerType string) (string, erro
return "", fmt.Errorf("no available clients for any model in handler type: %s", handlerType)
}
// GetModelsForClient lists the models a single client has registered.
// The registry read lock is held for the duration of the lookup.
//
// Parameters:
//   - clientID: The client identifier (typically auth file name or auth ID)
//
// Returns:
//   - []*ModelInfo: One entry per distinct model ID, in registration order.
//     nil when the client is unknown or has no model IDs. The pointers are the
//     entries held by the registry itself, not copies.
func (r *ModelRegistry) GetModelsForClient(clientID string) []*ModelInfo {
	r.mutex.RLock()
	defer r.mutex.RUnlock()

	// A missing client yields a nil slice, so one length check covers both
	// "unknown client" and "client with no models".
	ids := r.clientModels[clientID]
	if len(ids) == 0 {
		return nil
	}

	// Per-client infos keep the type/owned_by the client originally supplied.
	// This may be a nil map; indexing a nil map simply yields nil.
	own := r.clientModelInfos[clientID]

	visited := make(map[string]struct{})
	infos := make([]*ModelInfo, 0, len(ids))
	for _, id := range ids {
		if _, already := visited[id]; already {
			continue
		}
		visited[id] = struct{}{}

		// First choice: the client's own record for this model.
		if info := own[id]; info != nil {
			infos = append(infos, info)
			continue
		}

		// Otherwise fall back to the shared registration (backwards compatibility).
		if shared, found := r.models[id]; found && shared.Info != nil {
			infos = append(infos, shared.Info)
		}
	}
	return infos
}

View File

@@ -11,6 +11,8 @@ import (
"fmt"
"io"
"net/http"
"regexp"
"strconv"
"strings"
"time"
@@ -784,20 +786,45 @@ func parseRetryDelay(errorBody []byte) (*time.Duration, error) {
// Try to parse the retryDelay from the error response
// Format: error.details[].retryDelay where @type == "type.googleapis.com/google.rpc.RetryInfo"
details := gjson.GetBytes(errorBody, "error.details")
if !details.Exists() || !details.IsArray() {
return nil, fmt.Errorf("no error.details found")
if details.Exists() && details.IsArray() {
for _, detail := range details.Array() {
typeVal := detail.Get("@type").String()
if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
retryDelay := detail.Get("retryDelay").String()
if retryDelay != "" {
// Parse duration string like "0.847655010s"
duration, err := time.ParseDuration(retryDelay)
if err != nil {
return nil, fmt.Errorf("failed to parse duration")
}
return &duration, nil
}
}
}
// Fallback: try ErrorInfo.metadata.quotaResetDelay (e.g., "373.801628ms")
for _, detail := range details.Array() {
typeVal := detail.Get("@type").String()
if typeVal == "type.googleapis.com/google.rpc.ErrorInfo" {
quotaResetDelay := detail.Get("metadata.quotaResetDelay").String()
if quotaResetDelay != "" {
duration, err := time.ParseDuration(quotaResetDelay)
if err == nil {
return &duration, nil
}
}
}
}
}
for _, detail := range details.Array() {
typeVal := detail.Get("@type").String()
if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
retryDelay := detail.Get("retryDelay").String()
if retryDelay != "" {
// Parse duration string like "0.847655010s"
duration, err := time.ParseDuration(retryDelay)
if err != nil {
return nil, fmt.Errorf("failed to parse duration")
}
// Fallback: parse from error.message "Your quota will reset after Xs."
message := gjson.GetBytes(errorBody, "error.message").String()
if message != "" {
re := regexp.MustCompile(`after\s+(\d+)s\.?`)
if matches := re.FindStringSubmatch(message); len(matches) > 1 {
seconds, err := strconv.Atoi(matches[1])
if err == nil {
duration := time.Duration(seconds) * time.Second
return &duration, nil
}
}

View File

@@ -166,7 +166,8 @@ type KiroExecutor struct {
// This is critical because OpenAI and Claude formats have different tool structures:
// - OpenAI: tools[].function.name, tools[].function.description
// - Claude: tools[].name, tools[].description
func buildKiroPayloadForFormat(body []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, sourceFormat sdktranslator.Format) []byte {
// Returns the serialized JSON payload and a boolean indicating whether thinking mode was injected.
func buildKiroPayloadForFormat(body []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool, sourceFormat sdktranslator.Format) ([]byte, bool) {
switch sourceFormat.String() {
case "openai":
log.Debugf("kiro: using OpenAI payload builder for source format: %s", sourceFormat.String())
@@ -248,7 +249,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
// Rebuild payload with the correct origin for this endpoint
// Each endpoint requires its matching Origin value in the request body
kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
log.Debugf("kiro: trying endpoint %d/%d: %s (Name: %s, Origin: %s)",
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
@@ -358,7 +359,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
// Rebuild payload with new profile ARN if changed
kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
log.Infof("kiro: token refreshed successfully, retrying request")
continue
}
@@ -415,7 +416,7 @@ func (e *KiroExecutor) executeWithRetry(ctx context.Context, auth *cliproxyauth.
if refreshedAuth != nil {
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
log.Infof("kiro: token refreshed for 403, retrying request")
continue
}
@@ -554,7 +555,10 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
// Rebuild payload with the correct origin for this endpoint
// Each endpoint requires its matching Origin value in the request body
kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
// Kiro API always returns <thinking> tags regardless of whether thinking mode was requested
// So we always enable thinking parsing for Kiro responses
thinkingEnabled := true
log.Debugf("kiro: stream trying endpoint %d/%d: %s (Name: %s, Origin: %s)",
endpointIdx+1, len(endpointConfigs), url, endpointConfig.Name, currentOrigin)
@@ -677,7 +681,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
// Rebuild payload with new profile ARN if changed
kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
log.Infof("kiro: token refreshed successfully, retrying stream request")
continue
}
@@ -734,7 +738,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
if refreshedAuth != nil {
auth = refreshedAuth
accessToken, profileArn = kiroCredentials(auth)
kiroPayload = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
kiroPayload, _ = buildKiroPayloadForFormat(body, kiroModelID, profileArn, currentOrigin, isAgentic, isChatOnly, from)
log.Infof("kiro: token refreshed for 403, retrying stream request")
continue
}
@@ -758,7 +762,7 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
out := make(chan cliproxyexecutor.StreamChunk)
go func(resp *http.Response) {
go func(resp *http.Response, thinkingEnabled bool) {
defer close(out)
defer func() {
if r := recover(); r != nil {
@@ -772,21 +776,12 @@ func (e *KiroExecutor) executeStreamWithRetry(ctx context.Context, auth *cliprox
}
}()
// Check if thinking mode was enabled in the original request
// Only parse <thinking> tags when thinking was explicitly requested
// Check multiple sources: original request, pre-translation payload, and translated body
// This handles different client formats (Claude API, OpenAI, AMP/Cursor)
thinkingEnabled := kiroclaude.IsThinkingEnabled(opts.OriginalRequest)
if !thinkingEnabled {
thinkingEnabled = kiroclaude.IsThinkingEnabled(req.Payload)
}
if !thinkingEnabled {
thinkingEnabled = kiroclaude.IsThinkingEnabled(body)
}
log.Debugf("kiro: stream thinkingEnabled = %v", thinkingEnabled)
// Kiro API always returns <thinking> tags regardless of request parameters
// So we always enable thinking parsing for Kiro responses
log.Debugf("kiro: stream thinkingEnabled = %v (always true for Kiro)", thinkingEnabled)
e.streamToChannel(ctx, resp.Body, out, from, req.Model, opts.OriginalRequest, body, reporter, thinkingEnabled)
}(httpResp)
}(httpResp, thinkingEnabled)
return out, nil
}

View File

@@ -135,7 +135,8 @@ func ConvertClaudeRequestToKiro(modelName string, inputRawJSON []byte, stream bo
// isAgentic parameter enables chunked write optimization prompt for -agentic model variants.
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
// Supports thinking mode - when Claude API thinking parameter is present, injects thinkingHint.
func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte {
// Returns the payload and a boolean indicating whether thinking mode was injected.
func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) ([]byte, bool) {
// Extract max_tokens for potential use in inferenceConfig
// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
const kiroMaxOutputTokens = 32000
@@ -307,10 +308,10 @@ func BuildKiroPayload(claudeBody []byte, modelID, profileArn, origin string, isA
result, err := json.Marshal(payload)
if err != nil {
log.Debugf("kiro: failed to marshal payload: %v", err)
return nil
return nil, false
}
return result
return result, thinkingEnabled
}
// normalizeOrigin normalizes origin value for Kiro API compatibility

View File

@@ -133,7 +133,8 @@ func ConvertOpenAIRequestToKiro(modelName string, inputRawJSON []byte, stream bo
// origin parameter determines which quota to use: "CLI" for Amazon Q, "AI_EDITOR" for Kiro IDE.
// isAgentic parameter enables chunked write optimization prompt for -agentic model variants.
// isChatOnly parameter disables tool calling for -chat model variants (pure conversation mode).
func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) []byte {
// Returns the payload and a boolean indicating whether thinking mode was injected.
func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin string, isAgentic, isChatOnly bool) ([]byte, bool) {
// Extract max_tokens for potential use in inferenceConfig
// Handle -1 as "use maximum" (Kiro max output is ~32000 tokens)
const kiroMaxOutputTokens = 32000
@@ -311,10 +312,10 @@ func BuildKiroPayloadFromOpenAI(openaiBody []byte, modelID, profileArn, origin s
result, err := json.Marshal(payload)
if err != nil {
log.Debugf("kiro-openai: failed to marshal payload: %v", err)
return nil
return nil, false
}
return result
return result, thinkingEnabled
}
// normalizeOrigin normalizes origin value for Kiro API compatibility