diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 7609a68b..a435ba7c 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -16,6 +16,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + - name: Refresh models catalog + run: curl -fsSL https://raw.githubusercontent.com/router-for-me/models/refs/heads/main/models.json -o internal/registry/models/models.json - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Login to DockerHub @@ -47,6 +49,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + - name: Refresh models catalog + run: curl -fsSL https://raw.githubusercontent.com/router-for-me/models/refs/heads/main/models.json -o internal/registry/models/models.json - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Login to DockerHub diff --git a/.github/workflows/pr-test-build.yml b/.github/workflows/pr-test-build.yml index 477ff049..b24b1fcb 100644 --- a/.github/workflows/pr-test-build.yml +++ b/.github/workflows/pr-test-build.yml @@ -12,6 +12,8 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 + - name: Refresh models catalog + run: curl -fsSL https://raw.githubusercontent.com/router-for-me/models/refs/heads/main/models.json -o internal/registry/models/models.json - name: Set up Go uses: actions/setup-go@v5 with: diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 04ec21a9..dd63c508 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -16,6 +16,8 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Refresh models catalog + run: curl -fsSL https://raw.githubusercontent.com/router-for-me/models/refs/heads/main/models.json -o internal/registry/models/models.json - run: git fetch --force --tags - uses: actions/setup-go@v4 with: diff --git a/cmd/server/main.go b/cmd/server/main.go index 9a204ebb..4148cd06 100644 --- a/cmd/server/main.go +++ 
b/cmd/server/main.go @@ -25,6 +25,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/logging" "github.com/router-for-me/CLIProxyAPI/v6/internal/managementasset" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/store" _ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator" "github.com/router-for-me/CLIProxyAPI/v6/internal/tui" @@ -573,6 +574,7 @@ func main() { if standalone { // Standalone mode: start an embedded local server and connect TUI client to it. managementasset.StartAutoUpdater(context.Background(), configFilePath) + registry.StartModelsUpdater(context.Background()) hook := tui.NewLogHook(2000) hook.SetFormatter(&logging.LogFormatter{}) log.AddHook(hook) @@ -643,15 +645,16 @@ func main() { } } } else { - // Start the main proxy service - managementasset.StartAutoUpdater(context.Background(), configFilePath) + // Start the main proxy service + managementasset.StartAutoUpdater(context.Background(), configFilePath) + registry.StartModelsUpdater(context.Background()) - if cfg.AuthDir != "" { - kiro.InitializeAndStart(cfg.AuthDir, cfg) - defer kiro.StopGlobalRefreshManager() - } + if cfg.AuthDir != "" { + kiro.InitializeAndStart(cfg.AuthDir, cfg) + defer kiro.StopGlobalRefreshManager() + } - cmd.StartService(cfg, configFilePath, password) + cmd.StartService(cfg, configFilePath, password) } } } diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index d7a6d75b..3ec611e3 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -1,5 +1,5 @@ // Package registry provides model definitions and lookup helpers for various AI providers. -// Static model metadata is stored in model_definitions_static_data.go. +// Static model metadata is loaded from the embedded models.json file and can be refreshed from network. 
package registry import ( @@ -7,6 +7,131 @@ import ( "strings" ) +// AntigravityModelConfig captures static antigravity model overrides, including +// Thinking budget limits and provider max completion tokens. +type AntigravityModelConfig struct { + Thinking *ThinkingSupport `json:"thinking,omitempty"` + MaxCompletionTokens int `json:"max_completion_tokens,omitempty"` +} + +// staticModelsJSON mirrors the top-level structure of models.json. +type staticModelsJSON struct { + Claude []*ModelInfo `json:"claude"` + Gemini []*ModelInfo `json:"gemini"` + Vertex []*ModelInfo `json:"vertex"` + GeminiCLI []*ModelInfo `json:"gemini-cli"` + AIStudio []*ModelInfo `json:"aistudio"` + CodexFree []*ModelInfo `json:"codex-free"` + CodexTeam []*ModelInfo `json:"codex-team"` + CodexPlus []*ModelInfo `json:"codex-plus"` + CodexPro []*ModelInfo `json:"codex-pro"` + Qwen []*ModelInfo `json:"qwen"` + IFlow []*ModelInfo `json:"iflow"` + Kimi []*ModelInfo `json:"kimi"` + Antigravity map[string]*AntigravityModelConfig `json:"antigravity"` +} + +// GetClaudeModels returns the standard Claude model definitions. +func GetClaudeModels() []*ModelInfo { + return cloneModelInfos(getModels().Claude) +} + +// GetGeminiModels returns the standard Gemini model definitions. +func GetGeminiModels() []*ModelInfo { + return cloneModelInfos(getModels().Gemini) +} + +// GetGeminiVertexModels returns Gemini model definitions for Vertex AI. +func GetGeminiVertexModels() []*ModelInfo { + return cloneModelInfos(getModels().Vertex) +} + +// GetGeminiCLIModels returns Gemini model definitions for the Gemini CLI. +func GetGeminiCLIModels() []*ModelInfo { + return cloneModelInfos(getModels().GeminiCLI) +} + +// GetAIStudioModels returns model definitions for AI Studio. +func GetAIStudioModels() []*ModelInfo { + return cloneModelInfos(getModels().AIStudio) +} + +// GetCodexFreeModels returns model definitions for the Codex free plan tier. 
+func GetCodexFreeModels() []*ModelInfo { + return cloneModelInfos(getModels().CodexFree) +} + +// GetCodexTeamModels returns model definitions for the Codex team plan tier. +func GetCodexTeamModels() []*ModelInfo { + return cloneModelInfos(getModels().CodexTeam) +} + +// GetCodexPlusModels returns model definitions for the Codex plus plan tier. +func GetCodexPlusModels() []*ModelInfo { + return cloneModelInfos(getModels().CodexPlus) +} + +// GetCodexProModels returns model definitions for the Codex pro plan tier. +func GetCodexProModels() []*ModelInfo { + return cloneModelInfos(getModels().CodexPro) +} + +// GetQwenModels returns the standard Qwen model definitions. +func GetQwenModels() []*ModelInfo { + return cloneModelInfos(getModels().Qwen) +} + +// GetIFlowModels returns the standard iFlow model definitions. +func GetIFlowModels() []*ModelInfo { + return cloneModelInfos(getModels().IFlow) +} + +// GetKimiModels returns the standard Kimi (Moonshot AI) model definitions. +func GetKimiModels() []*ModelInfo { + return cloneModelInfos(getModels().Kimi) +} + +// GetAntigravityModelConfig returns static configuration for antigravity models. +// Keys use upstream model names returned by the Antigravity models endpoint. +func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { + data := getModels() + if len(data.Antigravity) == 0 { + return nil + } + out := make(map[string]*AntigravityModelConfig, len(data.Antigravity)) + for k, v := range data.Antigravity { + out[k] = cloneAntigravityModelConfig(v) + } + return out +} + +func cloneAntigravityModelConfig(cfg *AntigravityModelConfig) *AntigravityModelConfig { + if cfg == nil { + return nil + } + copyConfig := *cfg + if cfg.Thinking != nil { + copyThinking := *cfg.Thinking + if len(cfg.Thinking.Levels) > 0 { + copyThinking.Levels = append([]string(nil), cfg.Thinking.Levels...) 
+ } + copyConfig.Thinking = &copyThinking + } + return &copyConfig +} + +// cloneModelInfos returns a shallow copy of the slice with each element deep-cloned. +func cloneModelInfos(models []*ModelInfo) []*ModelInfo { + if len(models) == 0 { + return nil + } + out := make([]*ModelInfo, len(models)) + for i, m := range models { + out[i] = cloneModelInfo(m) + } + return out +} + // GetStaticModelDefinitionsByChannel returns static model definitions for a given channel/provider. // It returns nil when the channel is unknown. // @@ -39,7 +164,7 @@ func GetStaticModelDefinitionsByChannel(channel string) []*ModelInfo { case "aistudio": return GetAIStudioModels() case "codex": - return GetOpenAIModels() + return GetCodexProModels() case "qwen": return GetQwenModels() case "iflow": @@ -89,16 +214,17 @@ func LookupStaticModelInfo(modelID string) *ModelInfo { return nil } + data := getModels() allModels := [][]*ModelInfo{ - GetClaudeModels(), - GetGeminiModels(), - GetGeminiVertexModels(), - GetGeminiCLIModels(), - GetAIStudioModels(), - GetOpenAIModels(), - GetQwenModels(), - GetIFlowModels(), - GetKimiModels(), + data.Claude, + data.Gemini, + data.Vertex, + data.GeminiCLI, + data.AIStudio, + data.CodexPro, + data.Qwen, + data.IFlow, + data.Kimi, GetGitHubCopilotModels(), GetKiroModels(), GetKiloModels(), @@ -107,13 +233,13 @@ func LookupStaticModelInfo(modelID string) *ModelInfo { for _, models := range allModels { for _, m := range models { if m != nil && m.ID == modelID { - return m + return cloneModelInfo(m) } } } // Check Antigravity static config - if cfg := GetAntigravityModelConfig()[modelID]; cfg != nil { + if cfg := cloneAntigravityModelConfig(data.Antigravity[modelID]); cfg != nil { return &ModelInfo{ ID: modelID, Thinking: cfg.Thinking, diff --git a/internal/registry/model_definitions_static_data.go b/internal/registry/model_definitions_static_data.go deleted file mode 100644 index 7756fcc5..00000000 --- a/internal/registry/model_definitions_static_data.go +++ /dev/null
@@ -1,1116 +0,0 @@ -// Package registry provides model definitions for various AI service providers. -// This file stores the static model metadata catalog. -package registry - -// GetClaudeModels returns the standard Claude model definitions -func GetClaudeModels() []*ModelInfo { - return []*ModelInfo{ - - { - ID: "claude-haiku-4-5-20251001", - Object: "model", - Created: 1759276800, // 2025-10-01 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4.5 Haiku", - ContextLength: 200000, - MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, - }, - { - ID: "claude-sonnet-4-5-20250929", - Object: "model", - Created: 1759104000, // 2025-09-29 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4.5 Sonnet", - ContextLength: 200000, - MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, - }, - { - ID: "claude-sonnet-4-6", - Object: "model", - Created: 1771372800, // 2026-02-17 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4.6 Sonnet", - ContextLength: 200000, - MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false, Levels: []string{"low", "medium", "high"}}, - }, - { - ID: "claude-opus-4-6", - Object: "model", - Created: 1770318000, // 2026-02-05 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4.6 Opus", - Description: "Premium model combining maximum intelligence with practical performance", - ContextLength: 1000000, - MaxCompletionTokens: 128000, - Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false, Levels: []string{"low", "medium", "high", "max"}}, - }, - { - ID: "claude-sonnet-4-6", - Object: "model", - Created: 1771286400, // 2026-02-17 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4.6 Sonnet", - Description: "Best combination of speed and 
intelligence", - ContextLength: 200000, - MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, - }, - { - ID: "claude-opus-4-5-20251101", - Object: "model", - Created: 1761955200, // 2025-11-01 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4.5 Opus", - Description: "Premium model combining maximum intelligence with practical performance", - ContextLength: 200000, - MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, - }, - { - ID: "claude-opus-4-1-20250805", - Object: "model", - Created: 1722945600, // 2025-08-05 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4.1 Opus", - ContextLength: 200000, - MaxCompletionTokens: 32000, - Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false}, - }, - { - ID: "claude-opus-4-20250514", - Object: "model", - Created: 1715644800, // 2025-05-14 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4 Opus", - ContextLength: 200000, - MaxCompletionTokens: 32000, - Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false}, - }, - { - ID: "claude-sonnet-4-20250514", - Object: "model", - Created: 1715644800, // 2025-05-14 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 4 Sonnet", - ContextLength: 200000, - MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false}, - }, - { - ID: "claude-3-7-sonnet-20250219", - Object: "model", - Created: 1708300800, // 2025-02-19 - OwnedBy: "anthropic", - Type: "claude", - DisplayName: "Claude 3.7 Sonnet", - ContextLength: 128000, - MaxCompletionTokens: 8192, - Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false}, - }, - { - ID: "claude-3-5-haiku-20241022", - Object: "model", - Created: 1729555200, // 2024-10-22 - OwnedBy: 
"anthropic", - Type: "claude", - DisplayName: "Claude 3.5 Haiku", - ContextLength: 128000, - MaxCompletionTokens: 8192, - // Thinking: not supported for Haiku models - }, - } -} - -// GetGeminiModels returns the standard Gemini model definitions -func GetGeminiModels() []*ModelInfo { - return []*ModelInfo{ - { - ID: "gemini-2.5-pro", - Object: "model", - Created: 1750118400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-2.5-pro", - Version: "2.5", - DisplayName: "Gemini 2.5 Pro", - Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, - }, - { - ID: "gemini-2.5-flash", - Object: "model", - Created: 1750118400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-2.5-flash", - Version: "001", - DisplayName: "Gemini 2.5 Flash", - Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, - }, - { - ID: "gemini-2.5-flash-lite", - Object: "model", - Created: 1753142400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-2.5-flash-lite", - Version: "2.5", - DisplayName: "Gemini 2.5 Flash Lite", - Description: "Our smallest and most cost effective model, built for at scale usage.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 0, Max: 24576, 
ZeroAllowed: true, DynamicAllowed: true}, - }, - { - ID: "gemini-3-pro-preview", - Object: "model", - Created: 1737158400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3-pro-preview", - Version: "3.0", - DisplayName: "Gemini 3 Pro Preview", - Description: "Gemini 3 Pro Preview", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, - }, - { - ID: "gemini-3.1-pro-preview", - Object: "model", - Created: 1771459200, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3.1-pro-preview", - Version: "3.1", - DisplayName: "Gemini 3.1 Pro Preview", - Description: "Gemini 3.1 Pro Preview", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, - }, - { - ID: "gemini-3.1-flash-image-preview", - Object: "model", - Created: 1771459200, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3.1-flash-image-preview", - Version: "3.1", - DisplayName: "Gemini 3.1 Flash Image Preview", - Description: "Gemini 3.1 Flash Image Preview", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}, - }, - { - ID: "gemini-3-flash-preview", - Object: "model", - Created: 1765929600, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3-flash-preview", - Version: "3.0", - DisplayName: "Gemini 3 Flash Preview", - 
Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, - }, - { - ID: "gemini-3.1-flash-lite-preview", - Object: "model", - Created: 1776288000, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3.1-flash-lite-preview", - Version: "3.1", - DisplayName: "Gemini 3.1 Flash Lite Preview", - Description: "Our smallest and most cost effective model, built for at scale usage.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}, - }, - { - ID: "gemini-3-pro-image-preview", - Object: "model", - Created: 1737158400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3-pro-image-preview", - Version: "3.0", - DisplayName: "Gemini 3 Pro Image Preview", - Description: "Gemini 3 Pro Image Preview", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, - }, - } -} - -func GetGeminiVertexModels() []*ModelInfo { - return []*ModelInfo{ - { - ID: "gemini-2.5-pro", - Object: "model", - Created: 1750118400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-2.5-pro", - Version: "2.5", - DisplayName: "Gemini 2.5 Pro", - Description: "Stable release (June 17th, 2025) of 
Gemini 2.5 Pro", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, - }, - { - ID: "gemini-2.5-flash", - Object: "model", - Created: 1750118400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-2.5-flash", - Version: "001", - DisplayName: "Gemini 2.5 Flash", - Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, - }, - { - ID: "gemini-2.5-flash-lite", - Object: "model", - Created: 1753142400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-2.5-flash-lite", - Version: "2.5", - DisplayName: "Gemini 2.5 Flash Lite", - Description: "Our smallest and most cost effective model, built for at scale usage.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, - }, - { - ID: "gemini-3-pro-preview", - Object: "model", - Created: 1737158400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3-pro-preview", - Version: "3.0", - DisplayName: "Gemini 3 Pro Preview", - Description: "Gemini 3 Pro Preview", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: 
true, Levels: []string{"low", "high"}}, - }, - { - ID: "gemini-3-flash-preview", - Object: "model", - Created: 1765929600, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3-flash-preview", - Version: "3.0", - DisplayName: "Gemini 3 Flash Preview", - Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, - }, - { - ID: "gemini-3.1-pro-preview", - Object: "model", - Created: 1771459200, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3.1-pro-preview", - Version: "3.1", - DisplayName: "Gemini 3.1 Pro Preview", - Description: "Gemini 3.1 Pro Preview", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, - }, - { - ID: "gemini-3.1-flash-image-preview", - Object: "model", - Created: 1771459200, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3.1-flash-image-preview", - Version: "3.1", - DisplayName: "Gemini 3.1 Flash Image Preview", - Description: "Gemini 3.1 Flash Image Preview", - }, - { - ID: "gemini-3.1-flash-lite-preview", - Object: "model", - Created: 1776288000, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3.1-flash-lite-preview", - Version: "3.1", - DisplayName: "Gemini 3.1 Flash Lite Preview", - Description: "Our smallest and most cost effective model, built for at scale usage.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: 
[]string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}, - }, - { - ID: "gemini-3-pro-image-preview", - Object: "model", - Created: 1737158400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3-pro-image-preview", - Version: "3.0", - DisplayName: "Gemini 3 Pro Image Preview", - Description: "Gemini 3 Pro Image Preview", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, - }, - // Imagen image generation models - use :predict action - { - ID: "imagen-4.0-generate-001", - Object: "model", - Created: 1750000000, - OwnedBy: "google", - Type: "gemini", - Name: "models/imagen-4.0-generate-001", - Version: "4.0", - DisplayName: "Imagen 4.0 Generate", - Description: "Imagen 4.0 image generation model", - SupportedGenerationMethods: []string{"predict"}, - }, - { - ID: "imagen-4.0-ultra-generate-001", - Object: "model", - Created: 1750000000, - OwnedBy: "google", - Type: "gemini", - Name: "models/imagen-4.0-ultra-generate-001", - Version: "4.0", - DisplayName: "Imagen 4.0 Ultra Generate", - Description: "Imagen 4.0 Ultra high-quality image generation model", - SupportedGenerationMethods: []string{"predict"}, - }, - { - ID: "imagen-3.0-generate-002", - Object: "model", - Created: 1740000000, - OwnedBy: "google", - Type: "gemini", - Name: "models/imagen-3.0-generate-002", - Version: "3.0", - DisplayName: "Imagen 3.0 Generate", - Description: "Imagen 3.0 image generation model", - SupportedGenerationMethods: []string{"predict"}, - }, - { - ID: "imagen-3.0-fast-generate-001", - Object: "model", - Created: 1740000000, - OwnedBy: "google", - Type: "gemini", 
- Name: "models/imagen-3.0-fast-generate-001", - Version: "3.0", - DisplayName: "Imagen 3.0 Fast Generate", - Description: "Imagen 3.0 fast image generation model", - SupportedGenerationMethods: []string{"predict"}, - }, - { - ID: "imagen-4.0-fast-generate-001", - Object: "model", - Created: 1750000000, - OwnedBy: "google", - Type: "gemini", - Name: "models/imagen-4.0-fast-generate-001", - Version: "4.0", - DisplayName: "Imagen 4.0 Fast Generate", - Description: "Imagen 4.0 fast image generation model", - SupportedGenerationMethods: []string{"predict"}, - }, - } -} - -// GetGeminiCLIModels returns the standard Gemini model definitions -func GetGeminiCLIModels() []*ModelInfo { - return []*ModelInfo{ - { - ID: "gemini-2.5-pro", - Object: "model", - Created: 1750118400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-2.5-pro", - Version: "2.5", - DisplayName: "Gemini 2.5 Pro", - Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, - }, - { - ID: "gemini-2.5-flash", - Object: "model", - Created: 1750118400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-2.5-flash", - Version: "001", - DisplayName: "Gemini 2.5 Flash", - Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, - }, - { - ID: "gemini-2.5-flash-lite", - Object: "model", - Created: 1753142400, - OwnedBy: "google", - Type: "gemini", - Name: 
"models/gemini-2.5-flash-lite", - Version: "2.5", - DisplayName: "Gemini 2.5 Flash Lite", - Description: "Our smallest and most cost effective model, built for at scale usage.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, - }, - { - ID: "gemini-3-pro-preview", - Object: "model", - Created: 1737158400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3-pro-preview", - Version: "3.0", - DisplayName: "Gemini 3 Pro Preview", - Description: "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, - }, - { - ID: "gemini-3.1-pro-preview", - Object: "model", - Created: 1771459200, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3.1-pro-preview", - Version: "3.1", - DisplayName: "Gemini 3.1 Pro Preview", - Description: "Gemini 3.1 Pro Preview", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, - }, - { - ID: "gemini-3-flash-preview", - Object: "model", - Created: 1765929600, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3-flash-preview", - Version: "3.0", - DisplayName: "Gemini 3 Flash Preview", - Description: "Our most intelligent model built for speed, combining frontier intelligence with 
superior search and grounding.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, - }, - { - ID: "gemini-3.1-flash-lite-preview", - Object: "model", - Created: 1776288000, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3.1-flash-lite-preview", - Version: "3.1", - DisplayName: "Gemini 3.1 Flash Lite Preview", - Description: "Our smallest and most cost effective model, built for at scale usage.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}, - }, - } -} - -// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations -func GetAIStudioModels() []*ModelInfo { - return []*ModelInfo{ - { - ID: "gemini-2.5-pro", - Object: "model", - Created: 1750118400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-2.5-pro", - Version: "2.5", - DisplayName: "Gemini 2.5 Pro", - Description: "Stable release (June 17th, 2025) of Gemini 2.5 Pro", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, - }, - { - ID: "gemini-2.5-flash", - Object: "model", - Created: 1750118400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-2.5-flash", - Version: "001", - DisplayName: "Gemini 2.5 Flash", - Description: "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 
million tokens, released in June of 2025.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, - }, - { - ID: "gemini-2.5-flash-lite", - Object: "model", - Created: 1753142400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-2.5-flash-lite", - Version: "2.5", - DisplayName: "Gemini 2.5 Flash Lite", - Description: "Our smallest and most cost effective model, built for at scale usage.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, - }, - { - ID: "gemini-3-pro-preview", - Object: "model", - Created: 1737158400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3-pro-preview", - Version: "3.0", - DisplayName: "Gemini 3 Pro Preview", - Description: "Gemini 3 Pro Preview", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, - }, - { - ID: "gemini-3.1-pro-preview", - Object: "model", - Created: 1771459200, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3.1-pro-preview", - Version: "3.1", - DisplayName: "Gemini 3.1 Pro Preview", - Description: "Gemini 3.1 Pro Preview", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, - }, - { - ID: "gemini-3-flash-preview", - Object: 
"model", - Created: 1765929600, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3-flash-preview", - Version: "3.0", - DisplayName: "Gemini 3 Flash Preview", - Description: "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, - }, - { - ID: "gemini-3.1-flash-lite-preview", - Object: "model", - Created: 1776288000, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-3.1-flash-lite-preview", - Version: "3.1", - DisplayName: "Gemini 3.1 Flash Lite Preview", - Description: "Our smallest and most cost effective model, built for at scale usage.", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}, - }, - { - ID: "gemini-pro-latest", - Object: "model", - Created: 1750118400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-pro-latest", - Version: "2.5", - DisplayName: "Gemini Pro Latest", - Description: "Latest release of Gemini Pro", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, - }, - { - ID: "gemini-flash-latest", - Object: "model", - Created: 1750118400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-flash-latest", - Version: "2.5", - DisplayName: "Gemini Flash Latest", - Description: "Latest release of Gemini Flash", - 
InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, - }, - { - ID: "gemini-flash-lite-latest", - Object: "model", - Created: 1753142400, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-flash-lite-latest", - Version: "2.5", - DisplayName: "Gemini Flash-Lite Latest", - Description: "Latest release of Gemini Flash-Lite", - InputTokenLimit: 1048576, - OutputTokenLimit: 65536, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, - }, - // { - // ID: "gemini-2.5-flash-image-preview", - // Object: "model", - // Created: 1756166400, - // OwnedBy: "google", - // Type: "gemini", - // Name: "models/gemini-2.5-flash-image-preview", - // Version: "2.5", - // DisplayName: "Gemini 2.5 Flash Image Preview", - // Description: "State-of-the-art image generation and editing model.", - // InputTokenLimit: 1048576, - // OutputTokenLimit: 8192, - // SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - // // image models don't support thinkingConfig; leave Thinking nil - // }, - { - ID: "gemini-2.5-flash-image", - Object: "model", - Created: 1759363200, - OwnedBy: "google", - Type: "gemini", - Name: "models/gemini-2.5-flash-image", - Version: "2.5", - DisplayName: "Gemini 2.5 Flash Image", - Description: "State-of-the-art image generation and editing model.", - InputTokenLimit: 1048576, - OutputTokenLimit: 8192, - SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - // image models don't support thinkingConfig; leave Thinking nil - }, - } -} - -// GetOpenAIModels returns 
the standard OpenAI model definitions -func GetOpenAIModels() []*ModelInfo { - return []*ModelInfo{ - { - ID: "gpt-5", - Object: "model", - Created: 1754524800, - OwnedBy: "openai", - Type: "openai", - Version: "gpt-5-2025-08-07", - DisplayName: "GPT 5", - Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.", - ContextLength: 400000, - MaxCompletionTokens: 128000, - SupportedParameters: []string{"tools"}, - Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}}, - }, - { - ID: "gpt-5-codex", - Object: "model", - Created: 1757894400, - OwnedBy: "openai", - Type: "openai", - Version: "gpt-5-2025-09-15", - DisplayName: "GPT 5 Codex", - Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.", - ContextLength: 400000, - MaxCompletionTokens: 128000, - SupportedParameters: []string{"tools"}, - Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, - }, - { - ID: "gpt-5-codex-mini", - Object: "model", - Created: 1762473600, - OwnedBy: "openai", - Type: "openai", - Version: "gpt-5-2025-11-07", - DisplayName: "GPT 5 Codex Mini", - Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.", - ContextLength: 400000, - MaxCompletionTokens: 128000, - SupportedParameters: []string{"tools"}, - Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, - }, - { - ID: "gpt-5.1", - Object: "model", - Created: 1762905600, - OwnedBy: "openai", - Type: "openai", - Version: "gpt-5.1-2025-11-12", - DisplayName: "GPT 5", - Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.", - ContextLength: 400000, - MaxCompletionTokens: 128000, - SupportedParameters: []string{"tools"}, - Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}}, - }, - { - ID: "gpt-5.1-codex", - Object: "model", - Created: 1762905600, - OwnedBy: "openai", - 
Type: "openai", - Version: "gpt-5.1-2025-11-12", - DisplayName: "GPT 5.1 Codex", - Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.", - ContextLength: 400000, - MaxCompletionTokens: 128000, - SupportedParameters: []string{"tools"}, - Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, - }, - { - ID: "gpt-5.1-codex-mini", - Object: "model", - Created: 1762905600, - OwnedBy: "openai", - Type: "openai", - Version: "gpt-5.1-2025-11-12", - DisplayName: "GPT 5.1 Codex Mini", - Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.", - ContextLength: 400000, - MaxCompletionTokens: 128000, - SupportedParameters: []string{"tools"}, - Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, - }, - { - ID: "gpt-5.1-codex-max", - Object: "model", - Created: 1763424000, - OwnedBy: "openai", - Type: "openai", - Version: "gpt-5.1-max", - DisplayName: "GPT 5.1 Codex Max", - Description: "Stable version of GPT 5.1 Codex Max", - ContextLength: 400000, - MaxCompletionTokens: 128000, - SupportedParameters: []string{"tools"}, - Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}}, - }, - { - ID: "gpt-5.2", - Object: "model", - Created: 1765440000, - OwnedBy: "openai", - Type: "openai", - Version: "gpt-5.2", - DisplayName: "GPT 5.2", - Description: "Stable version of GPT 5.2", - ContextLength: 400000, - MaxCompletionTokens: 128000, - SupportedParameters: []string{"tools"}, - Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}}, - }, - { - ID: "gpt-5.2-codex", - Object: "model", - Created: 1765440000, - OwnedBy: "openai", - Type: "openai", - Version: "gpt-5.2", - DisplayName: "GPT 5.2 Codex", - Description: "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.", - ContextLength: 400000, - MaxCompletionTokens: 128000, - SupportedParameters: 
[]string{"tools"}, - Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}}, - }, - { - ID: "gpt-5.3-codex", - Object: "model", - Created: 1770307200, - OwnedBy: "openai", - Type: "openai", - Version: "gpt-5.3", - DisplayName: "GPT 5.3 Codex", - Description: "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.", - ContextLength: 400000, - MaxCompletionTokens: 128000, - SupportedParameters: []string{"tools"}, - Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}}, - }, - { - ID: "gpt-5.3-codex-spark", - Object: "model", - Created: 1770912000, - OwnedBy: "openai", - Type: "openai", - Version: "gpt-5.3", - DisplayName: "GPT 5.3 Codex Spark", - Description: "Ultra-fast coding model.", - ContextLength: 128000, - MaxCompletionTokens: 128000, - SupportedParameters: []string{"tools"}, - Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}}, - }, - { - ID: "gpt-5.4", - Object: "model", - Created: 1772668800, - OwnedBy: "openai", - Type: "openai", - Version: "gpt-5.4", - DisplayName: "GPT 5.4", - Description: "Stable version of GPT 5.4", - ContextLength: 1_050_000, - MaxCompletionTokens: 128000, - SupportedParameters: []string{"tools"}, - Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}}, - }, - } -} - -// GetQwenModels returns the standard Qwen model definitions -func GetQwenModels() []*ModelInfo { - return []*ModelInfo{ - { - ID: "qwen3-coder-plus", - Object: "model", - Created: 1753228800, - OwnedBy: "qwen", - Type: "qwen", - Version: "3.0", - DisplayName: "Qwen3 Coder Plus", - Description: "Advanced code generation and understanding model", - ContextLength: 32768, - MaxCompletionTokens: 8192, - SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"}, - }, - { - ID: "qwen3-coder-flash", - Object: "model", - Created: 1753228800, - OwnedBy: "qwen", - Type: "qwen", - Version: "3.0", - DisplayName: 
"Qwen3 Coder Flash", - Description: "Fast code generation model", - ContextLength: 8192, - MaxCompletionTokens: 2048, - SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"}, - }, - { - ID: "coder-model", - Object: "model", - Created: 1771171200, - OwnedBy: "qwen", - Type: "qwen", - Version: "3.5", - DisplayName: "Qwen 3.5 Plus", - Description: "efficient hybrid model with leading coding performance", - ContextLength: 1048576, - MaxCompletionTokens: 65536, - SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"}, - }, - { - ID: "vision-model", - Object: "model", - Created: 1758672000, - OwnedBy: "qwen", - Type: "qwen", - Version: "3.0", - DisplayName: "Qwen3 Vision Model", - Description: "Vision model model", - ContextLength: 32768, - MaxCompletionTokens: 2048, - SupportedParameters: []string{"temperature", "top_p", "max_tokens", "stream", "stop"}, - }, - } -} - -// iFlowThinkingSupport is a shared ThinkingSupport configuration for iFlow models -// that support thinking mode via chat_template_kwargs.enable_thinking (boolean toggle). -// Uses level-based configuration so standard normalization flows apply before conversion. -var iFlowThinkingSupport = &ThinkingSupport{ - Levels: []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"}, -} - -// GetIFlowModels returns supported models for iFlow OAuth accounts. 
-func GetIFlowModels() []*ModelInfo { - entries := []struct { - ID string - DisplayName string - Description string - Created int64 - Thinking *ThinkingSupport - }{ - {ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800}, - {ID: "qwen3-max", DisplayName: "Qwen3-Max", Description: "Qwen3 flagship model", Created: 1758672000}, - {ID: "qwen3-vl-plus", DisplayName: "Qwen3-VL-Plus", Description: "Qwen3 multimodal vision-language", Created: 1758672000}, - {ID: "qwen3-max-preview", DisplayName: "Qwen3-Max-Preview", Description: "Qwen3 Max preview build", Created: 1757030400, Thinking: iFlowThinkingSupport}, - {ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400, Thinking: iFlowThinkingSupport}, - {ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000}, - {ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000, Thinking: iFlowThinkingSupport}, - {ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200, Thinking: iFlowThinkingSupport}, - {ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200}, - {ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200}, - {ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400}, - {ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600}, - {ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600}, - {ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600}, - {ID: "iflow-rome-30ba3b", 
DisplayName: "iFlow-ROME", Description: "iFlow Rome 30BA3B model", Created: 1736899200}, - } - models := make([]*ModelInfo, 0, len(entries)) - for _, entry := range entries { - models = append(models, &ModelInfo{ - ID: entry.ID, - Object: "model", - Created: entry.Created, - OwnedBy: "iflow", - Type: "iflow", - DisplayName: entry.DisplayName, - Description: entry.Description, - Thinking: entry.Thinking, - }) - } - return models -} - -// AntigravityModelConfig captures static antigravity model overrides, including -// Thinking budget limits and provider max completion tokens. -type AntigravityModelConfig struct { - Thinking *ThinkingSupport - MaxCompletionTokens int -} - -// GetAntigravityModelConfig returns static configuration for antigravity models. -// Keys use upstream model names returned by the Antigravity models endpoint. -func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { - return map[string]*AntigravityModelConfig{ - "gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}}, - "gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}}, - "gemini-3-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, - "gemini-3-pro-low": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, - "gemini-3.1-pro-high": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, - "gemini-3.1-pro-low": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}}, - "gemini-3.1-flash-image": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}}, - "gemini-3.1-flash-lite-preview": {Thinking: 
&ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "high"}}}, - "gemini-3-flash": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}}, - "claude-opus-4-6-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, - "claude-sonnet-4-6": {Thinking: &ThinkingSupport{Min: 1024, Max: 64000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, - "gpt-oss-120b-medium": {}, - } -} - -// GetKimiModels returns the standard Kimi (Moonshot AI) model definitions -func GetKimiModels() []*ModelInfo { - return []*ModelInfo{ - { - ID: "kimi-k2", - Object: "model", - Created: 1752192000, // 2025-07-11 - OwnedBy: "moonshot", - Type: "kimi", - DisplayName: "Kimi K2", - Description: "Kimi K2 - Moonshot AI's flagship coding model", - ContextLength: 131072, - MaxCompletionTokens: 32768, - }, - { - ID: "kimi-k2-thinking", - Object: "model", - Created: 1762387200, // 2025-11-06 - OwnedBy: "moonshot", - Type: "kimi", - DisplayName: "Kimi K2 Thinking", - Description: "Kimi K2 Thinking - Extended reasoning model", - ContextLength: 131072, - MaxCompletionTokens: 32768, - Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true}, - }, - { - ID: "kimi-k2.5", - Object: "model", - Created: 1769472000, // 2026-01-26 - OwnedBy: "moonshot", - Type: "kimi", - DisplayName: "Kimi K2.5", - Description: "Kimi K2.5 - Latest Moonshot AI coding model with improved capabilities", - ContextLength: 131072, - MaxCompletionTokens: 32768, - Thinking: &ThinkingSupport{Min: 1024, Max: 32000, ZeroAllowed: true, DynamicAllowed: true}, - }, - } -} diff --git a/internal/registry/model_updater.go b/internal/registry/model_updater.go new file mode 100644 index 00000000..84c9d6aa --- /dev/null +++ b/internal/registry/model_updater.go @@ -0,0 +1,198 
@@ +package registry + +import ( + "context" + _ "embed" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "sync" + "time" + + log "github.com/sirupsen/logrus" +) + +const ( + modelsFetchTimeout = 30 * time.Second +) + +var modelsURLs = []string{ + "https://raw.githubusercontent.com/router-for-me/models/refs/heads/main/models.json", + "https://models.router-for.me/models.json", +} + +//go:embed models/models.json +var embeddedModelsJSON []byte + +type modelStore struct { + mu sync.RWMutex + data *staticModelsJSON +} + +var modelsCatalogStore = &modelStore{} + +var updaterOnce sync.Once + +func init() { + // Load embedded data as fallback on startup. + if err := loadModelsFromBytes(embeddedModelsJSON, "embed"); err != nil { + panic(fmt.Sprintf("registry: failed to parse embedded models.json: %v", err)) + } +} + +// StartModelsUpdater runs a one-time models refresh on startup. +// It blocks until the startup fetch attempt finishes so service initialization +// can wait for the refreshed catalog before registering auth-backed models. +// Safe to call multiple times; only one refresh will run. +func StartModelsUpdater(ctx context.Context) { + updaterOnce.Do(func() { + runModelsUpdater(ctx) + }) +} + +func runModelsUpdater(ctx context.Context) { + // Try network fetch once on startup, then stop. + // Periodic refresh is disabled - models are only refreshed at startup. 
+ tryRefreshModels(ctx) +} + +func tryRefreshModels(ctx context.Context) { + client := &http.Client{Timeout: modelsFetchTimeout} + for _, url := range modelsURLs { + reqCtx, cancel := context.WithTimeout(ctx, modelsFetchTimeout) + req, err := http.NewRequestWithContext(reqCtx, "GET", url, nil) + if err != nil { + cancel() + log.Debugf("models fetch request creation failed for %s: %v", url, err) + continue + } + + resp, err := client.Do(req) + if err != nil { + cancel() + log.Debugf("models fetch failed from %s: %v", url, err) + continue + } + + if resp.StatusCode != 200 { + resp.Body.Close() + cancel() + log.Debugf("models fetch returned %d from %s", resp.StatusCode, url) + continue + } + + data, err := io.ReadAll(resp.Body) + resp.Body.Close() + cancel() + + if err != nil { + log.Debugf("models fetch read error from %s: %v", url, err) + continue + } + + if err := loadModelsFromBytes(data, url); err != nil { + log.Warnf("models parse failed from %s: %v", url, err) + continue + } + + log.Infof("models updated from %s", url) + return + } + log.Warn("models refresh failed from all URLs, using current data") +} + +func loadModelsFromBytes(data []byte, source string) error { + var parsed staticModelsJSON + if err := json.Unmarshal(data, &parsed); err != nil { + return fmt.Errorf("%s: decode models catalog: %w", source, err) + } + if err := validateModelsCatalog(&parsed); err != nil { + return fmt.Errorf("%s: validate models catalog: %w", source, err) + } + + modelsCatalogStore.mu.Lock() + modelsCatalogStore.data = &parsed + modelsCatalogStore.mu.Unlock() + return nil +} + +func getModels() *staticModelsJSON { + modelsCatalogStore.mu.RLock() + defer modelsCatalogStore.mu.RUnlock() + return modelsCatalogStore.data +} + +func validateModelsCatalog(data *staticModelsJSON) error { + if data == nil { + return fmt.Errorf("catalog is nil") + } + + requiredSections := []struct { + name string + models []*ModelInfo + }{ + {name: "claude", models: data.Claude}, + {name: "gemini", 
models: data.Gemini}, + {name: "vertex", models: data.Vertex}, + {name: "gemini-cli", models: data.GeminiCLI}, + {name: "aistudio", models: data.AIStudio}, + {name: "codex-free", models: data.CodexFree}, + {name: "codex-team", models: data.CodexTeam}, + {name: "codex-plus", models: data.CodexPlus}, + {name: "codex-pro", models: data.CodexPro}, + {name: "qwen", models: data.Qwen}, + {name: "iflow", models: data.IFlow}, + {name: "kimi", models: data.Kimi}, + } + + for _, section := range requiredSections { + if err := validateModelSection(section.name, section.models); err != nil { + return err + } + } + if err := validateAntigravitySection(data.Antigravity); err != nil { + return err + } + return nil +} + +func validateModelSection(section string, models []*ModelInfo) error { + if len(models) == 0 { + return fmt.Errorf("%s section is empty", section) + } + + seen := make(map[string]struct{}, len(models)) + for i, model := range models { + if model == nil { + return fmt.Errorf("%s[%d] is null", section, i) + } + modelID := strings.TrimSpace(model.ID) + if modelID == "" { + return fmt.Errorf("%s[%d] has empty id", section, i) + } + if _, exists := seen[modelID]; exists { + return fmt.Errorf("%s contains duplicate model id %q", section, modelID) + } + seen[modelID] = struct{}{} + } + return nil +} + +func validateAntigravitySection(configs map[string]*AntigravityModelConfig) error { + if len(configs) == 0 { + return fmt.Errorf("antigravity section is empty") + } + + for modelID, cfg := range configs { + trimmedID := strings.TrimSpace(modelID) + if trimmedID == "" { + return fmt.Errorf("antigravity contains empty model id") + } + if cfg == nil { + return fmt.Errorf("antigravity[%q] is null", trimmedID) + } + } + return nil +} diff --git a/internal/registry/models/models.json b/internal/registry/models/models.json new file mode 100644 index 00000000..5f919f9f --- /dev/null +++ b/internal/registry/models/models.json @@ -0,0 +1,2598 @@ +{ + "claude": [ + { + "id": 
"claude-haiku-4-5-20251001", + "object": "model", + "created": 1759276800, + "owned_by": "anthropic", + "type": "claude", + "display_name": "Claude 4.5 Haiku", + "context_length": 200000, + "max_completion_tokens": 64000, + "thinking": { + "min": 1024, + "max": 128000, + "zero_allowed": true + } + }, + { + "id": "claude-sonnet-4-5-20250929", + "object": "model", + "created": 1759104000, + "owned_by": "anthropic", + "type": "claude", + "display_name": "Claude 4.5 Sonnet", + "context_length": 200000, + "max_completion_tokens": 64000, + "thinking": { + "min": 1024, + "max": 128000, + "zero_allowed": true + } + }, + { + "id": "claude-sonnet-4-6", + "object": "model", + "created": 1771372800, + "owned_by": "anthropic", + "type": "claude", + "display_name": "Claude 4.6 Sonnet", + "context_length": 200000, + "max_completion_tokens": 64000, + "thinking": { + "min": 1024, + "max": 128000, + "zero_allowed": true, + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "claude-opus-4-6", + "object": "model", + "created": 1770318000, + "owned_by": "anthropic", + "type": "claude", + "display_name": "Claude 4.6 Opus", + "description": "Premium model combining maximum intelligence with practical performance", + "context_length": 1000000, + "max_completion_tokens": 128000, + "thinking": { + "min": 1024, + "max": 128000, + "zero_allowed": true, + "levels": [ + "low", + "medium", + "high", + "max" + ] + } + }, + { + "id": "claude-opus-4-5-20251101", + "object": "model", + "created": 1761955200, + "owned_by": "anthropic", + "type": "claude", + "display_name": "Claude 4.5 Opus", + "description": "Premium model combining maximum intelligence with practical performance", + "context_length": 200000, + "max_completion_tokens": 64000, + "thinking": { + "min": 1024, + "max": 128000, + "zero_allowed": true + } + }, + { + "id": "claude-opus-4-1-20250805", + "object": "model", + "created": 1722945600, + "owned_by": "anthropic", + "type": "claude", + "display_name": "Claude 4.1 
Opus", + "context_length": 200000, + "max_completion_tokens": 32000, + "thinking": { + "min": 1024, + "max": 128000 + } + }, + { + "id": "claude-opus-4-20250514", + "object": "model", + "created": 1715644800, + "owned_by": "anthropic", + "type": "claude", + "display_name": "Claude 4 Opus", + "context_length": 200000, + "max_completion_tokens": 32000, + "thinking": { + "min": 1024, + "max": 128000 + } + }, + { + "id": "claude-sonnet-4-20250514", + "object": "model", + "created": 1715644800, + "owned_by": "anthropic", + "type": "claude", + "display_name": "Claude 4 Sonnet", + "context_length": 200000, + "max_completion_tokens": 64000, + "thinking": { + "min": 1024, + "max": 128000 + } + }, + { + "id": "claude-3-7-sonnet-20250219", + "object": "model", + "created": 1708300800, + "owned_by": "anthropic", + "type": "claude", + "display_name": "Claude 3.7 Sonnet", + "context_length": 128000, + "max_completion_tokens": 8192, + "thinking": { + "min": 1024, + "max": 128000 + } + }, + { + "id": "claude-3-5-haiku-20241022", + "object": "model", + "created": 1729555200, + "owned_by": "anthropic", + "type": "claude", + "display_name": "Claude 3.5 Haiku", + "context_length": 128000, + "max_completion_tokens": 8192 + } + ], + "gemini": [ + { + "id": "gemini-2.5-pro", + "object": "model", + "created": 1750118400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Pro", + "name": "models/gemini-2.5-pro", + "version": "2.5", + "description": "Stable release (June 17th, 2025) of Gemini 2.5 Pro", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true + } + }, + { + "id": "gemini-2.5-flash", + "object": "model", + "created": 1750118400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Flash", + "name": "models/gemini-2.5-flash", + 
"version": "001", + "description": "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "max": 24576, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + { + "id": "gemini-2.5-flash-lite", + "object": "model", + "created": 1753142400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Flash Lite", + "name": "models/gemini-2.5-flash-lite", + "version": "2.5", + "description": "Our smallest and most cost effective model, built for at scale usage.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "max": 24576, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + { + "id": "gemini-3-pro-preview", + "object": "model", + "created": 1737158400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3 Pro Preview", + "name": "models/gemini-3-pro-preview", + "version": "3.0", + "description": "Gemini 3 Pro Preview", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "low", + "high" + ] + } + }, + { + "id": "gemini-3.1-pro-preview", + "object": "model", + "created": 1771459200, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3.1 Pro Preview", + "name": "models/gemini-3.1-pro-preview", + "version": "3.1", + "description": "Gemini 3.1 Pro Preview", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + 
"generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "low", + "high" + ] + } + }, + { + "id": "gemini-3.1-flash-image-preview", + "object": "model", + "created": 1771459200, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3.1 Flash Image Preview", + "name": "models/gemini-3.1-flash-image-preview", + "version": "3.1", + "description": "Gemini 3.1 Flash Image Preview", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "minimal", + "high" + ] + } + }, + { + "id": "gemini-3-flash-preview", + "object": "model", + "created": 1765929600, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3 Flash Preview", + "name": "models/gemini-3-flash-preview", + "version": "3.0", + "description": "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "minimal", + "low", + "medium", + "high" + ] + } + }, + { + "id": "gemini-3.1-flash-lite-preview", + "object": "model", + "created": 1776288000, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3.1 Flash Lite Preview", + "name": "models/gemini-3.1-flash-lite-preview", + "version": "3.1", + "description": "Our smallest and most cost effective model, built for at scale usage.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", 
+ "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "minimal", + "high" + ] + } + }, + { + "id": "gemini-3-pro-image-preview", + "object": "model", + "created": 1737158400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3 Pro Image Preview", + "name": "models/gemini-3-pro-image-preview", + "version": "3.0", + "description": "Gemini 3 Pro Image Preview", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "low", + "high" + ] + } + } + ], + "vertex": [ + { + "id": "gemini-2.5-pro", + "object": "model", + "created": 1750118400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Pro", + "name": "models/gemini-2.5-pro", + "version": "2.5", + "description": "Stable release (June 17th, 2025) of Gemini 2.5 Pro", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true + } + }, + { + "id": "gemini-2.5-flash", + "object": "model", + "created": 1750118400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Flash", + "name": "models/gemini-2.5-flash", + "version": "001", + "description": "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "max": 24576, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + { + "id": 
"gemini-2.5-flash-lite", + "object": "model", + "created": 1753142400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Flash Lite", + "name": "models/gemini-2.5-flash-lite", + "version": "2.5", + "description": "Our smallest and most cost effective model, built for at scale usage.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "max": 24576, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + { + "id": "gemini-3-pro-preview", + "object": "model", + "created": 1737158400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3 Pro Preview", + "name": "models/gemini-3-pro-preview", + "version": "3.0", + "description": "Gemini 3 Pro Preview", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "low", + "high" + ] + } + }, + { + "id": "gemini-3-flash-preview", + "object": "model", + "created": 1765929600, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3 Flash Preview", + "name": "models/gemini-3-flash-preview", + "version": "3.0", + "description": "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "minimal", + "low", + "medium", + "high" + ] + } + }, + { + "id": "gemini-3.1-pro-preview", + "object": "model", + "created": 1771459200, + "owned_by": "google", + "type": "gemini", + 
"display_name": "Gemini 3.1 Pro Preview", + "name": "models/gemini-3.1-pro-preview", + "version": "3.1", + "description": "Gemini 3.1 Pro Preview", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "low", + "high" + ] + } + }, + { + "id": "gemini-3.1-flash-image-preview", + "object": "model", + "created": 1771459200, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3.1 Flash Image Preview", + "name": "models/gemini-3.1-flash-image-preview", + "version": "3.1", + "description": "Gemini 3.1 Flash Image Preview", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "minimal", + "high" + ] + } + }, + { + "id": "gemini-3.1-flash-lite-preview", + "object": "model", + "created": 1776288000, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3.1 Flash Lite Preview", + "name": "models/gemini-3.1-flash-lite-preview", + "version": "3.1", + "description": "Our smallest and most cost effective model, built for at scale usage.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "minimal", + "high" + ] + } + }, + { + "id": "gemini-3-pro-image-preview", + "object": "model", + "created": 1737158400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3 Pro Image Preview", + "name": "models/gemini-3-pro-image-preview", + "version": "3.0", + "description": "Gemini 3 
Pro Image Preview", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "low", + "high" + ] + } + }, + { + "id": "imagen-4.0-generate-001", + "object": "model", + "created": 1750000000, + "owned_by": "google", + "type": "gemini", + "display_name": "Imagen 4.0 Generate", + "name": "models/imagen-4.0-generate-001", + "version": "4.0", + "description": "Imagen 4.0 image generation model", + "supportedGenerationMethods": [ + "predict" + ] + }, + { + "id": "imagen-4.0-ultra-generate-001", + "object": "model", + "created": 1750000000, + "owned_by": "google", + "type": "gemini", + "display_name": "Imagen 4.0 Ultra Generate", + "name": "models/imagen-4.0-ultra-generate-001", + "version": "4.0", + "description": "Imagen 4.0 Ultra high-quality image generation model", + "supportedGenerationMethods": [ + "predict" + ] + }, + { + "id": "imagen-3.0-generate-002", + "object": "model", + "created": 1740000000, + "owned_by": "google", + "type": "gemini", + "display_name": "Imagen 3.0 Generate", + "name": "models/imagen-3.0-generate-002", + "version": "3.0", + "description": "Imagen 3.0 image generation model", + "supportedGenerationMethods": [ + "predict" + ] + }, + { + "id": "imagen-3.0-fast-generate-001", + "object": "model", + "created": 1740000000, + "owned_by": "google", + "type": "gemini", + "display_name": "Imagen 3.0 Fast Generate", + "name": "models/imagen-3.0-fast-generate-001", + "version": "3.0", + "description": "Imagen 3.0 fast image generation model", + "supportedGenerationMethods": [ + "predict" + ] + }, + { + "id": "imagen-4.0-fast-generate-001", + "object": "model", + "created": 1750000000, + "owned_by": "google", + "type": "gemini", + "display_name": "Imagen 4.0 Fast Generate", + "name": "models/imagen-4.0-fast-generate-001", + "version": 
"4.0", + "description": "Imagen 4.0 fast image generation model", + "supportedGenerationMethods": [ + "predict" + ] + } + ], + "gemini-cli": [ + { + "id": "gemini-2.5-pro", + "object": "model", + "created": 1750118400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Pro", + "name": "models/gemini-2.5-pro", + "version": "2.5", + "description": "Stable release (June 17th, 2025) of Gemini 2.5 Pro", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true + } + }, + { + "id": "gemini-2.5-flash", + "object": "model", + "created": 1750118400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Flash", + "name": "models/gemini-2.5-flash", + "version": "001", + "description": "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "max": 24576, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + { + "id": "gemini-2.5-flash-lite", + "object": "model", + "created": 1753142400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Flash Lite", + "name": "models/gemini-2.5-flash-lite", + "version": "2.5", + "description": "Our smallest and most cost effective model, built for at scale usage.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "max": 24576, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + { + "id": "gemini-3-pro-preview", + "object": "model", + 
"created": 1737158400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3 Pro Preview", + "name": "models/gemini-3-pro-preview", + "version": "3.0", + "description": "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "low", + "high" + ] + } + }, + { + "id": "gemini-3.1-pro-preview", + "object": "model", + "created": 1771459200, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3.1 Pro Preview", + "name": "models/gemini-3.1-pro-preview", + "version": "3.1", + "description": "Gemini 3.1 Pro Preview", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "low", + "high" + ] + } + }, + { + "id": "gemini-3-flash-preview", + "object": "model", + "created": 1765929600, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3 Flash Preview", + "name": "models/gemini-3-flash-preview", + "version": "3.0", + "description": "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "minimal", + "low", + "medium", + "high" + ] + } + }, + { + "id": "gemini-3.1-flash-lite-preview", + "object": "model", + "created": 1776288000, + 
"owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3.1 Flash Lite Preview", + "name": "models/gemini-3.1-flash-lite-preview", + "version": "3.1", + "description": "Our smallest and most cost effective model, built for at scale usage.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "minimal", + "high" + ] + } + } + ], + "aistudio": [ + { + "id": "gemini-2.5-pro", + "object": "model", + "created": 1750118400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Pro", + "name": "models/gemini-2.5-pro", + "version": "2.5", + "description": "Stable release (June 17th, 2025) of Gemini 2.5 Pro", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true + } + }, + { + "id": "gemini-2.5-flash", + "object": "model", + "created": 1750118400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Flash", + "name": "models/gemini-2.5-flash", + "version": "001", + "description": "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "max": 24576, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + { + "id": "gemini-2.5-flash-lite", + "object": "model", + "created": 1753142400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Flash Lite", + "name": "models/gemini-2.5-flash-lite", + "version": 
"2.5", + "description": "Our smallest and most cost effective model, built for at scale usage.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "max": 24576, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + { + "id": "gemini-3-pro-preview", + "object": "model", + "created": 1737158400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3 Pro Preview", + "name": "models/gemini-3-pro-preview", + "version": "3.0", + "description": "Gemini 3 Pro Preview", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true + } + }, + { + "id": "gemini-3.1-pro-preview", + "object": "model", + "created": 1771459200, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3.1 Pro Preview", + "name": "models/gemini-3.1-pro-preview", + "version": "3.1", + "description": "Gemini 3.1 Pro Preview", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true + } + }, + { + "id": "gemini-3-flash-preview", + "object": "model", + "created": 1765929600, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3 Flash Preview", + "name": "models/gemini-3-flash-preview", + "version": "3.0", + "description": "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + 
"batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true + } + }, + { + "id": "gemini-3.1-flash-lite-preview", + "object": "model", + "created": 1776288000, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 3.1 Flash Lite Preview", + "name": "models/gemini-3.1-flash-lite-preview", + "version": "3.1", + "description": "Our smallest and most cost effective model, built for at scale usage.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "minimal", + "high" + ] + } + }, + { + "id": "gemini-pro-latest", + "object": "model", + "created": 1750118400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini Pro Latest", + "name": "models/gemini-pro-latest", + "version": "2.5", + "description": "Latest release of Gemini Pro", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true + } + }, + { + "id": "gemini-flash-latest", + "object": "model", + "created": 1750118400, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini Flash Latest", + "name": "models/gemini-flash-latest", + "version": "2.5", + "description": "Latest release of Gemini Flash", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "max": 24576, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + { + "id": "gemini-flash-lite-latest", + "object": "model", + "created": 1753142400, + "owned_by": "google", + "type": "gemini", + 
"display_name": "Gemini Flash-Lite Latest", + "name": "models/gemini-flash-lite-latest", + "version": "2.5", + "description": "Latest release of Gemini Flash-Lite", + "inputTokenLimit": 1048576, + "outputTokenLimit": 65536, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ], + "thinking": { + "min": 512, + "max": 24576, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + { + "id": "gemini-2.5-flash-image", + "object": "model", + "created": 1759363200, + "owned_by": "google", + "type": "gemini", + "display_name": "Gemini 2.5 Flash Image", + "name": "models/gemini-2.5-flash-image", + "version": "2.5", + "description": "State-of-the-art image generation and editing model.", + "inputTokenLimit": 1048576, + "outputTokenLimit": 8192, + "supportedGenerationMethods": [ + "generateContent", + "countTokens", + "createCachedContent", + "batchGenerateContent" + ] + } + ], + "codex-free": [ + { + "id": "gpt-5", + "object": "model", + "created": 1754524800, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5", + "version": "gpt-5-2025-08-07", + "description": "Stable version of GPT 5, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "minimal", + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5-codex", + "object": "model", + "created": 1757894400, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5 Codex", + "version": "gpt-5-2025-09-15", + "description": "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5-codex-mini", + "object": "model", + "created": 1762473600, + 
"owned_by": "openai", + "type": "openai", + "display_name": "GPT 5 Codex Mini", + "version": "gpt-5-2025-11-07", + "description": "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1", + "object": "model", + "created": 1762905600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5", + "version": "gpt-5.1-2025-11-12", + "description": "Stable version of GPT 5, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "none", + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1-codex", + "object": "model", + "created": 1762905600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.1 Codex", + "version": "gpt-5.1-2025-11-12", + "description": "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1-codex-mini", + "object": "model", + "created": 1762905600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.1 Codex Mini", + "version": "gpt-5.1-2025-11-12", + "description": "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1-codex-max", + "object": "model", + "created": 1763424000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 
5.1 Codex Max", + "version": "gpt-5.1-max", + "description": "Stable version of GPT 5.1 Codex Max", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.2", + "object": "model", + "created": 1765440000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.2", + "version": "gpt-5.2", + "description": "Stable version of GPT 5.2", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "none", + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.2-codex", + "object": "model", + "created": 1765440000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.2 Codex", + "version": "gpt-5.2", + "description": "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + } + ], + "codex-team": [ + { + "id": "gpt-5", + "object": "model", + "created": 1754524800, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5", + "version": "gpt-5-2025-08-07", + "description": "Stable version of GPT 5, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "minimal", + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5-codex", + "object": "model", + "created": 1757894400, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5 Codex", + "version": "gpt-5-2025-09-15", + "description": "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + 
"max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5-codex-mini", + "object": "model", + "created": 1762473600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5 Codex Mini", + "version": "gpt-5-2025-11-07", + "description": "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1", + "object": "model", + "created": 1762905600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5", + "version": "gpt-5.1-2025-11-12", + "description": "Stable version of GPT 5, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "none", + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1-codex", + "object": "model", + "created": 1762905600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.1 Codex", + "version": "gpt-5.1-2025-11-12", + "description": "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1-codex-mini", + "object": "model", + "created": 1762905600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.1 Codex Mini", + "version": "gpt-5.1-2025-11-12", + "description": "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + 
"tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1-codex-max", + "object": "model", + "created": 1763424000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.1 Codex Max", + "version": "gpt-5.1-max", + "description": "Stable version of GPT 5.1 Codex Max", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.2", + "object": "model", + "created": 1765440000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.2", + "version": "gpt-5.2", + "description": "Stable version of GPT 5.2", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "none", + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.2-codex", + "object": "model", + "created": 1765440000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.2 Codex", + "version": "gpt-5.2", + "description": "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.3-codex", + "object": "model", + "created": 1770307200, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.3 Codex", + "version": "gpt-5.3", + "description": "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.4", + "object": "model", + "created": 1772668800, + "owned_by": "openai", + "type": 
"openai", + "display_name": "GPT 5.4", + "version": "gpt-5.4", + "description": "Stable version of GPT 5.4", + "context_length": 1050000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + } + ], + "codex-plus": [ + { + "id": "gpt-5", + "object": "model", + "created": 1754524800, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5", + "version": "gpt-5-2025-08-07", + "description": "Stable version of GPT 5, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "minimal", + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5-codex", + "object": "model", + "created": 1757894400, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5 Codex", + "version": "gpt-5-2025-09-15", + "description": "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5-codex-mini", + "object": "model", + "created": 1762473600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5 Codex Mini", + "version": "gpt-5-2025-11-07", + "description": "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1", + "object": "model", + "created": 1762905600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5", + "version": "gpt-5.1-2025-11-12", + "description": "Stable version of GPT 5, The best model for coding and 
agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "none", + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1-codex", + "object": "model", + "created": 1762905600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.1 Codex", + "version": "gpt-5.1-2025-11-12", + "description": "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1-codex-mini", + "object": "model", + "created": 1762905600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.1 Codex Mini", + "version": "gpt-5.1-2025-11-12", + "description": "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1-codex-max", + "object": "model", + "created": 1763424000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.1 Codex Max", + "version": "gpt-5.1-max", + "description": "Stable version of GPT 5.1 Codex Max", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.2", + "object": "model", + "created": 1765440000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.2", + "version": "gpt-5.2", + "description": "Stable version of GPT 5.2", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "none", + 
"low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.2-codex", + "object": "model", + "created": 1765440000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.2 Codex", + "version": "gpt-5.2", + "description": "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.3-codex", + "object": "model", + "created": 1770307200, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.3 Codex", + "version": "gpt-5.3", + "description": "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.3-codex-spark", + "object": "model", + "created": 1770912000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.3 Codex Spark", + "version": "gpt-5.3", + "description": "Ultra-fast coding model.", + "context_length": 128000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.4", + "object": "model", + "created": 1772668800, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.4", + "version": "gpt-5.4", + "description": "Stable version of GPT 5.4", + "context_length": 1050000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + } + ], + "codex-pro": [ + { + "id": "gpt-5", + "object": "model", + "created": 1754524800, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 
5", + "version": "gpt-5-2025-08-07", + "description": "Stable version of GPT 5, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "minimal", + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5-codex", + "object": "model", + "created": 1757894400, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5 Codex", + "version": "gpt-5-2025-09-15", + "description": "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5-codex-mini", + "object": "model", + "created": 1762473600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5 Codex Mini", + "version": "gpt-5-2025-11-07", + "description": "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1", + "object": "model", + "created": 1762905600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5", + "version": "gpt-5.1-2025-11-12", + "description": "Stable version of GPT 5, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "none", + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1-codex", + "object": "model", + "created": 1762905600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.1 Codex", + "version": "gpt-5.1-2025-11-12", + "description": "Stable version of GPT 5.1 
Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1-codex-mini", + "object": "model", + "created": 1762905600, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.1 Codex Mini", + "version": "gpt-5.1-2025-11-12", + "description": "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high" + ] + } + }, + { + "id": "gpt-5.1-codex-max", + "object": "model", + "created": 1763424000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.1 Codex Max", + "version": "gpt-5.1-max", + "description": "Stable version of GPT 5.1 Codex Max", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.2", + "object": "model", + "created": 1765440000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.2", + "version": "gpt-5.2", + "description": "Stable version of GPT 5.2", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "none", + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.2-codex", + "object": "model", + "created": 1765440000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.2 Codex", + "version": "gpt-5.2", + "description": "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + 
"thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.3-codex", + "object": "model", + "created": 1770307200, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.3 Codex", + "version": "gpt-5.3", + "description": "Stable version of GPT 5.3 Codex, The best model for coding and agentic tasks across domains.", + "context_length": 400000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.3-codex-spark", + "object": "model", + "created": 1770912000, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.3 Codex Spark", + "version": "gpt-5.3", + "description": "Ultra-fast coding model.", + "context_length": 128000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "gpt-5.4", + "object": "model", + "created": 1772668800, + "owned_by": "openai", + "type": "openai", + "display_name": "GPT 5.4", + "version": "gpt-5.4", + "description": "Stable version of GPT 5.4", + "context_length": 1050000, + "max_completion_tokens": 128000, + "supported_parameters": [ + "tools" + ], + "thinking": { + "levels": [ + "low", + "medium", + "high", + "xhigh" + ] + } + } + ], + "qwen": [ + { + "id": "qwen3-coder-plus", + "object": "model", + "created": 1753228800, + "owned_by": "qwen", + "type": "qwen", + "display_name": "Qwen3 Coder Plus", + "version": "3.0", + "description": "Advanced code generation and understanding model", + "context_length": 32768, + "max_completion_tokens": 8192, + "supported_parameters": [ + "temperature", + "top_p", + "max_tokens", + "stream", + "stop" + ] + }, + { + "id": "qwen3-coder-flash", + "object": "model", + "created": 1753228800, + "owned_by": "qwen", + "type": "qwen", + "display_name": "Qwen3 Coder Flash", + "version": "3.0", + 
"description": "Fast code generation model", + "context_length": 8192, + "max_completion_tokens": 2048, + "supported_parameters": [ + "temperature", + "top_p", + "max_tokens", + "stream", + "stop" + ] + }, + { + "id": "coder-model", + "object": "model", + "created": 1771171200, + "owned_by": "qwen", + "type": "qwen", + "display_name": "Qwen 3.5 Plus", + "version": "3.5", + "description": "efficient hybrid model with leading coding performance", + "context_length": 1048576, + "max_completion_tokens": 65536, + "supported_parameters": [ + "temperature", + "top_p", + "max_tokens", + "stream", + "stop" + ] + }, + { + "id": "vision-model", + "object": "model", + "created": 1758672000, + "owned_by": "qwen", + "type": "qwen", + "display_name": "Qwen3 Vision Model", + "version": "3.0", + "description": "Vision model model", + "context_length": 32768, + "max_completion_tokens": 2048, + "supported_parameters": [ + "temperature", + "top_p", + "max_tokens", + "stream", + "stop" + ] + } + ], + "iflow": [ + { + "id": "qwen3-coder-plus", + "object": "model", + "created": 1753228800, + "owned_by": "iflow", + "type": "iflow", + "display_name": "Qwen3-Coder-Plus", + "description": "Qwen3 Coder Plus code generation" + }, + { + "id": "qwen3-max", + "object": "model", + "created": 1758672000, + "owned_by": "iflow", + "type": "iflow", + "display_name": "Qwen3-Max", + "description": "Qwen3 flagship model" + }, + { + "id": "qwen3-vl-plus", + "object": "model", + "created": 1758672000, + "owned_by": "iflow", + "type": "iflow", + "display_name": "Qwen3-VL-Plus", + "description": "Qwen3 multimodal vision-language" + }, + { + "id": "qwen3-max-preview", + "object": "model", + "created": 1757030400, + "owned_by": "iflow", + "type": "iflow", + "display_name": "Qwen3-Max-Preview", + "description": "Qwen3 Max preview build", + "thinking": { + "levels": [ + "none", + "auto", + "minimal", + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "glm-4.6", + "object": "model", + "created": 
1759190400, + "owned_by": "iflow", + "type": "iflow", + "display_name": "GLM-4.6", + "description": "Zhipu GLM 4.6 general model", + "thinking": { + "levels": [ + "none", + "auto", + "minimal", + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "kimi-k2", + "object": "model", + "created": 1752192000, + "owned_by": "iflow", + "type": "iflow", + "display_name": "Kimi-K2", + "description": "Moonshot Kimi K2 general model" + }, + { + "id": "deepseek-v3.2", + "object": "model", + "created": 1759104000, + "owned_by": "iflow", + "type": "iflow", + "display_name": "DeepSeek-V3.2-Exp", + "description": "DeepSeek V3.2 experimental", + "thinking": { + "levels": [ + "none", + "auto", + "minimal", + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "deepseek-v3.1", + "object": "model", + "created": 1756339200, + "owned_by": "iflow", + "type": "iflow", + "display_name": "DeepSeek-V3.1-Terminus", + "description": "DeepSeek V3.1 Terminus", + "thinking": { + "levels": [ + "none", + "auto", + "minimal", + "low", + "medium", + "high", + "xhigh" + ] + } + }, + { + "id": "deepseek-r1", + "object": "model", + "created": 1737331200, + "owned_by": "iflow", + "type": "iflow", + "display_name": "DeepSeek-R1", + "description": "DeepSeek reasoning model R1" + }, + { + "id": "deepseek-v3", + "object": "model", + "created": 1734307200, + "owned_by": "iflow", + "type": "iflow", + "display_name": "DeepSeek-V3-671B", + "description": "DeepSeek V3 671B" + }, + { + "id": "qwen3-32b", + "object": "model", + "created": 1747094400, + "owned_by": "iflow", + "type": "iflow", + "display_name": "Qwen3-32B", + "description": "Qwen3 32B" + }, + { + "id": "qwen3-235b-a22b-thinking-2507", + "object": "model", + "created": 1753401600, + "owned_by": "iflow", + "type": "iflow", + "display_name": "Qwen3-235B-A22B-Thinking", + "description": "Qwen3 235B A22B Thinking (2507)" + }, + { + "id": "qwen3-235b-a22b-instruct", + "object": "model", + "created": 1753401600, + "owned_by": "iflow", 
+ "type": "iflow", + "display_name": "Qwen3-235B-A22B-Instruct", + "description": "Qwen3 235B A22B Instruct" + }, + { + "id": "qwen3-235b", + "object": "model", + "created": 1753401600, + "owned_by": "iflow", + "type": "iflow", + "display_name": "Qwen3-235B-A22B", + "description": "Qwen3 235B A22B" + }, + { + "id": "iflow-rome-30ba3b", + "object": "model", + "created": 1736899200, + "owned_by": "iflow", + "type": "iflow", + "display_name": "iFlow-ROME", + "description": "iFlow Rome 30BA3B model" + } + ], + "kimi": [ + { + "id": "kimi-k2", + "object": "model", + "created": 1752192000, + "owned_by": "moonshot", + "type": "kimi", + "display_name": "Kimi K2", + "description": "Kimi K2 - Moonshot AI's flagship coding model", + "context_length": 131072, + "max_completion_tokens": 32768 + }, + { + "id": "kimi-k2-thinking", + "object": "model", + "created": 1762387200, + "owned_by": "moonshot", + "type": "kimi", + "display_name": "Kimi K2 Thinking", + "description": "Kimi K2 Thinking - Extended reasoning model", + "context_length": 131072, + "max_completion_tokens": 32768, + "thinking": { + "min": 1024, + "max": 32000, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + { + "id": "kimi-k2.5", + "object": "model", + "created": 1769472000, + "owned_by": "moonshot", + "type": "kimi", + "display_name": "Kimi K2.5", + "description": "Kimi K2.5 - Latest Moonshot AI coding model with improved capabilities", + "context_length": 131072, + "max_completion_tokens": 32768, + "thinking": { + "min": 1024, + "max": 32000, + "zero_allowed": true, + "dynamic_allowed": true + } + } + ], + "antigravity": { + "claude-opus-4-6-thinking": { + "thinking": { + "min": 1024, + "max": 64000, + "zero_allowed": true, + "dynamic_allowed": true + }, + "max_completion_tokens": 64000 + }, + "claude-sonnet-4-6": { + "thinking": { + "min": 1024, + "max": 64000, + "zero_allowed": true, + "dynamic_allowed": true + }, + "max_completion_tokens": 64000 + }, + "gemini-2.5-flash": { + "thinking": { + 
"max": 24576, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + "gemini-2.5-flash-lite": { + "thinking": { + "max": 24576, + "zero_allowed": true, + "dynamic_allowed": true + } + }, + "gemini-3-flash": { + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "minimal", + "low", + "medium", + "high" + ] + } + }, + "gemini-3-pro-high": { + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "low", + "high" + ] + } + }, + "gemini-3-pro-low": { + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "low", + "high" + ] + } + }, + "gemini-3.1-flash-image": { + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "minimal", + "high" + ] + } + }, + "gemini-3.1-flash-lite-preview": { + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "minimal", + "high" + ] + } + }, + "gemini-3.1-pro-high": { + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "low", + "high" + ] + } + }, + "gemini-3.1-pro-low": { + "thinking": { + "min": 128, + "max": 32768, + "dynamic_allowed": true, + "levels": [ + "low", + "high" + ] + } + }, + "gpt-oss-120b-medium": {} + } +} \ No newline at end of file diff --git a/internal/runtime/executor/claude_executor_test.go b/internal/runtime/executor/claude_executor_test.go index 7bf77a7a..fa458c0f 100644 --- a/internal/runtime/executor/claude_executor_test.go +++ b/internal/runtime/executor/claude_executor_test.go @@ -842,8 +842,8 @@ func TestClaudeExecutor_ExecuteStream_AcceptEncodingOverrideCannotBypassIdentity executor := NewClaudeExecutor(&config.Config{}) // Inject Accept-Encoding via the custom header attribute mechanism. 
auth := &cliproxyauth.Auth{Attributes: map[string]string{ - "api_key": "key-123", - "base_url": server.URL, + "api_key": "key-123", + "base_url": server.URL, "header:Accept-Encoding": "gzip, deflate, br, zstd", }} payload := []byte(`{"messages":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`) diff --git a/internal/watcher/synthesizer/file.go b/internal/watcher/synthesizer/file.go index 02a0cefa..ab54aeaa 100644 --- a/internal/watcher/synthesizer/file.go +++ b/internal/watcher/synthesizer/file.go @@ -10,6 +10,7 @@ import ( "strings" "time" + "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex" "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli" coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" ) @@ -149,6 +150,16 @@ func synthesizeFileAuths(ctx *SynthesisContext, fullPath string, data []byte) [] } } ApplyAuthExcludedModelsMeta(a, cfg, perAccountExcluded, "oauth") + // For codex auth files, extract plan_type from the JWT id_token. + if provider == "codex" { + if idTokenRaw, ok := metadata["id_token"].(string); ok && strings.TrimSpace(idTokenRaw) != "" { + if claims, errParse := codex.ParseJWTToken(idTokenRaw); errParse == nil && claims != nil { + if pt := strings.TrimSpace(claims.CodexAuthInfo.ChatgptPlanType); pt != "" { + a.Attributes["plan_type"] = pt + } + } + } + } if provider == "gemini-cli" { if virtuals := SynthesizeGeminiVirtualAuths(a, metadata, now); len(virtuals) > 0 { for _, v := range virtuals { diff --git a/sdk/api/handlers/openai/openai_responses_websocket_test.go b/sdk/api/handlers/openai/openai_responses_websocket_test.go index c7348583..981c6630 100644 --- a/sdk/api/handlers/openai/openai_responses_websocket_test.go +++ b/sdk/api/handlers/openai/openai_responses_websocket_test.go @@ -266,7 +266,6 @@ func TestAppendWebsocketEvent(t *testing.T) { } } - func TestAppendWebsocketEventTruncatesAtLimit(t *testing.T) { var builder strings.Builder payload := bytes.Repeat([]byte("x"), wsBodyLogMaxSize) 
diff --git a/sdk/auth/codex_device.go b/sdk/auth/codex_device.go index 78a95af8..10f59fb9 100644 --- a/sdk/auth/codex_device.go +++ b/sdk/auth/codex_device.go @@ -287,5 +287,8 @@ func (a *CodexAuthenticator) buildAuthRecord(authSvc *codex.CodexAuth, authBundl FileName: fileName, Storage: tokenStorage, Metadata: metadata, + Attributes: map[string]string{ + "plan_type": planType, + }, }, nil } diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 4dbf44f3..82f6c85d 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -858,7 +858,22 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) { } models = applyExcludedModels(models, excluded) case "codex": - models = registry.GetOpenAIModels() + codexPlanType := "" + if a.Attributes != nil { + codexPlanType = strings.TrimSpace(a.Attributes["plan_type"]) + } + switch strings.ToLower(codexPlanType) { + case "pro": + models = registry.GetCodexProModels() + case "plus": + models = registry.GetCodexPlusModels() + case "team": + models = registry.GetCodexTeamModels() + case "free": + models = registry.GetCodexFreeModels() + default: + models = registry.GetCodexProModels() + } if entry := s.resolveConfigCodexKey(a); entry != nil { if len(entry.Models) > 0 { models = buildCodexConfigModels(entry)