diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go index 7b8b262e..e036a04f 100644 --- a/internal/registry/model_registry.go +++ b/internal/registry/model_registry.go @@ -47,6 +47,10 @@ type ModelInfo struct { MaxCompletionTokens int `json:"max_completion_tokens,omitempty"` // SupportedParameters lists supported parameters SupportedParameters []string `json:"supported_parameters,omitempty"` + // SupportedInputModalities lists supported input modalities (e.g., TEXT, IMAGE, VIDEO, AUDIO) + SupportedInputModalities []string `json:"supportedInputModalities,omitempty"` + // SupportedOutputModalities lists supported output modalities (e.g., TEXT, IMAGE) + SupportedOutputModalities []string `json:"supportedOutputModalities,omitempty"` // Thinking holds provider-specific reasoning/thinking budget capabilities. // This is optional and currently used for Gemini thinking budget normalization. @@ -499,6 +503,12 @@ func cloneModelInfo(model *ModelInfo) *ModelInfo { if len(model.SupportedParameters) > 0 { copyModel.SupportedParameters = append([]string(nil), model.SupportedParameters...) } + if len(model.SupportedInputModalities) > 0 { + copyModel.SupportedInputModalities = append([]string(nil), model.SupportedInputModalities...) + } + if len(model.SupportedOutputModalities) > 0 { + copyModel.SupportedOutputModalities = append([]string(nil), model.SupportedOutputModalities...) + } return ©Model } @@ -1067,6 +1077,12 @@ func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string) if len(model.SupportedGenerationMethods) > 0 { result["supportedGenerationMethods"] = model.SupportedGenerationMethods } + if len(model.SupportedInputModalities) > 0 { + result["supportedInputModalities"] = model.SupportedInputModalities + } + if len(model.SupportedOutputModalities) > 0 { + result["supportedOutputModalities"] = model.SupportedOutputModalities + } return result default: diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 7e480a97..e697b64e 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -1176,6 +1176,29 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c OwnedBy: antigravityAuthType, Type: antigravityAuthType, } + + // Build input modalities from upstream capability flags. + inputModalities := []string{"TEXT"} + if modelData.Get("supportsImages").Bool() { + inputModalities = append(inputModalities, "IMAGE") + } + if modelData.Get("supportsVideo").Bool() { + inputModalities = append(inputModalities, "VIDEO") + } + modelInfo.SupportedInputModalities = inputModalities + modelInfo.SupportedOutputModalities = []string{"TEXT"} + + // Token limits from upstream. + if maxTok := modelData.Get("maxTokens").Int(); maxTok > 0 { + modelInfo.InputTokenLimit = int(maxTok) + } + if maxOut := modelData.Get("maxOutputTokens").Int(); maxOut > 0 { + modelInfo.OutputTokenLimit = int(maxOut) + } + + // Supported generation methods (Gemini v1beta convention). + modelInfo.SupportedGenerationMethods = []string{"generateContent", "countTokens"} + // Look up Thinking support from static config using upstream model name. if modelCfg != nil { if modelCfg.Thinking != nil {