v6.5.56 (#12)

* feat(api): add comprehensive ampcode management endpoints

  Add new REST API endpoints under /v0/management/ampcode for managing ampcode configuration including upstream URL, API key, localhost restriction, model mappings, and force model mappings settings.

  - Move force-model-mappings from config_basic to config_lists
  - Add GET/PUT/PATCH/DELETE endpoints for all ampcode settings
  - Support model mapping CRUD with upsert (PATCH) capability
  - Add comprehensive test coverage for all ampcode endpoints

* refactor(api): simplify request body parsing in ampcode handlers
* feat(logging): add upstream API request/response capture to streaming logs
* style(logging): remove redundant separator line from response section
* feat(antigravity): enforce thinking budget limits for Claude models
* refactor(logging): remove unused variable in `ensureAttempt` and redundant function call

---------

Co-authored-by: hkfires <10558748+hkfires@users.noreply.github.com>
2026-04-21 07:02:05 +00:00 · 2025-12-08 22:32:29 +08:00
parent 6b49580716
commit ab9e9442ec
11 changed files with 1263 additions and 70 deletions
--- a/internal/api/handlers/management/config_basic.go
+++ b/internal/api/handlers/management/config_basic.go
@@ -241,11 +241,3 @@ func (h *Handler) DeleteProxyURL(c *gin.Context) {
 	h.cfg.ProxyURL = ""
 	h.persist(c)
 }
-
-// Force Model Mappings (for Amp CLI)
-func (h *Handler) GetForceModelMappings(c *gin.Context) {
-	c.JSON(200, gin.H{"force-model-mappings": h.cfg.AmpCode.ForceModelMappings})
-}
-func (h *Handler) PutForceModelMappings(c *gin.Context) {
-	h.updateBoolField(c, func(v bool) { h.cfg.AmpCode.ForceModelMappings = v })
-}
--- a/internal/api/handlers/management/config_lists.go
+++ b/internal/api/handlers/management/config_lists.go
@@ -706,3 +706,155 @@ func normalizeClaudeKey(entry *config.ClaudeKey) {
 	}
 	entry.Models = normalized
 }
+
+// GetAmpCode returns the complete ampcode configuration.
+func (h *Handler) GetAmpCode(c *gin.Context) {
+	if h == nil || h.cfg == nil {
+		c.JSON(200, gin.H{"ampcode": config.AmpCode{}})
+		return
+	}
+	c.JSON(200, gin.H{"ampcode": h.cfg.AmpCode})
+}
+
+// GetAmpUpstreamURL returns the ampcode upstream URL.
+func (h *Handler) GetAmpUpstreamURL(c *gin.Context) {
+	if h == nil || h.cfg == nil {
+		c.JSON(200, gin.H{"upstream-url": ""})
+		return
+	}
+	c.JSON(200, gin.H{"upstream-url": h.cfg.AmpCode.UpstreamURL})
+}
+
+// PutAmpUpstreamURL updates the ampcode upstream URL.
+func (h *Handler) PutAmpUpstreamURL(c *gin.Context) {
+	h.updateStringField(c, func(v string) { h.cfg.AmpCode.UpstreamURL = strings.TrimSpace(v) })
+}
+
+// DeleteAmpUpstreamURL clears the ampcode upstream URL.
+func (h *Handler) DeleteAmpUpstreamURL(c *gin.Context) {
+	h.cfg.AmpCode.UpstreamURL = ""
+	h.persist(c)
+}
+
+// GetAmpUpstreamAPIKey returns the ampcode upstream API key.
+func (h *Handler) GetAmpUpstreamAPIKey(c *gin.Context) {
+	if h == nil || h.cfg == nil {
+		c.JSON(200, gin.H{"upstream-api-key": ""})
+		return
+	}
+	c.JSON(200, gin.H{"upstream-api-key": h.cfg.AmpCode.UpstreamAPIKey})
+}
+
+// PutAmpUpstreamAPIKey updates the ampcode upstream API key.
+func (h *Handler) PutAmpUpstreamAPIKey(c *gin.Context) {
+	h.updateStringField(c, func(v string) { h.cfg.AmpCode.UpstreamAPIKey = strings.TrimSpace(v) })
+}
+
+// DeleteAmpUpstreamAPIKey clears the ampcode upstream API key.
+func (h *Handler) DeleteAmpUpstreamAPIKey(c *gin.Context) {
+	h.cfg.AmpCode.UpstreamAPIKey = ""
+	h.persist(c)
+}
+
+// GetAmpRestrictManagementToLocalhost returns the localhost restriction setting.
+func (h *Handler) GetAmpRestrictManagementToLocalhost(c *gin.Context) {
+	if h == nil || h.cfg == nil {
+		c.JSON(200, gin.H{"restrict-management-to-localhost": true})
+		return
+	}
+	c.JSON(200, gin.H{"restrict-management-to-localhost": h.cfg.AmpCode.RestrictManagementToLocalhost})
+}
+
+// PutAmpRestrictManagementToLocalhost updates the localhost restriction setting.
+func (h *Handler) PutAmpRestrictManagementToLocalhost(c *gin.Context) {
+	h.updateBoolField(c, func(v bool) { h.cfg.AmpCode.RestrictManagementToLocalhost = v })
+}
+
+// GetAmpModelMappings returns the ampcode model mappings.
+func (h *Handler) GetAmpModelMappings(c *gin.Context) {
+	if h == nil || h.cfg == nil {
+		c.JSON(200, gin.H{"model-mappings": []config.AmpModelMapping{}})
+		return
+	}
+	c.JSON(200, gin.H{"model-mappings": h.cfg.AmpCode.ModelMappings})
+}
+
+// PutAmpModelMappings replaces all ampcode model mappings.
+func (h *Handler) PutAmpModelMappings(c *gin.Context) {
+	var body struct {
+		Value []config.AmpModelMapping `json:"value"`
+	}
+	if err := c.ShouldBindJSON(&body); err != nil {
+		c.JSON(400, gin.H{"error": "invalid body"})
+		return
+	}
+	h.cfg.AmpCode.ModelMappings = body.Value
+	h.persist(c)
+}
+
+// PatchAmpModelMappings upserts model mappings keyed by trimmed "from": matches are replaced in place, others are appended.
+func (h *Handler) PatchAmpModelMappings(c *gin.Context) {
+	var body struct {
+		Value []config.AmpModelMapping `json:"value"`
+	}
+	if err := c.ShouldBindJSON(&body); err != nil {
+		c.JSON(400, gin.H{"error": "invalid body"})
+		return
+	}
+
+	existing := make(map[string]int)
+	for i, m := range h.cfg.AmpCode.ModelMappings {
+		existing[strings.TrimSpace(m.From)] = i
+	}
+
+	for _, newMapping := range body.Value {
+		from := strings.TrimSpace(newMapping.From)
+		if idx, ok := existing[from]; ok {
+			h.cfg.AmpCode.ModelMappings[idx] = newMapping
+		} else {
+			h.cfg.AmpCode.ModelMappings = append(h.cfg.AmpCode.ModelMappings, newMapping)
+			existing[from] = len(h.cfg.AmpCode.ModelMappings) - 1
+		}
+	}
+	h.persist(c)
+}
+
+// DeleteAmpModelMappings removes mappings whose "from" is listed in the body; a missing, invalid, or empty body clears all mappings.
+func (h *Handler) DeleteAmpModelMappings(c *gin.Context) {
+	var body struct {
+		Value []string `json:"value"`
+	}
+	if err := c.ShouldBindJSON(&body); err != nil || len(body.Value) == 0 {
+		h.cfg.AmpCode.ModelMappings = nil
+		h.persist(c)
+		return
+	}
+
+	toRemove := make(map[string]bool)
+	for _, from := range body.Value {
+		toRemove[strings.TrimSpace(from)] = true
+	}
+
+	newMappings := make([]config.AmpModelMapping, 0, len(h.cfg.AmpCode.ModelMappings))
+	for _, m := range h.cfg.AmpCode.ModelMappings {
+		if !toRemove[strings.TrimSpace(m.From)] {
+			newMappings = append(newMappings, m)
+		}
+	}
+	h.cfg.AmpCode.ModelMappings = newMappings
+	h.persist(c)
+}
+
+// GetAmpForceModelMappings returns whether model mappings are forced.
+func (h *Handler) GetAmpForceModelMappings(c *gin.Context) {
+	if h == nil || h.cfg == nil {
+		c.JSON(200, gin.H{"force-model-mappings": false})
+		return
+	}
+	c.JSON(200, gin.H{"force-model-mappings": h.cfg.AmpCode.ForceModelMappings})
+}
+
+// PutAmpForceModelMappings updates the force model mappings setting.
+func (h *Handler) PutAmpForceModelMappings(c *gin.Context) {
+	h.updateBoolField(c, func(v bool) { h.cfg.AmpCode.ForceModelMappings = v })
+}
--- a/internal/api/handlers/management/handler.go
+++ b/internal/api/handlers/management/handler.go
@@ -240,16 +240,6 @@ func (h *Handler) updateBoolField(c *gin.Context, set func(bool)) {
 		Value *bool `json:"value"`
 	}
 	if err := c.ShouldBindJSON(&body); err != nil || body.Value == nil {
-		var m map[string]any
-		if err2 := c.ShouldBindJSON(&m); err2 == nil {
-			for _, v := range m {
-				if b, ok := v.(bool); ok {
-					set(b)
-					h.persist(c)
-					return
-				}
-			}
-		}
 		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid body"})
 		return
 	}
--- a/internal/api/middleware/response_writer.go
+++ b/internal/api/middleware/response_writer.go
@@ -232,7 +232,16 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
 			w.streamDone = nil
 		}

+		// Write API Request and Response to the streaming log before closing
 		if w.streamWriter != nil {
+			apiRequest := w.extractAPIRequest(c)
+			if len(apiRequest) > 0 {
+				_ = w.streamWriter.WriteAPIRequest(apiRequest)
+			}
+			apiResponse := w.extractAPIResponse(c)
+			if len(apiResponse) > 0 {
+				_ = w.streamWriter.WriteAPIResponse(apiResponse)
+			}
 			if err := w.streamWriter.Close(); err != nil {
 				w.streamWriter = nil
 				return err
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -520,9 +520,25 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.PUT("/ws-auth", s.mgmt.PutWebsocketAuth)
 		mgmt.PATCH("/ws-auth", s.mgmt.PutWebsocketAuth)

-		mgmt.GET("/force-model-mappings", s.mgmt.GetForceModelMappings)
-		mgmt.PUT("/force-model-mappings", s.mgmt.PutForceModelMappings)
-		mgmt.PATCH("/force-model-mappings", s.mgmt.PutForceModelMappings)
+		mgmt.GET("/ampcode", s.mgmt.GetAmpCode)
+		mgmt.GET("/ampcode/upstream-url", s.mgmt.GetAmpUpstreamURL)
+		mgmt.PUT("/ampcode/upstream-url", s.mgmt.PutAmpUpstreamURL)
+		mgmt.PATCH("/ampcode/upstream-url", s.mgmt.PutAmpUpstreamURL)
+		mgmt.DELETE("/ampcode/upstream-url", s.mgmt.DeleteAmpUpstreamURL)
+		mgmt.GET("/ampcode/upstream-api-key", s.mgmt.GetAmpUpstreamAPIKey)
+		mgmt.PUT("/ampcode/upstream-api-key", s.mgmt.PutAmpUpstreamAPIKey)
+		mgmt.PATCH("/ampcode/upstream-api-key", s.mgmt.PutAmpUpstreamAPIKey)
+		mgmt.DELETE("/ampcode/upstream-api-key", s.mgmt.DeleteAmpUpstreamAPIKey)
+		mgmt.GET("/ampcode/restrict-management-to-localhost", s.mgmt.GetAmpRestrictManagementToLocalhost)
+		mgmt.PUT("/ampcode/restrict-management-to-localhost", s.mgmt.PutAmpRestrictManagementToLocalhost)
+		mgmt.PATCH("/ampcode/restrict-management-to-localhost", s.mgmt.PutAmpRestrictManagementToLocalhost)
+		mgmt.GET("/ampcode/model-mappings", s.mgmt.GetAmpModelMappings)
+		mgmt.PUT("/ampcode/model-mappings", s.mgmt.PutAmpModelMappings)
+		mgmt.PATCH("/ampcode/model-mappings", s.mgmt.PatchAmpModelMappings)
+		mgmt.DELETE("/ampcode/model-mappings", s.mgmt.DeleteAmpModelMappings)
+		mgmt.GET("/ampcode/force-model-mappings", s.mgmt.GetAmpForceModelMappings)
+		mgmt.PUT("/ampcode/force-model-mappings", s.mgmt.PutAmpForceModelMappings)
+		mgmt.PATCH("/ampcode/force-model-mappings", s.mgmt.PutAmpForceModelMappings)

 		mgmt.GET("/request-retry", s.mgmt.GetRequestRetry)
 		mgmt.PUT("/request-retry", s.mgmt.PutRequestRetry)
--- a/internal/logging/request_logger.go
+++ b/internal/logging/request_logger.go
@@ -84,6 +84,26 @@ type StreamingLogWriter interface {
 	//   - error: An error if writing fails, nil otherwise
 	WriteStatus(status int, headers map[string][]string) error

+	// WriteAPIRequest writes the upstream API request details to the log.
+	// It may be called any time before Close; FileStreamingLogWriter buffers it and emits it ahead of the response section.
+	//
+	// Parameters:
+	//   - apiRequest: The API request data (typically includes URL, headers, body sent upstream)
+	//
+	// Returns:
+	//   - error: An error if writing fails, nil otherwise
+	WriteAPIRequest(apiRequest []byte) error
+
+	// WriteAPIResponse writes the upstream API response details to the log.
+	// This should be called after the streaming response is complete.
+	//
+	// Parameters:
+	//   - apiResponse: The API response data
+	//
+	// Returns:
+	//   - error: An error if writing fails, nil otherwise
+	WriteAPIResponse(apiResponse []byte) error
+
 	// Close finalizes the log file and cleans up resources.
 	//
 	// Returns:
@@ -248,10 +268,11 @@ func (l *FileRequestLogger) LogStreamingRequest(url, method string, headers map[

 	// Create streaming writer
 	writer := &FileStreamingLogWriter{
-		file:      file,
-		chunkChan: make(chan []byte, 100), // Buffered channel for async writes
-		closeChan: make(chan struct{}),
-		errorChan: make(chan error, 1),
+		file:           file,
+		chunkChan:      make(chan []byte, 100), // Buffered channel for async writes
+		closeChan:      make(chan struct{}),
+		errorChan:      make(chan error, 1),
+		bufferedChunks: &bytes.Buffer{},
 	}

 	// Start async writer goroutine
@@ -628,11 +649,12 @@ func (l *FileRequestLogger) formatRequestInfo(url, method string, headers map[st

 // FileStreamingLogWriter implements StreamingLogWriter for file-based streaming logs.
 // It handles asynchronous writing of streaming response chunks to a file.
+// All data is buffered and written in the correct order when Close is called.
 type FileStreamingLogWriter struct {
 	// file is the file where log data is written.
 	file *os.File

-	// chunkChan is a channel for receiving response chunks to write.
+	// chunkChan is a channel for receiving response chunks to buffer.
 	chunkChan chan []byte

 	// closeChan is a channel for signaling when the writer is closed.
@@ -641,8 +663,23 @@ type FileStreamingLogWriter struct {
 	// errorChan is a channel for reporting errors during writing.
 	errorChan chan error

-	// statusWritten indicates whether the response status has been written.
+	// bufferedChunks stores the response chunks in order.
+	bufferedChunks *bytes.Buffer
+
+	// responseStatus stores the HTTP status code.
+	responseStatus int
+
+	// statusWritten indicates whether a non-zero status was recorded.
 	statusWritten bool
+
+	// responseHeaders stores the response headers.
+	responseHeaders map[string][]string
+
+	// apiRequest stores the upstream API request data.
+	apiRequest []byte
+
+	// apiResponse stores the upstream API response data.
+	apiResponse []byte
 }

 // WriteChunkAsync writes a response chunk asynchronously (non-blocking).
@@ -666,39 +703,65 @@ func (w *FileStreamingLogWriter) WriteChunkAsync(chunk []byte) {
 	}
 }

-// WriteStatus writes the response status and headers to the log.
+// WriteStatus buffers the response status and headers for later writing.
 //
 // Parameters:
 //   - status: The response status code
 //   - headers: The response headers
 //
 // Returns:
-//   - error: An error if writing fails, nil otherwise
+//   - error: Always returns nil (buffering cannot fail)
 func (w *FileStreamingLogWriter) WriteStatus(status int, headers map[string][]string) error {
-	if w.file == nil || w.statusWritten {
+	if status == 0 {
 		return nil
 	}

-	var content strings.Builder
-	content.WriteString("========================================\n")
-	content.WriteString("=== RESPONSE ===\n")
-	content.WriteString(fmt.Sprintf("Status: %d\n", status))
-
-	for key, values := range headers {
-		for _, value := range values {
-			content.WriteString(fmt.Sprintf("%s: %s\n", key, value))
+	w.responseStatus = status
+	if headers != nil {
+		w.responseHeaders = make(map[string][]string, len(headers))
+		for key, values := range headers {
+			headerValues := make([]string, len(values))
+			copy(headerValues, values)
+			w.responseHeaders[key] = headerValues
 		}
 	}
-	content.WriteString("\n")
+	w.statusWritten = true
+	return nil
+}

-	_, err := w.file.WriteString(content.String())
-	if err == nil {
-		w.statusWritten = true
+// WriteAPIRequest buffers the upstream API request details for later writing.
+//
+// Parameters:
+//   - apiRequest: The API request data (typically includes URL, headers, body sent upstream)
+//
+// Returns:
+//   - error: Always returns nil (buffering cannot fail)
+func (w *FileStreamingLogWriter) WriteAPIRequest(apiRequest []byte) error {
+	if len(apiRequest) == 0 {
+		return nil
 	}
-	return err
+	w.apiRequest = bytes.Clone(apiRequest)
+	return nil
+}
+
+// WriteAPIResponse buffers the upstream API response details for later writing.
+//
+// Parameters:
+//   - apiResponse: The API response data
+//
+// Returns:
+//   - error: Always returns nil (buffering cannot fail)
+func (w *FileStreamingLogWriter) WriteAPIResponse(apiResponse []byte) error {
+	if len(apiResponse) == 0 {
+		return nil
+	}
+	w.apiResponse = bytes.Clone(apiResponse)
+	return nil
 }

 // Close finalizes the log file and cleans up resources.
+// It writes all buffered data to the file in the correct order:
+// API REQUEST -> API RESPONSE -> RESPONSE (status, headers, body chunks)
 //
 // Returns:
 //   - error: An error if closing fails, nil otherwise
@@ -707,27 +770,84 @@ func (w *FileStreamingLogWriter) Close() error {
 		close(w.chunkChan)
 	}

-	// Wait for async writer to finish
+	// Wait for async writer to finish buffering chunks
 	if w.closeChan != nil {
 		<-w.closeChan
 		w.chunkChan = nil
 	}

-	if w.file != nil {
-		return w.file.Close()
+	if w.file == nil {
+		return nil
 	}

-	return nil
+	// Write all content in the correct order
+	var content strings.Builder
+
+	// 1. Write API REQUEST section
+	if len(w.apiRequest) > 0 {
+		if bytes.HasPrefix(w.apiRequest, []byte("=== API REQUEST")) {
+			content.Write(w.apiRequest)
+			if !bytes.HasSuffix(w.apiRequest, []byte("\n")) {
+				content.WriteString("\n")
+			}
+		} else {
+			content.WriteString("=== API REQUEST ===\n")
+			content.Write(w.apiRequest)
+			content.WriteString("\n")
+		}
+		content.WriteString("\n")
+	}
+
+	// 2. Write API RESPONSE section
+	if len(w.apiResponse) > 0 {
+		if bytes.HasPrefix(w.apiResponse, []byte("=== API RESPONSE")) {
+			content.Write(w.apiResponse)
+			if !bytes.HasSuffix(w.apiResponse, []byte("\n")) {
+				content.WriteString("\n")
+			}
+		} else {
+			content.WriteString("=== API RESPONSE ===\n")
+			content.Write(w.apiResponse)
+			content.WriteString("\n")
+		}
+		content.WriteString("\n")
+	}
+
+	// 3. Write RESPONSE section (status, headers, buffered chunks)
+	content.WriteString("=== RESPONSE ===\n")
+	if w.statusWritten {
+		content.WriteString(fmt.Sprintf("Status: %d\n", w.responseStatus))
+	}
+
+	for key, values := range w.responseHeaders {
+		for _, value := range values {
+			content.WriteString(fmt.Sprintf("%s: %s\n", key, value))
+		}
+	}
+	content.WriteString("\n")
+
+	// Write buffered response body chunks
+	if w.bufferedChunks != nil && w.bufferedChunks.Len() > 0 {
+		content.Write(w.bufferedChunks.Bytes())
+	}
+
+	// Write the complete content to file
+	if _, err := w.file.WriteString(content.String()); err != nil {
+		_ = w.file.Close()
+		return err
+	}
+
+	return w.file.Close()
 }

-// asyncWriter runs in a goroutine to handle async chunk writing.
-// It continuously reads chunks from the channel and writes them to the file.
+// asyncWriter runs in a goroutine to buffer chunks from the channel.
+// It continuously reads chunks from the channel and buffers them for later writing.
 func (w *FileStreamingLogWriter) asyncWriter() {
 	defer close(w.closeChan)

 	for chunk := range w.chunkChan {
-		if w.file != nil {
-			_, _ = w.file.Write(chunk)
+		if w.bufferedChunks != nil {
+			w.bufferedChunks.Write(chunk)
 		}
 	}
 }
@@ -754,6 +874,28 @@ func (w *NoOpStreamingLogWriter) WriteStatus(_ int, _ map[string][]string) error
 	return nil
 }

+// WriteAPIRequest is a no-op implementation that does nothing and always returns nil.
+//
+// Parameters:
+//   - apiRequest: The API request data (ignored)
+//
+// Returns:
+//   - error: Always returns nil
+func (w *NoOpStreamingLogWriter) WriteAPIRequest(_ []byte) error {
+	return nil
+}
+
+// WriteAPIResponse is a no-op implementation that does nothing and always returns nil.
+//
+// Parameters:
+//   - apiResponse: The API response data (ignored)
+//
+// Returns:
+//   - error: Always returns nil
+func (w *NoOpStreamingLogWriter) WriteAPIResponse(_ []byte) error {
+	return nil
+}
+
 // Close is a no-op implementation that does nothing and always returns nil.
 //
 // Returns:
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -943,18 +943,6 @@ func GetQwenModels() []*ModelInfo {
 	}
 }

-// GetAntigravityThinkingConfig returns the Thinking configuration for antigravity models.
-// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
-func GetAntigravityThinkingConfig() map[string]*ThinkingSupport {
-	return map[string]*ThinkingSupport{
-		"gemini-2.5-flash":                  {Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		"gemini-2.5-flash-lite":             {Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
-		"gemini-3-pro-preview":              {Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		"gemini-claude-sonnet-4-5-thinking": {Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true},
-		"gemini-claude-opus-4-5-thinking":   {Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true},
-	}
-}
-
 // GetIFlowModels returns supported models for iFlow OAuth accounts.
 func GetIFlowModels() []*ModelInfo {
 	entries := []struct {
@@ -998,6 +986,25 @@ func GetIFlowModels() []*ModelInfo {
 	return models
 }

+// AntigravityModelConfig captures static antigravity model overrides, including
+// Thinking budget limits and provider max completion tokens.
+type AntigravityModelConfig struct {
+	Thinking            *ThinkingSupport
+	MaxCompletionTokens int
+}
+
+// GetAntigravityModelConfig returns static configuration for antigravity models.
+// Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
+func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
+	return map[string]*AntigravityModelConfig{
+		"gemini-2.5-flash":                  {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		"gemini-2.5-flash-lite":             {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
+		"gemini-3-pro-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
+		"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"gemini-claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+	}
+}
+
 // GetGitHubCopilotModels returns the available models for GitHub Copilot.
 // These models are available through the GitHub Copilot API at api.githubcopilot.com.
 func GetGitHubCopilotModels() []*ModelInfo {
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -81,6 +81,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)

 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = normalizeAntigravityThinking(req.Model, translated)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -174,6 +175,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = normalizeAntigravityThinking(req.Model, translated)

 	baseURLs := antigravityBaseURLFallbackOrder(auth)
 	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
@@ -370,7 +372,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 		}

 		now := time.Now().Unix()
-		thinkingConfig := registry.GetAntigravityThinkingConfig()
+		modelConfig := registry.GetAntigravityModelConfig()
 		models := make([]*registry.ModelInfo, 0, len(result.Map()))
 		for originalName := range result.Map() {
 			aliasName := modelName2Alias(originalName)
@@ -387,8 +389,13 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 					Type:        antigravityAuthType,
 				}
 				// Look up Thinking support from static config using alias name
-				if thinking, ok := thinkingConfig[aliasName]; ok {
-					modelInfo.Thinking = thinking
+				if cfg, ok := modelConfig[aliasName]; ok {
+					if cfg.Thinking != nil {
+						modelInfo.Thinking = cfg.Thinking
+					}
+					if cfg.MaxCompletionTokens > 0 {
+						modelInfo.MaxCompletionTokens = cfg.MaxCompletionTokens
+					}
 				}
 				models = append(models, modelInfo)
 			}
@@ -812,3 +819,53 @@ func alias2ModelName(modelName string) string {
 		return modelName
 	}
 }
+
+// normalizeAntigravityThinking clamps or removes thinking config based on model support.
+// For Claude models, it additionally ensures thinking budget < max_tokens.
+func normalizeAntigravityThinking(model string, payload []byte) []byte {
+	payload = util.StripThinkingConfigIfUnsupported(model, payload)
+	if !util.ModelSupportsThinking(model) {
+		return payload
+	}
+	budget := gjson.GetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget")
+	if !budget.Exists() {
+		return payload
+	}
+	raw := int(budget.Int())
+	normalized := util.NormalizeThinkingBudget(model, raw)
+
+	isClaude := strings.Contains(strings.ToLower(model), "claude")
+	if isClaude {
+		effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload)
+		if effectiveMax > 0 && normalized >= effectiveMax {
+			normalized = effectiveMax - 1
+			if normalized < 1 {
+				normalized = 1
+			}
+		}
+		if setDefaultMax {
+			if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil {
+				payload = res
+			}
+		}
+	}
+
+	updated, err := sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+	if err != nil {
+		return payload
+	}
+	return updated
+}
+
+// antigravityEffectiveMaxTokens returns the max tokens to cap thinking:
+// prefer request-provided maxOutputTokens; otherwise fall back to model default.
+// The boolean indicates whether the value came from the model default (and thus should be written back).
+func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromModel bool) {
+	if maxTok := gjson.GetBytes(payload, "request.generationConfig.maxOutputTokens"); maxTok.Exists() && maxTok.Int() > 0 {
+		return int(maxTok.Int()), false
+	}
+	if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.MaxCompletionTokens > 0 {
+		return modelInfo.MaxCompletionTokens, true
+	}
+	return 0, false
+}
--- a/internal/runtime/executor/logging_helpers.go
+++ b/internal/runtime/executor/logging_helpers.go
@@ -157,7 +157,7 @@ func appendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byt
 	if ginCtx == nil {
 		return
 	}
-	attempts, attempt := ensureAttempt(ginCtx)
+	_, attempt := ensureAttempt(ginCtx)
 	ensureResponseIntro(attempt)

 	if !attempt.headersWritten {
@@ -175,8 +175,6 @@ func appendAPIResponseChunk(ctx context.Context, cfg *config.Config, chunk []byt
 	}
 	attempt.response.WriteString(string(data))
 	attempt.bodyHasContent = true
-
-	updateAggregatedResponse(ginCtx, attempts)
 }

 func ginContextFrom(ctx context.Context) *gin.Context {
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -111,7 +111,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 	}

-	// Temperature/top_p/top_k
+	// Temperature/top_p/top_k/max_tokens
 	if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
 		out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
 	}
@@ -121,6 +121,9 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	if tkr := gjson.GetBytes(rawJSON, "top_k"); tkr.Exists() && tkr.Type == gjson.Number {
 		out, _ = sjson.SetBytes(out, "request.generationConfig.topK", tkr.Num)
 	}
+	if maxTok := gjson.GetBytes(rawJSON, "max_tokens"); maxTok.Exists() && maxTok.Type == gjson.Number {
+		out, _ = sjson.SetBytes(out, "request.generationConfig.maxOutputTokens", maxTok.Num)
+	}

 	// Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities
 	// e.g. "modalities": ["image", "text"] -> ["IMAGE", "TEXT"]