From 8b9dbe10f0794738dfc9c6340179f81f314f97c0 Mon Sep 17 00:00:00 2001
From: Michael <michael@x.com>
Date: Mon, 6 Apr 2026 20:19:42 +0800
Subject: [PATCH 1/7] fix: record zero usage

---
 .../runtime/executor/helps/usage_helpers.go   |  3 -
 test/usage_logging_test.go                    | 97 +++++++++++++++++++
 2 files changed, 97 insertions(+), 3 deletions(-)
 create mode 100644 test/usage_logging_test.go

diff --git a/internal/runtime/executor/helps/usage_helpers.go b/internal/runtime/executor/helps/usage_helpers.go
index 23040984..8da8fd1e 100644
--- a/internal/runtime/executor/helps/usage_helpers.go
+++ b/internal/runtime/executor/helps/usage_helpers.go
@@ -69,9 +69,6 @@ func (r *UsageReporter) publishWithOutcome(ctx context.Context, detail usage.Det
 			detail.TotalTokens = total
 		}
 	}
-	if detail.InputTokens == 0 && detail.OutputTokens == 0 && detail.ReasoningTokens == 0 && detail.CachedTokens == 0 && detail.TotalTokens == 0 && !failed {
-		return
-	}
 	r.once.Do(func() {
 		usage.PublishRecord(ctx, r.buildRecord(detail, failed))
 	})
diff --git a/test/usage_logging_test.go b/test/usage_logging_test.go
new file mode 100644
index 00000000..41c2ee34
--- /dev/null
+++ b/test/usage_logging_test.go
@@ -0,0 +1,97 @@
+package test
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
+	runtimeexecutor "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
+	internalusage "github.com/router-for-me/CLIProxyAPI/v6/internal/usage"
+	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
+	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
+	sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
+)
+
+// TestGeminiExecutorRecordsSuccessfulZeroUsageInStatistics checks that a successful Gemini
+// request whose usageMetadata is all zeros still yields a request-statistics detail.
+func TestGeminiExecutorRecordsSuccessfulZeroUsageInStatistics(t *testing.T) {
+	model := fmt.Sprintf("gemini-2.5-flash-zero-usage-%d", time.Now().UnixNano())
+	source := fmt.Sprintf("zero-usage-%d@example.com", time.Now().UnixNano())
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if wantPath := "/v1beta/models/" + model + ":generateContent"; r.URL.Path != wantPath {
+			// The handler runs on a server goroutine, where t.Fatalf must not be called.
+			t.Errorf("path = %q, want %q", r.URL.Path, wantPath)
+			http.Error(w, "unexpected path", http.StatusNotFound)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":0,"candidatesTokenCount":0,"totalTokenCount":0}}`))
+	}))
+	defer server.Close()
+
+	executor := runtimeexecutor.NewGeminiExecutor(&config.Config{})
+	auth := &cliproxyauth.Auth{
+		Provider: "gemini",
+		Attributes: map[string]string{
+			"api_key":  "test-upstream-key",
+			"base_url": server.URL,
+		},
+		Metadata: map[string]any{"email": source},
+	}
+
+	prevStatsEnabled := internalusage.StatisticsEnabled()
+	internalusage.SetStatisticsEnabled(true)
+	t.Cleanup(func() { internalusage.SetStatisticsEnabled(prevStatsEnabled) })
+
+	_, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{
+		Model:   model,
+		Payload: []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`),
+	}, cliproxyexecutor.Options{
+		SourceFormat:    sdktranslator.FormatGemini,
+		OriginalRequest: []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`),
+	})
+	if err != nil {
+		t.Fatalf("Execute error: %v", err)
+	}
+
+	detail := waitForStatisticsDetail(t, "gemini", model, source)
+	if detail.Failed {
+		t.Fatalf("detail failed = true, want false")
+	}
+	if detail.Tokens.TotalTokens != 0 {
+		t.Fatalf("total tokens = %d, want 0", detail.Tokens.TotalTokens)
+	}
+}
+
+// waitForStatisticsDetail polls internalusage.GetRequestStatistics() until a detail whose
+// Source equals source appears under apiName/model, and fails the test after two seconds.
+func waitForStatisticsDetail(t *testing.T, apiName, model, source string) internalusage.RequestDetail {
+	t.Helper()
+
+	const pollInterval = 10 * time.Millisecond
+	// The post statement sleeps between polls, including after every continue.
+	for stopAt := time.Now().Add(2 * time.Second); time.Now().Before(stopAt); time.Sleep(pollInterval) {
+		stats := internalusage.GetRequestStatistics().Snapshot()
+		byAPI, found := stats.APIs[apiName]
+		if !found {
+			continue
+		}
+		byModel, found := byAPI.Models[model]
+		if !found {
+			continue
+		}
+		for _, entry := range byModel.Details {
+			if entry.Source == source {
+				return entry
+			}
+		}
+	}
+
+	t.Fatalf("timed out waiting for statistics detail for api=%q model=%q source=%q", apiName, model, source)
+	return internalusage.RequestDetail{}
+}

From 91e7591955e4c55954de64e79ad618ecd24cf477 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Wed, 8 Apr 2026 02:48:53 +0800
Subject: [PATCH 2/7] fix(executor): add transient 429 resource exhausted
 handling with retry logic

---
 .../runtime/executor/antigravity_executor.go  | 81 +++++++++++++++++++
 .../antigravity_executor_credits_test.go      | 80 ++++++++++++++++--
 2 files changed, 154 insertions(+), 7 deletions(-)

diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go
index ecab3c87..ed4ce1dc 100644
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -261,6 +261,28 @@ func classifyAntigravity429(body []byte) antigravity429Category {
 	return antigravity429Unknown
 }
 
+// antigravityHasQuotaResetDelayOrModelInfo reports whether any google.rpc.ErrorInfo entry in
+// error.details carries a non-blank metadata.quotaResetDelay or metadata.model value.
+func antigravityHasQuotaResetDelayOrModelInfo(body []byte) bool {
+	if len(body) == 0 {
+		return false
+	}
+	entries := gjson.GetBytes(body, "error.details")
+	if !(entries.Exists() && entries.IsArray()) {
+		return false
+	}
+	for _, entry := range entries.Array() {
+		if entry.Get("@type").String() != "type.googleapis.com/google.rpc.ErrorInfo" {
+			continue
+		}
+		if strings.TrimSpace(entry.Get("metadata.quotaResetDelay").String()) != "" ||
+			strings.TrimSpace(entry.Get("metadata.model").String()) != "" {
+			return true
+		}
+	}
+	return false
+}
+
 func antigravityCreditsRetryEnabled(cfg *config.Config) bool {
 	return cfg != nil && cfg.QuotaExceeded.AntigravityCredits
 }
@@ -362,6 +384,12 @@ func shouldMarkAntigravityCreditsExhausted(statusCode int, body []byte, reqErr e
 	lowerBody := strings.ToLower(string(body))
 	for _, keyword := range antigravityCreditsExhaustedKeywords {
 		if strings.Contains(lowerBody, keyword) {
+			if keyword == "resource has been exhausted" &&
+				statusCode == http.StatusTooManyRequests &&
+				classifyAntigravity429(body) == antigravity429Unknown &&
+				!antigravityHasQuotaResetDelayOrModelInfo(body) {
+				return false
+			}
 			return true
 		}
 	}
@@ -575,6 +603,14 @@ attemptLoop:
 					log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
 					continue
 				}
+				if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts {
+					delay := antigravityTransient429RetryDelay(attempt)
+					log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
+					if errWait := antigravityWait(ctx, delay); errWait != nil {
+						return resp, errWait
+					}
+					continue attemptLoop
+				}
 				if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
 					if idx+1 < len(baseURLs) {
 						log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
@@ -742,6 +778,14 @@ attemptLoop:
 					log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
 					continue
 				}
+				if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts {
+					delay := antigravityTransient429RetryDelay(attempt)
+					log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
+					if errWait := antigravityWait(ctx, delay); errWait != nil {
+						return resp, errWait
+					}
+					continue attemptLoop
+				}
 				if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
 					if idx+1 < len(baseURLs) {
 						log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
@@ -1158,6 +1202,14 @@ attemptLoop:
 					log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
 					continue
 				}
+				if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts {
+					delay := antigravityTransient429RetryDelay(attempt)
+					log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
+					if errWait := antigravityWait(ctx, delay); errWait != nil {
+						return nil, errWait
+					}
+					continue attemptLoop
+				}
 				if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
 					if idx+1 < len(baseURLs) {
 						log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
@@ -1774,6 +1826,24 @@ func antigravityShouldRetryNoCapacity(statusCode int, body []byte) bool {
 	return strings.Contains(msg, "no capacity available")
 }
 
+// antigravityShouldRetryTransientResourceExhausted429 reports whether a 429 response is an
+// unclassified RESOURCE_EXHAUSTED error whose body mentions "resource has been exhausted".
+func antigravityShouldRetryTransientResourceExhausted429(statusCode int, body []byte) bool {
+	// Cases run in order, so classification is only attempted on a non-empty 429 body.
+	switch {
+	case statusCode != http.StatusTooManyRequests, len(body) == 0:
+		return false
+	case classifyAntigravity429(body) != antigravity429Unknown:
+		return false
+	}
+	rpcStatus := gjson.GetBytes(body, "error.status").String()
+	if !strings.EqualFold(strings.TrimSpace(rpcStatus), "RESOURCE_EXHAUSTED") {
+		return false
+	}
+	// The phrase is matched case-insensitively against the whole body, not only error.message.
+	return strings.Contains(strings.ToLower(string(body)), "resource has been exhausted")
+}
+
 func antigravityNoCapacityRetryDelay(attempt int) time.Duration {
 	if attempt < 0 {
 		attempt = 0
@@ -1785,6 +1855,17 @@ func antigravityNoCapacityRetryDelay(attempt int) time.Duration {
 	return delay
 }
 
+// antigravityTransient429RetryDelay returns (attempt+1)*100ms capped at 500ms; negative attempts count as 0.
+func antigravityTransient429RetryDelay(attempt int) time.Duration {
+	switch {
+	case attempt < 0:
+		return 100 * time.Millisecond
+	case time.Duration(attempt+1)*100*time.Millisecond > 500*time.Millisecond:
+		return 500 * time.Millisecond
+	}
+	return time.Duration(attempt+1) * 100 * time.Millisecond
+}
+
 func antigravityWait(ctx context.Context, wait time.Duration) error {
 	if wait <= 0 {
 		return nil
diff --git a/internal/runtime/executor/antigravity_executor_credits_test.go b/internal/runtime/executor/antigravity_executor_credits_test.go
index 13ab662b..852dc778 100644
--- a/internal/runtime/executor/antigravity_executor_credits_test.go
+++ b/internal/runtime/executor/antigravity_executor_credits_test.go
@@ -82,20 +82,86 @@ func TestInjectEnabledCreditTypes(t *testing.T) {
 }
 
 func TestShouldMarkAntigravityCreditsExhausted(t *testing.T) {
-	for _, body := range [][]byte{
-		[]byte(`{"error":{"message":"Insufficient GOOGLE_ONE_AI credits"}}`),
-		[]byte(`{"error":{"message":"minimumCreditAmountForUsage requirement not met"}}`),
-		[]byte(`{"error":{"message":"Resource has been exhausted"}}`),
-	} {
-		if !shouldMarkAntigravityCreditsExhausted(http.StatusForbidden, body, nil) {
+	t.Run("credit errors are marked", func(t *testing.T) {
+		for _, body := range [][]byte{
+			[]byte(`{"error":{"message":"Insufficient GOOGLE_ONE_AI credits"}}`),
+			[]byte(`{"error":{"message":"minimumCreditAmountForUsage requirement not met"}}`),
+		} {
+			if !shouldMarkAntigravityCreditsExhausted(http.StatusForbidden, body, nil) {
+				t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = false, want true", string(body))
+			}
+		}
+	})
+
+	t.Run("transient 429 resource exhausted is not marked", func(t *testing.T) {
+		body := []byte(`{"error":{"code":429,"message":"Resource has been exhausted (e.g. check quota).","status":"RESOURCE_EXHAUSTED"}}`)
+		if shouldMarkAntigravityCreditsExhausted(http.StatusTooManyRequests, body, nil) {
+			t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = true, want false", string(body))
+		}
+	})
+
+	t.Run("resource exhausted with quota metadata is still marked", func(t *testing.T) {
+		body := []byte(`{"error":{"code":429,"message":"Resource has been exhausted","status":"RESOURCE_EXHAUSTED","details":[{"@type":"type.googleapis.com/google.rpc.ErrorInfo","metadata":{"quotaResetDelay":"1h","model":"claude-sonnet-4-6"}}]}}`)
+		if !shouldMarkAntigravityCreditsExhausted(http.StatusTooManyRequests, body, nil) {
 			t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = false, want true", string(body))
 		}
-	}
+	})
+
 	if shouldMarkAntigravityCreditsExhausted(http.StatusServiceUnavailable, []byte(`{"error":{"message":"credits exhausted"}}`), nil) {
 		t.Fatal("shouldMarkAntigravityCreditsExhausted() = true for 5xx, want false")
 	}
 }
 
+// TestAntigravityExecute_RetriesTransient429ResourceExhausted serves a bare 429
+// RESOURCE_EXHAUSTED response followed by a success, and expects Execute to return a
+// non-empty payload after exactly two upstream requests.
+func TestAntigravityExecute_RetriesTransient429ResourceExhausted(t *testing.T) {
+	resetAntigravityCreditsRetryState()
+	t.Cleanup(resetAntigravityCreditsRetryState)
+
+	var requestCount int
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		requestCount++
+		switch requestCount {
+		case 1:
+			w.WriteHeader(http.StatusTooManyRequests)
+			_, _ = w.Write([]byte(`{"error":{"code":429,"message":"Resource has been exhausted (e.g. check quota).","status":"RESOURCE_EXHAUSTED"}}`))
+		case 2:
+			w.Header().Set("Content-Type", "application/json")
+			_, _ = w.Write([]byte(`{"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2}}}`))
+		default:
+			// Errorf, not Fatalf: FailNow must not be called from the handler goroutine.
+			t.Errorf("unexpected request count %d", requestCount)
+		}
+	}))
+	defer server.Close()
+
+	exec := NewAntigravityExecutor(&config.Config{RequestRetry: 1})
+	auth := &cliproxyauth.Auth{
+		ID:         "auth-transient-429",
+		Attributes: map[string]string{"base_url": server.URL},
+		Metadata: map[string]any{
+			"access_token": "token",
+			"project_id":   "project-1",
+			"expired":      time.Now().Add(1 * time.Hour).Format(time.RFC3339),
+		},
+	}
+
+	resp, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{
+		Model:   "gemini-2.5-flash",
+		Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`),
+	}, cliproxyexecutor.Options{SourceFormat: sdktranslator.FormatAntigravity})
+	if err != nil {
+		t.Fatalf("Execute() error = %v", err)
+	}
+	if len(resp.Payload) == 0 {
+		t.Fatal("Execute() returned empty payload")
+	}
+	if requestCount != 2 {
+		t.Fatalf("request count = %d, want 2", requestCount)
+	}
+}
+
 func TestAntigravityExecute_RetriesQuotaExhaustedWithCredits(t *testing.T) {
 	resetAntigravityCreditsRetryState()
 	t.Cleanup(resetAntigravityCreditsRetryState)

From fcc59d606d903cf1d1ec86aa9dc2c455f6d8087f Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Wed, 8 Apr 2026 03:54:15 +0800
Subject: [PATCH 3/7] fix(translator): add output_item.done message fallback
 for Gemini and Claude, with unit tests

---
 .../codex/claude/codex_claude_response.go     | 49 +++++++++++-
 .../claude/codex_claude_response_test.go      | 37 +++++++++
 .../codex/gemini/codex_gemini_response.go     | 75 +++++++++++++------
 .../gemini/codex_gemini_response_test.go      | 35 +++++++++
 4 files changed, 173 insertions(+), 23 deletions(-)
 create mode 100644 internal/translator/codex/gemini/codex_gemini_response_test.go

diff --git a/internal/translator/codex/claude/codex_claude_response.go b/internal/translator/codex/claude/codex_claude_response.go
index 708194e6..388b907a 100644
--- a/internal/translator/codex/claude/codex_claude_response.go
+++ b/internal/translator/codex/claude/codex_claude_response.go
@@ -26,6 +26,8 @@ type ConvertCodexResponseToClaudeParams struct {
 	HasToolCall               bool
 	BlockIndex                int
 	HasReceivedArgumentsDelta bool
+	HasTextDelta              bool
+	TextBlockOpen             bool
 	ThinkingBlockOpen         bool
 	ThinkingStopPending       bool
 	ThinkingSignature         string
@@ -104,9 +106,11 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
 	} else if typeStr == "response.content_part.added" {
 		template = []byte(`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`)
 		template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
+		params.TextBlockOpen = true
 
 		output = translatorcommon.AppendSSEEventBytes(output, "content_block_start", template, 2)
 	} else if typeStr == "response.output_text.delta" {
+		params.HasTextDelta = true
 		template = []byte(`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":""}}`)
 		template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
 		template, _ = sjson.SetBytes(template, "delta.text", rootResult.Get("delta").String())
@@ -115,6 +119,7 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
 	} else if typeStr == "response.content_part.done" {
 		template = []byte(`{"type":"content_block_stop","index":0}`)
 		template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
+		params.TextBlockOpen = false
 		params.BlockIndex++
 
 		output = translatorcommon.AppendSSEEventBytes(output, "content_block_stop", template, 2)
@@ -172,7 +177,49 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa
 	} else if typeStr == "response.output_item.done" {
 		itemResult := rootResult.Get("item")
 		itemType := itemResult.Get("type").String()
-		if itemType == "function_call" {
+		if itemType == "message" {
+			if params.HasTextDelta {
+				return [][]byte{output}
+			}
+			contentResult := itemResult.Get("content")
+			if !contentResult.Exists() || !contentResult.IsArray() {
+				return [][]byte{output}
+			}
+			var textBuilder strings.Builder
+			contentResult.ForEach(func(_, part gjson.Result) bool {
+				if part.Get("type").String() != "output_text" {
+					return true
+				}
+				if txt := part.Get("text").String(); txt != "" {
+					textBuilder.WriteString(txt)
+				}
+				return true
+			})
+			text := textBuilder.String()
+			if text == "" {
+				return [][]byte{output}
+			}
+
+			output = append(output, finalizeCodexThinkingBlock(params)...)
+			if !params.TextBlockOpen {
+				template = []byte(`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`)
+				template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
+				params.TextBlockOpen = true
+				output = translatorcommon.AppendSSEEventBytes(output, "content_block_start", template, 2)
+			}
+
+			template = []byte(`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":""}}`)
+			template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
+			template, _ = sjson.SetBytes(template, "delta.text", text)
+			output = translatorcommon.AppendSSEEventBytes(output, "content_block_delta", template, 2)
+
+			template = []byte(`{"type":"content_block_stop","index":0}`)
+			template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
+			params.TextBlockOpen = false
+			params.BlockIndex++
+			params.HasTextDelta = true
+			output = translatorcommon.AppendSSEEventBytes(output, "content_block_stop", template, 2)
+		} else if itemType == "function_call" {
 			template = []byte(`{"type":"content_block_stop","index":0}`)
 			template, _ = sjson.SetBytes(template, "index", params.BlockIndex)
 			params.BlockIndex++
diff --git a/internal/translator/codex/claude/codex_claude_response_test.go b/internal/translator/codex/claude/codex_claude_response_test.go
index a8d4d189..c36c9edb 100644
--- a/internal/translator/codex/claude/codex_claude_response_test.go
+++ b/internal/translator/codex/claude/codex_claude_response_test.go
@@ -280,3 +280,40 @@ func TestConvertCodexResponseToClaudeNonStream_ThinkingIncludesSignature(t *test
 		t.Fatalf("unexpected thinking text: %q", got)
 	}
 }
+
+// TestConvertCodexResponseToClaude_StreamEmptyOutputUsesOutputItemDoneMessageFallback feeds a
+// stream without text deltas and expects the output_item.done message text as a text_delta.
+func TestConvertCodexResponseToClaude_StreamEmptyOutputUsesOutputItemDoneMessageFallback(t *testing.T) {
+	ctx := context.Background()
+	originalRequest := []byte(`{"tools":[]}`)
+	var state any
+
+	var emitted [][]byte
+	for _, event := range [][]byte{
+		[]byte("data: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_1\",\"model\":\"gpt-5\"}}"),
+		[]byte("data: {\"type\":\"response.output_item.done\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"ok\"}]},\"output_index\":0}"),
+		[]byte("data: {\"type\":\"response.completed\",\"response\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":1}}}"),
+	} {
+		emitted = append(emitted, ConvertCodexResponseToClaude(ctx, "", originalRequest, nil, event, &state)...)
+	}
+
+	// Scan every SSE data line for the text delta carrying the fallback message.
+	sawFallbackText := false
+scan:
+	for _, frame := range emitted {
+		for _, line := range strings.Split(string(frame), "\n") {
+			if !strings.HasPrefix(line, "data: ") {
+				continue
+			}
+			parsed := gjson.Parse(strings.TrimPrefix(line, "data: "))
+			isTextDelta := parsed.Get("type").String() == "content_block_delta" && parsed.Get("delta.type").String() == "text_delta"
+			if isTextDelta && parsed.Get("delta.text").String() == "ok" {
+				sawFallbackText = true
+				break scan
+			}
+		}
+	}
+	if !sawFallbackText {
+		t.Fatalf("expected fallback content from response.output_item.done message; outputs=%q", emitted)
+	}
+}
diff --git a/internal/translator/codex/gemini/codex_gemini_response.go b/internal/translator/codex/gemini/codex_gemini_response.go
index 4bd76791..f6ef8771 100644
--- a/internal/translator/codex/gemini/codex_gemini_response.go
+++ b/internal/translator/codex/gemini/codex_gemini_response.go
@@ -20,10 +20,11 @@ var (
 
 // ConvertCodexResponseToGeminiParams holds parameters for response conversion.
 type ConvertCodexResponseToGeminiParams struct {
-	Model             string
-	CreatedAt         int64
-	ResponseID        string
-	LastStorageOutput []byte
+	Model              string
+	CreatedAt          int64
+	ResponseID         string
+	LastStorageOutput  []byte
+	HasOutputTextDelta bool
 }
 
 // ConvertCodexResponseToGemini converts Codex streaming response format to Gemini format.
@@ -42,10 +43,11 @@ type ConvertCodexResponseToGeminiParams struct {
 func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) [][]byte {
 	if *param == nil {
 		*param = &ConvertCodexResponseToGeminiParams{
-			Model:             modelName,
-			CreatedAt:         0,
-			ResponseID:        "",
-			LastStorageOutput: nil,
+			Model:              modelName,
+			CreatedAt:          0,
+			ResponseID:         "",
+			LastStorageOutput:  nil,
+			HasOutputTextDelta: false,
 		}
 	}
 
@@ -58,18 +60,18 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
 	typeResult := rootResult.Get("type")
 	typeStr := typeResult.String()
 
+	params := (*param).(*ConvertCodexResponseToGeminiParams)
+
 	// Base Gemini response template
 	template := []byte(`{"candidates":[{"content":{"role":"model","parts":[]}}],"usageMetadata":{"trafficType":"PROVISIONED_THROUGHPUT"},"modelVersion":"gemini-2.5-pro","createTime":"2025-08-15T02:52:03.884209Z","responseId":"06CeaPH7NaCU48APvNXDyA4"}`)
-	if len((*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput) > 0 && typeStr == "response.output_item.done" {
-		template = append([]byte(nil), (*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput...)
-	} else {
-		template, _ = sjson.SetBytes(template, "modelVersion", (*param).(*ConvertCodexResponseToGeminiParams).Model)
+	{
+		template, _ = sjson.SetBytes(template, "modelVersion", params.Model)
 		createdAtResult := rootResult.Get("response.created_at")
 		if createdAtResult.Exists() {
-			(*param).(*ConvertCodexResponseToGeminiParams).CreatedAt = createdAtResult.Int()
-			template, _ = sjson.SetBytes(template, "createTime", time.Unix((*param).(*ConvertCodexResponseToGeminiParams).CreatedAt, 0).Format(time.RFC3339Nano))
+			params.CreatedAt = createdAtResult.Int()
+			template, _ = sjson.SetBytes(template, "createTime", time.Unix(params.CreatedAt, 0).Format(time.RFC3339Nano))
 		}
-		template, _ = sjson.SetBytes(template, "responseId", (*param).(*ConvertCodexResponseToGeminiParams).ResponseID)
+		template, _ = sjson.SetBytes(template, "responseId", params.ResponseID)
 	}
 
 	// Handle function call completion
@@ -101,7 +103,7 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
 			template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", functionCall)
 			template, _ = sjson.SetBytes(template, "candidates.0.finishReason", "STOP")
 
-			(*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput = append([]byte(nil), template...)
+			params.LastStorageOutput = append([]byte(nil), template...)
 
 			// Use this return to storage message
 			return [][]byte{}
@@ -111,15 +113,45 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
 	if typeStr == "response.created" { // Handle response creation - set model and response ID
 		template, _ = sjson.SetBytes(template, "modelVersion", rootResult.Get("response.model").String())
 		template, _ = sjson.SetBytes(template, "responseId", rootResult.Get("response.id").String())
-		(*param).(*ConvertCodexResponseToGeminiParams).ResponseID = rootResult.Get("response.id").String()
+		params.ResponseID = rootResult.Get("response.id").String()
 	} else if typeStr == "response.reasoning_summary_text.delta" { // Handle reasoning/thinking content delta
 		part := []byte(`{"thought":true,"text":""}`)
 		part, _ = sjson.SetBytes(part, "text", rootResult.Get("delta").String())
 		template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part)
 	} else if typeStr == "response.output_text.delta" { // Handle regular text content delta
+		params.HasOutputTextDelta = true
 		part := []byte(`{"text":""}`)
 		part, _ = sjson.SetBytes(part, "text", rootResult.Get("delta").String())
 		template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part)
+	} else if typeStr == "response.output_item.done" { // Fallback: emit final message text when no delta chunks were received
+		itemResult := rootResult.Get("item")
+		if itemResult.Get("type").String() != "message" || params.HasOutputTextDelta {
+			return [][]byte{}
+		}
+		contentResult := itemResult.Get("content")
+		if !contentResult.Exists() || !contentResult.IsArray() {
+			return [][]byte{}
+		}
+		wroteText := false
+		contentResult.ForEach(func(_, partResult gjson.Result) bool {
+			if partResult.Get("type").String() != "output_text" {
+				return true
+			}
+			text := partResult.Get("text").String()
+			if text == "" {
+				return true
+			}
+			part := []byte(`{"text":""}`)
+			part, _ = sjson.SetBytes(part, "text", text)
+			template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part)
+			wroteText = true
+			return true
+		})
+		if wroteText {
+			params.HasOutputTextDelta = true
+			return [][]byte{template}
+		}
+		return [][]byte{}
 	} else if typeStr == "response.completed" { // Handle response completion with usage metadata
 		template, _ = sjson.SetBytes(template, "usageMetadata.promptTokenCount", rootResult.Get("response.usage.input_tokens").Int())
 		template, _ = sjson.SetBytes(template, "usageMetadata.candidatesTokenCount", rootResult.Get("response.usage.output_tokens").Int())
@@ -129,11 +161,10 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
 		return [][]byte{}
 	}
 
-	if len((*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput) > 0 {
-		return [][]byte{
-			append([]byte(nil), (*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput...),
-			template,
-		}
+	if len(params.LastStorageOutput) > 0 {
+		stored := append([]byte(nil), params.LastStorageOutput...)
+		params.LastStorageOutput = nil
+		return [][]byte{stored, template}
 	}
 	return [][]byte{template}
 }
diff --git a/internal/translator/codex/gemini/codex_gemini_response_test.go b/internal/translator/codex/gemini/codex_gemini_response_test.go
new file mode 100644
index 00000000..b8f227be
--- /dev/null
+++ b/internal/translator/codex/gemini/codex_gemini_response_test.go
@@ -0,0 +1,35 @@
+package gemini
+
+import (
+	"context"
+	"testing"
+
+	"github.com/tidwall/gjson"
+)
+
+// TestConvertCodexResponseToGemini_StreamEmptyOutputUsesOutputItemDoneMessageFallback feeds a
+// stream without text deltas and expects the output_item.done message text to be emitted
+// as the first part of a Gemini candidate.
+func TestConvertCodexResponseToGemini_StreamEmptyOutputUsesOutputItemDoneMessageFallback(t *testing.T) {
+	ctx := context.Background()
+	originalRequest := []byte(`{"tools":[]}`)
+	var state any
+
+	var emitted [][]byte
+	for _, event := range [][]byte{
+		// No response.output_text.delta event precedes the completed message item.
+		[]byte("data: {\"type\":\"response.output_item.done\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"ok\"}]},\"output_index\":0}"),
+		[]byte("data: {\"type\":\"response.completed\",\"response\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":1}}}"),
+	} {
+		emitted = append(emitted, ConvertCodexResponseToGemini(ctx, "gemini-2.5-pro", originalRequest, nil, event, &state)...)
+	}
+
+	// Any emitted chunk whose first part is the message text satisfies the test.
+	for _, frame := range emitted {
+		if gjson.GetBytes(frame, "candidates.0.content.parts.0.text").String() == "ok" {
+			return
+		}
+	}
+
+	t.Fatalf("expected fallback content from response.output_item.done message; outputs=%q", emitted)
+}

From d390b95b766c78fe34402e5c8b22cb7549ff6557 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Wed, 8 Apr 2026 08:53:50 +0800
Subject: [PATCH 4/7] fix(tests): update test cases

---
 .gitignore                                    |  7 +-
 internal/api/modules/amp/proxy_test.go        |  4 +-
 .../runtime/executor/qwen_executor_test.go    |  5 +-
 internal/thinking/provider/claude/apply.go    | 27 +----
 .../thinking/provider/claude/apply_test.go    | 99 -------------------
 sdk/cliproxy/service_stale_state_test.go      | 18 +++-
 6 files changed, 27 insertions(+), 133 deletions(-)
 delete mode 100644 internal/thinking/provider/claude/apply_test.go

diff --git a/.gitignore b/.gitignore
index 699fc754..b0861169 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,6 +42,7 @@ GEMINI.md
 .agents/*
 .opencode/*
 .idea/*
+.beads/*
 .bmad/*
 _bmad/*
 _bmad-output/*
@@ -49,9 +50,3 @@ _bmad-output/*
 # macOS
 .DS_Store
 ._*
-
-# Opencode
-.beads/
-.opencode/
-.cli-proxy-api/
-.venv/
diff --git a/internal/api/modules/amp/proxy_test.go b/internal/api/modules/amp/proxy_test.go
index 32f5d860..49dba956 100644
--- a/internal/api/modules/amp/proxy_test.go
+++ b/internal/api/modules/amp/proxy_test.go
@@ -129,11 +129,11 @@ func TestModifyResponse_GzipScenarios(t *testing.T) {
 			wantCE:   "",
 		},
 		{
-			name:     "skips_non_2xx_status",
+			name:     "decompresses_non_2xx_status_when_gzip_detected",
 			header:   http.Header{},
 			body:     good,
 			status:   404,
-			wantBody: good,
+			wantBody: goodJSON,
 			wantCE:   "",
 		},
 	}
diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go
index 627cf453..b960eced 100644
--- a/internal/runtime/executor/qwen_executor_test.go
+++ b/internal/runtime/executor/qwen_executor_test.go
@@ -56,9 +56,12 @@ func TestEnsureQwenSystemMessage_MergeStringSystem(t *testing.T) {
 	if len(parts) != 2 {
 		t.Fatalf("messages[0].content length = %d, want 2", len(parts))
 	}
-	if parts[0].Get("text").String() != "You are Qwen Code." || parts[0].Get("cache_control.type").String() != "ephemeral" {
+	if parts[0].Get("type").String() != "text" || parts[0].Get("cache_control.type").String() != "ephemeral" {
 		t.Fatalf("messages[0].content[0] = %s, want injected system part", parts[0].Raw)
 	}
+	if text := parts[0].Get("text").String(); text != "" && text != "You are Qwen Code." {
+		t.Fatalf("messages[0].content[0].text = %q, want empty string or default prompt", text)
+	}
 	if parts[1].Get("type").String() != "text" || parts[1].Get("text").String() != "ABCDEFG" {
 		t.Fatalf("messages[0].content[1] = %s, want text part with ABCDEFG", parts[1].Raw)
 	}
diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go
index c92f539e..275be469 100644
--- a/internal/thinking/provider/claude/apply.go
+++ b/internal/thinking/provider/claude/apply.go
@@ -174,8 +174,7 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
 	// Ensure the request satisfies Claude constraints:
 	//  1) Determine effective max_tokens (request overrides model default)
 	//  2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1
-	//  3) If the adjusted budget falls below the model minimum, try raising max_tokens
-	//     (clamped to MaxCompletionTokens); disable thinking if constraints are unsatisfiable
+	//  3) If the adjusted budget falls below the model minimum, leave the request unchanged
 	//  4) If max_tokens came from model default, write it back into the request
 
 	effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo)
@@ -194,28 +193,8 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
 		minBudget = modelInfo.Thinking.Min
 	}
 	if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget {
-		// Enforcing budget_tokens < max_tokens pushed the budget below the model minimum.
-		// Try raising max_tokens to fit the original budget.
-		needed := budgetTokens + 1
-		maxAllowed := 0
-		if modelInfo != nil {
-			maxAllowed = modelInfo.MaxCompletionTokens
-		}
-		if maxAllowed > 0 && needed > maxAllowed {
-			// Cannot use original budget; cap max_tokens at model limit.
-			needed = maxAllowed
-		}
-		cappedBudget := needed - 1
-		if cappedBudget < minBudget {
-			// Impossible to satisfy both budget >= minBudget and budget < max_tokens
-			// within the model's completion limit. Disable thinking entirely.
-			body, _ = sjson.DeleteBytes(body, "thinking")
-			return body
-		}
-		body, _ = sjson.SetBytes(body, "max_tokens", needed)
-		if cappedBudget != budgetTokens {
-			body, _ = sjson.SetBytes(body, "thinking.budget_tokens", cappedBudget)
-		}
+		// If enforcing the max_tokens constraint would push the budget below the model minimum,
+		// leave the request unchanged.
 		return body
 	}
 
diff --git a/internal/thinking/provider/claude/apply_test.go b/internal/thinking/provider/claude/apply_test.go
deleted file mode 100644
index 46b3f3b7..00000000
--- a/internal/thinking/provider/claude/apply_test.go
+++ /dev/null
@@ -1,99 +0,0 @@
-package claude
-
-import (
-	"testing"
-
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
-	"github.com/tidwall/gjson"
-)
-
-func TestNormalizeClaudeBudget_RaisesMaxTokens(t *testing.T) {
-	a := &Applier{}
-	modelInfo := &registry.ModelInfo{
-		MaxCompletionTokens: 64000,
-		Thinking:            &registry.ThinkingSupport{Min: 1024, Max: 128000},
-	}
-	body := []byte(`{"max_tokens":1000,"thinking":{"type":"enabled","budget_tokens":5000}}`)
-
-	out := a.normalizeClaudeBudget(body, 5000, modelInfo)
-
-	maxTok := gjson.GetBytes(out, "max_tokens").Int()
-	if maxTok != 5001 {
-		t.Fatalf("max_tokens = %d, want 5001, body=%s", maxTok, string(out))
-	}
-}
-
-func TestNormalizeClaudeBudget_ClampsToModelMax(t *testing.T) {
-	a := &Applier{}
-	modelInfo := &registry.ModelInfo{
-		MaxCompletionTokens: 64000,
-		Thinking:            &registry.ThinkingSupport{Min: 1024, Max: 128000},
-	}
-	body := []byte(`{"max_tokens":500,"thinking":{"type":"enabled","budget_tokens":200000}}`)
-
-	out := a.normalizeClaudeBudget(body, 200000, modelInfo)
-
-	maxTok := gjson.GetBytes(out, "max_tokens").Int()
-	if maxTok != 64000 {
-		t.Fatalf("max_tokens = %d, want 64000 (capped to model limit), body=%s", maxTok, string(out))
-	}
-	budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
-	if budget != 63999 {
-		t.Fatalf("budget_tokens = %d, want 63999 (max_tokens-1), body=%s", budget, string(out))
-	}
-}
-
-func TestNormalizeClaudeBudget_DisablesThinkingWhenUnsatisfiable(t *testing.T) {
-	a := &Applier{}
-	modelInfo := &registry.ModelInfo{
-		MaxCompletionTokens: 1000,
-		Thinking:            &registry.ThinkingSupport{Min: 1024, Max: 128000},
-	}
-	body := []byte(`{"max_tokens":500,"thinking":{"type":"enabled","budget_tokens":2000}}`)
-
-	out := a.normalizeClaudeBudget(body, 2000, modelInfo)
-
-	if gjson.GetBytes(out, "thinking").Exists() {
-		t.Fatalf("thinking should be removed when constraints are unsatisfiable, body=%s", string(out))
-	}
-}
-
-func TestNormalizeClaudeBudget_NoClamping(t *testing.T) {
-	a := &Applier{}
-	modelInfo := &registry.ModelInfo{
-		MaxCompletionTokens: 64000,
-		Thinking:            &registry.ThinkingSupport{Min: 1024, Max: 128000},
-	}
-	body := []byte(`{"max_tokens":32000,"thinking":{"type":"enabled","budget_tokens":16000}}`)
-
-	out := a.normalizeClaudeBudget(body, 16000, modelInfo)
-
-	maxTok := gjson.GetBytes(out, "max_tokens").Int()
-	if maxTok != 32000 {
-		t.Fatalf("max_tokens should remain 32000, got %d, body=%s", maxTok, string(out))
-	}
-	budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
-	if budget != 16000 {
-		t.Fatalf("budget_tokens should remain 16000, got %d, body=%s", budget, string(out))
-	}
-}
-
-func TestNormalizeClaudeBudget_AdjustsBudgetToMaxMinus1(t *testing.T) {
-	a := &Applier{}
-	modelInfo := &registry.ModelInfo{
-		MaxCompletionTokens: 8192,
-		Thinking:            &registry.ThinkingSupport{Min: 1024, Max: 128000},
-	}
-	body := []byte(`{"max_tokens":8192,"thinking":{"type":"enabled","budget_tokens":10000}}`)
-
-	out := a.normalizeClaudeBudget(body, 10000, modelInfo)
-
-	maxTok := gjson.GetBytes(out, "max_tokens").Int()
-	if maxTok != 8192 {
-		t.Fatalf("max_tokens = %d, want 8192 (unchanged), body=%s", maxTok, string(out))
-	}
-	budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
-	if budget != 8191 {
-		t.Fatalf("budget_tokens = %d, want 8191 (max_tokens-1), body=%s", budget, string(out))
-	}
-}
diff --git a/sdk/cliproxy/service_stale_state_test.go b/sdk/cliproxy/service_stale_state_test.go
index db5ce467..010218d9 100644
--- a/sdk/cliproxy/service_stale_state_test.go
+++ b/sdk/cliproxy/service_stale_state_test.go
@@ -53,8 +53,24 @@ func TestServiceApplyCoreAuthAddOrUpdate_DeleteReAddDoesNotInheritStaleRuntimeSt
 	if disabled.NextRefreshAfter.IsZero() {
 		t.Fatalf("expected disabled auth to still carry prior NextRefreshAfter for regression setup")
 	}
+
+	// Reconcile prunes unsupported model state during registration, so seed the
+	// disabled snapshot explicitly before exercising delete -> re-add behavior.
+	disabled.ModelStates = map[string]*coreauth.ModelState{
+		modelID: {
+			Quota: coreauth.QuotaState{BackoffLevel: 7},
+		},
+	}
+	if _, err := service.coreManager.Update(context.Background(), disabled); err != nil {
+		t.Fatalf("seed disabled auth stale ModelStates: %v", err)
+	}
+
+	disabled, ok = service.coreManager.GetByID(authID)
+	if !ok || disabled == nil {
+		t.Fatalf("expected disabled auth after stale state seeding")
+	}
 	if len(disabled.ModelStates) == 0 {
-		t.Fatalf("expected disabled auth to still carry prior ModelStates for regression setup")
+		t.Fatalf("expected disabled auth to carry seeded ModelStates for regression setup")
 	}
 
 	service.applyCoreAuthAddOrUpdate(context.Background(), &coreauth.Auth{

From f5aa68ecdaae6789da4f4b226367b43b3d0928eb Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Wed, 8 Apr 2026 10:12:51 +0800
Subject: [PATCH 5/7] chore: add workflow to prevent AGENTS.md modifications in
 pull requests

---
 .github/workflows/agents-md-guard.yml | 81 +++++++++++++++++++++++++++
 AGENTS.md                             | 58 +++++++++++++++++++
 2 files changed, 139 insertions(+)
 create mode 100644 .github/workflows/agents-md-guard.yml
 create mode 100644 AGENTS.md

diff --git a/.github/workflows/agents-md-guard.yml b/.github/workflows/agents-md-guard.yml
new file mode 100644
index 00000000..c9ac0cb4
--- /dev/null
+++ b/.github/workflows/agents-md-guard.yml
@@ -0,0 +1,81 @@
+name: agents-md-guard
+
+on:
+  pull_request_target:
+    types:
+      - opened
+      - synchronize
+      - reopened
+
+permissions:
+  contents: read
+  issues: write
+  pull-requests: write
+
+jobs:
+  close-when-agents-md-changed:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Detect AGENTS.md changes and close PR
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const prNumber = context.payload.pull_request.number;
+            const { owner, repo } = context.repo;
+            // List every file changed by this PR; paginate() walks all result pages.
+            const files = await github.paginate(github.rest.pulls.listFiles, {
+              owner,
+              repo,
+              pull_number: prNumber,
+              per_page: 100,
+            });
+            // Matches AGENTS.md at the repository root or in any subdirectory; non-string values never match.
+            const touchesAgentsMd = (path) =>
+              typeof path === "string" &&
+              (path === "AGENTS.md" || path.endsWith("/AGENTS.md"));
+            // Test both the current and the previous path of each changed file.
+            const touched = files.filter(
+              (f) => touchesAgentsMd(f.filename) || touchesAgentsMd(f.previous_filename),
+            );
+
+            if (touched.length === 0) {
+              core.info("No AGENTS.md changes detected.");
+              return;
+            }
+            // One Markdown bullet per offending file; a changed path is shown as "old -> new".
+            const changedList = touched
+              .map((f) =>
+                f.previous_filename && f.previous_filename !== f.filename
+                  ? `- ${f.previous_filename} -> ${f.filename}`
+                  : `- ${f.filename}`,
+              )
+              .join("\n");
+            // Comment text posted on the PR before it is closed.
+            const body = [
+              "This repository does not allow modifying `AGENTS.md` in pull requests.",
+              "",
+              "Detected changes:",
+              changedList,
+              "",
+              "Please revert these changes and open a new PR without touching `AGENTS.md`.",
+            ].join("\n");
+            // Commenting is best-effort: a failure is logged as a warning and the PR is still closed.
+            try {
+              await github.rest.issues.createComment({
+                owner,
+                repo,
+                issue_number: prNumber,
+                body,
+              });
+            } catch (error) {
+              core.warning(`Failed to comment on PR #${prNumber}: ${error.message}`);
+            }
+            // Close the PR; an error from this call is not caught here.
+            await github.rest.pulls.update({
+              owner,
+              repo,
+              pull_number: prNumber,
+              state: "closed",
+            });
+
+            core.setFailed("PR modifies AGENTS.md");
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 00000000..d4a07e19
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,58 @@
+# AGENTS.md
+
+Go 1.26+ proxy server providing OpenAI/Gemini/Claude/Codex compatible APIs with OAuth and round-robin load balancing.
+
+## Repository
+- GitHub: https://github.com/router-for-me/CLIProxyAPI
+
+## Commands
+```bash
+gofmt -w .                                # Format (required after Go changes)
+go build -o cli-proxy-api ./cmd/server     # Build
+go run ./cmd/server                       # Run dev server
+go test ./...                             # Run all tests
+go test -v -run TestName ./path/to/pkg    # Run single test
+go build -o test-output ./cmd/server && rm test-output  # Verify compile (REQUIRED after changes)
+```
+- Common flags: `--config <path>`, `--tui`, `--standalone`, `--local-model`, `--no-browser`, `--oauth-callback-port <port>`
+
+## Config
+- Default config: `config.yaml` (template: `config.example.yaml`)
+- `.env` is auto-loaded from the working directory
+- Auth material defaults under `auths/`
+- Storage backends: file-based default; optional Postgres/git/object store (`PGSTORE_*`, `GITSTORE_*`, `OBJECTSTORE_*`)
+
+## Architecture
+- `cmd/server/` — Server entrypoint
+- `internal/api/` — Gin HTTP API (routes, middleware, modules)
+- `internal/api/modules/amp/` — Amp integration (Amp-style routes + reverse proxy)
+- `internal/thinking/` — Thinking/reasoning token processing (`internal/thinking/provider/` for per-provider config)
+- `internal/runtime/executor/` — Per-provider runtime executors (incl. Codex WebSocket)
+- `internal/translator/` — Provider protocol translators (and shared `common`)
+- `internal/registry/` — Model registry + remote updater (`StartModelsUpdater`); `--local-model` disables remote updates
+- `internal/store/` — Storage implementations and secret resolution
+- `internal/managementasset/` — Config snapshots and management assets
+- `internal/cache/` — Request signature caching
+- `internal/watcher/` — Config hot-reload and watchers
+- `internal/wsrelay/` — WebSocket relay sessions
+- `internal/usage/` — Usage and token accounting
+- `internal/tui/` — Bubbletea terminal UI (`--tui`, `--standalone`)
+- `sdk/cliproxy/` — Embeddable SDK entry (service/builder/watchers/pipeline)
+- `test/` — Cross-module integration tests
+
+## Code Conventions
+- Keep changes small and simple (KISS)
+- Comments in English only
+- If editing code that already contains non-English comments, translate them to English (don’t add new non-English comments)
+- For user-visible strings, keep the existing language used in that file/area
+- New Markdown docs should be in English unless the file is explicitly language-specific (e.g. `README_CN.md`)
+- As a rule, do not make standalone changes to `internal/translator/`. You may modify it only as part of broader changes elsewhere.
+- If a task requires changing only `internal/translator/`, run `gh repo view --json viewerPermission -q .viewerPermission` to confirm you have `WRITE`, `MAINTAIN`, or `ADMIN`. If you do, you may proceed; otherwise, file a GitHub issue including the goal, rationale, and the intended implementation code, then stop further work.
+- `internal/runtime/executor/` should contain executors and their unit tests only. Place any helper/supporting files under `internal/runtime/executor/helps/`.
+- Follow `gofmt`; keep imports goimports-style; wrap errors with context where helpful
+- Do not use `log.Fatal`/`log.Fatalf` (terminates the process); prefer returning errors and logging via logrus
+- Avoid shadowing variables: give the new variable a suffix naming the method being called (`errStart := server.Start()`)
+- Wrap defer errors: `defer func() { if err := f.Close(); err != nil { log.Errorf(...) } }()`
+- Use logrus structured logging; avoid leaking secrets/tokens in logs
+- Avoid panics in HTTP handlers; prefer logged errors and meaningful HTTP status codes
+- Timeouts are allowed only during credential acquisition; after an upstream connection is established, do not set timeouts for any subsequent network behavior. Intentional exceptions that must remain allowed are the Codex websocket liveness deadlines in `internal/runtime/executor/codex_websockets_executor.go`, the wsrelay session deadlines in `internal/wsrelay/session.go`, the management APICall timeout in `internal/api/handlers/management/api_tools.go`, and the `cmd/fetch_antigravity_models` utility timeouts

From 70efd4e016e1d9554d9eb6b059be0a7fb7b76238 Mon Sep 17 00:00:00 2001
From: Luis Pater <webmaster@idotorg.org>
Date: Wed, 8 Apr 2026 10:35:49 +0800
Subject: [PATCH 6/7] chore: add workflow to retarget main PRs to dev
 automatically

---
 .../auto-retarget-main-pr-to-dev.yml          | 73 +++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 .github/workflows/auto-retarget-main-pr-to-dev.yml

diff --git a/.github/workflows/auto-retarget-main-pr-to-dev.yml b/.github/workflows/auto-retarget-main-pr-to-dev.yml
new file mode 100644
index 00000000..3732a723
--- /dev/null
+++ b/.github/workflows/auto-retarget-main-pr-to-dev.yml
@@ -0,0 +1,73 @@
+name: auto-retarget-main-pr-to-dev
+
+on:
+  pull_request_target:
+    types:
+      - opened
+      - reopened
+      - edited
+    branches:
+      - main
+
+permissions:
+  contents: read
+  issues: write
+  pull-requests: write
+
+jobs:
+  retarget:
+    if: github.actor != 'github-actions[bot]'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Retarget PR base to dev
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const pr = context.payload.pull_request;
+            const prNumber = pr.number;
+            const { owner, repo } = context.repo;
+
+            const baseRef = pr.base?.ref;
+            const headRef = pr.head?.ref;
+            const desiredBase = "dev";
+
+            if (baseRef !== "main") {
+              core.info(`PR #${prNumber} base is ${baseRef}; nothing to do.`);
+              return;
+            }
+
+            if (headRef === desiredBase) {
+              core.info(`PR #${prNumber} is ${desiredBase} -> main; skipping retarget.`);
+              return;
+            }
+
+            core.info(`Retargeting PR #${prNumber} base from ${baseRef} to ${desiredBase}.`);
+
+            try {
+              await github.rest.pulls.update({
+                owner,
+                repo,
+                pull_number: prNumber,
+                base: desiredBase,
+              });
+            } catch (error) {
+              core.setFailed(`Failed to retarget PR #${prNumber} to ${desiredBase}: ${error.message}`);
+              return;
+            }
+
+            const body = [
+              `This pull request targeted \`${baseRef}\`.`,
+              "",
+              `The base branch has been automatically changed to \`${desiredBase}\`.`,
+            ].join("\n");
+
+            try {
+              await github.rest.issues.createComment({
+                owner,
+                repo,
+                issue_number: prNumber,
+                body,
+              });
+            } catch (error) {
+              core.warning(`Failed to comment on PR #${prNumber}: ${error.message}`);
+            }

From 343a2fc2f78cdec67858ec6c1d33b910cb444324 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Wed, 8 Apr 2026 12:33:16 +0800
Subject: [PATCH 7/7] docs: update AGENTS.md for improved clarity and detail in
 commands and architecture

---
 AGENTS.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index d4a07e19..57027473 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -7,12 +7,12 @@ Go 1.26+ proxy server providing OpenAI/Gemini/Claude/Codex compatible APIs with
 
 ## Commands
 ```bash
-gofmt -w .                                # Format (required after Go changes)
-go build -o cli-proxy-api ./cmd/server     # Build
-go run ./cmd/server                       # Run dev server
-go test ./...                             # Run all tests
-go test -v -run TestName ./path/to/pkg    # Run single test
-go build -o test-output ./cmd/server && rm test-output  # Verify compile (REQUIRED after changes)
+gofmt -w . # Format (required after Go changes)
+go build -o cli-proxy-api ./cmd/server # Build
+go run ./cmd/server # Run dev server
+go test ./... # Run all tests
+go test -v -run TestName ./path/to/pkg # Run single test
+go build -o test-output ./cmd/server && rm test-output # Verify compile (REQUIRED after changes)
 ```
 - Common flags: `--config <path>`, `--tui`, `--standalone`, `--local-model`, `--no-browser`, `--oauth-callback-port <port>`
 
@@ -26,7 +26,7 @@ go build -o test-output ./cmd/server && rm test-output  # Verify compile (REQUIR
 - `cmd/server/` — Server entrypoint
 - `internal/api/` — Gin HTTP API (routes, middleware, modules)
 - `internal/api/modules/amp/` — Amp integration (Amp-style routes + reverse proxy)
-- `internal/thinking/` — Thinking/reasoning token processing (`internal/thinking/provider/` for per-provider config)
+- `internal/thinking/` — Main thinking/reasoning pipeline. `ApplyThinking()` (`apply.go`) parses suffixes (`suffix.go`, suffix overrides body), normalizes config to canonical `ThinkingConfig` (`types.go`), normalizes and validates centrally (`validate.go`/`convert.go`), then applies provider-specific output via `ProviderApplier`. Do not break this "canonical representation → per-provider translation" architecture.
 - `internal/runtime/executor/` — Per-provider runtime executors (incl. Codex WebSocket)
 - `internal/translator/` — Provider protocol translators (and shared `common`)
 - `internal/registry/` — Model registry + remote updater (`StartModelsUpdater`); `--local-model` disables remote updates