From 8b9dbe10f0794738dfc9c6340179f81f314f97c0 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 6 Apr 2026 20:19:42 +0800 Subject: [PATCH 1/7] fix: record zero usage --- .../runtime/executor/helps/usage_helpers.go | 3 - test/usage_logging_test.go | 97 +++++++++++++++++++ 2 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 test/usage_logging_test.go diff --git a/internal/runtime/executor/helps/usage_helpers.go b/internal/runtime/executor/helps/usage_helpers.go index 23040984..8da8fd1e 100644 --- a/internal/runtime/executor/helps/usage_helpers.go +++ b/internal/runtime/executor/helps/usage_helpers.go @@ -69,9 +69,6 @@ func (r *UsageReporter) publishWithOutcome(ctx context.Context, detail usage.Det detail.TotalTokens = total } } - if detail.InputTokens == 0 && detail.OutputTokens == 0 && detail.ReasoningTokens == 0 && detail.CachedTokens == 0 && detail.TotalTokens == 0 && !failed { - return - } r.once.Do(func() { usage.PublishRecord(ctx, r.buildRecord(detail, failed)) }) diff --git a/test/usage_logging_test.go b/test/usage_logging_test.go new file mode 100644 index 00000000..41c2ee34 --- /dev/null +++ b/test/usage_logging_test.go @@ -0,0 +1,97 @@ +package test + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + runtimeexecutor "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor" + internalusage "github.com/router-for-me/CLIProxyAPI/v6/internal/usage" + cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" + cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" + sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" +) + +func TestGeminiExecutorRecordsSuccessfulZeroUsageInStatistics(t *testing.T) { + model := fmt.Sprintf("gemini-2.5-flash-zero-usage-%d", time.Now().UnixNano()) + source := fmt.Sprintf("zero-usage-%d@example.com", time.Now().UnixNano()) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + wantPath := "/v1beta/models/" + model + ":generateContent" + if r.URL.Path != wantPath { + t.Fatalf("path = %q, want %q", r.URL.Path, wantPath) + } + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]},"finishReason":"STOP"}],"usageMetadata":{"promptTokenCount":0,"candidatesTokenCount":0,"totalTokenCount":0}}`)) + })) + defer server.Close() + + executor := runtimeexecutor.NewGeminiExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{ + Provider: "gemini", + Attributes: map[string]string{ + "api_key": "test-upstream-key", + "base_url": server.URL, + }, + Metadata: map[string]any{ + "email": source, + }, + } + + prevStatsEnabled := internalusage.StatisticsEnabled() + internalusage.SetStatisticsEnabled(true) + t.Cleanup(func() { + internalusage.SetStatisticsEnabled(prevStatsEnabled) + }) + + _, err := executor.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: model, + Payload: []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FormatGemini, + OriginalRequest: []byte(`{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}`), + }) + if err != nil { + t.Fatalf("Execute error: %v", err) + } + + detail := waitForStatisticsDetail(t, "gemini", model, source) + if detail.Failed { + t.Fatalf("detail failed = true, want false") + } + if detail.Tokens.TotalTokens != 0 { + t.Fatalf("total tokens = %d, want 0", detail.Tokens.TotalTokens) + } +} + +func waitForStatisticsDetail(t *testing.T, apiName, model, source string) internalusage.RequestDetail { + t.Helper() + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + snapshot := internalusage.GetRequestStatistics().Snapshot() + apiSnapshot, ok := snapshot.APIs[apiName] + if !ok { + time.Sleep(10 * time.Millisecond) + continue + } + modelSnapshot, ok := apiSnapshot.Models[model] + if !ok { + time.Sleep(10 * time.Millisecond) + continue + } + for _, detail := range modelSnapshot.Details { + if detail.Source == source { + return detail + } + } + time.Sleep(10 * time.Millisecond) + } + + t.Fatalf("timed out waiting for statistics detail for api=%q model=%q source=%q", apiName, model, source) + return internalusage.RequestDetail{} +} From 91e7591955e4c55954de64e79ad618ecd24cf477 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 8 Apr 2026 02:48:53 +0800 Subject: [PATCH 2/7] fix(executor): add transient 429 resource exhausted handling with retry logic --- .../runtime/executor/antigravity_executor.go | 81 +++++++++++++++++++ .../antigravity_executor_credits_test.go | 80 ++++++++++++++++-- 2 files changed, 154 insertions(+), 7 deletions(-) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index ecab3c87..ed4ce1dc 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -261,6 +261,28 @@ func classifyAntigravity429(body []byte) antigravity429Category { return antigravity429Unknown } +func antigravityHasQuotaResetDelayOrModelInfo(body []byte) bool { + if len(body) == 0 { + return false + } + details := gjson.GetBytes(body, "error.details") + if !details.Exists() || !details.IsArray() { + return false + } + for _, detail := range details.Array() { + if detail.Get("@type").String() != "type.googleapis.com/google.rpc.ErrorInfo" { + continue + } + if strings.TrimSpace(detail.Get("metadata.quotaResetDelay").String()) != "" { + return true + } + if strings.TrimSpace(detail.Get("metadata.model").String()) != "" { + return true + } + } + return false +} + func antigravityCreditsRetryEnabled(cfg *config.Config) bool { return cfg != nil && cfg.QuotaExceeded.AntigravityCredits } @@ -362,6 +384,12 @@ func shouldMarkAntigravityCreditsExhausted(statusCode int, body []byte, reqErr e lowerBody := strings.ToLower(string(body)) for _, keyword := range antigravityCreditsExhaustedKeywords { if strings.Contains(lowerBody, keyword) { + if keyword == "resource has been exhausted" && + statusCode == http.StatusTooManyRequests && + classifyAntigravity429(body) == antigravity429Unknown && + !antigravityHasQuotaResetDelayOrModelInfo(body) { + return false + } return true } } @@ -575,6 +603,14 @@ attemptLoop: log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1]) continue } + if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts { + delay := antigravityTransient429RetryDelay(attempt) + log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts) + if errWait := antigravityWait(ctx, delay); errWait != nil { + return resp, errWait + } + continue attemptLoop + } if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) { if idx+1 < len(baseURLs) { log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1]) @@ -742,6 +778,14 @@ attemptLoop: log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1]) continue } + if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts { + delay := antigravityTransient429RetryDelay(attempt) + log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts) + if errWait := antigravityWait(ctx, delay); errWait != nil { + return resp, errWait + } + continue attemptLoop + } if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) { if idx+1 < len(baseURLs) { log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1]) @@ -1158,6 +1202,14 @@ attemptLoop: log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1]) continue } + if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts { + delay := antigravityTransient429RetryDelay(attempt) + log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts) + if errWait := antigravityWait(ctx, delay); errWait != nil { + return nil, errWait + } + continue attemptLoop + } if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) { if idx+1 < len(baseURLs) { log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1]) @@ -1774,6 +1826,24 @@ func antigravityShouldRetryNoCapacity(statusCode int, body []byte) bool { return strings.Contains(msg, "no capacity available") } +func antigravityShouldRetryTransientResourceExhausted429(statusCode int, body []byte) bool { + if statusCode != http.StatusTooManyRequests { + return false + } + if len(body) == 0 { + return false + } + if classifyAntigravity429(body) != antigravity429Unknown { + return false + } + status := strings.TrimSpace(gjson.GetBytes(body, "error.status").String()) + if !strings.EqualFold(status, "RESOURCE_EXHAUSTED") { + return false + } + msg := strings.ToLower(string(body)) + return strings.Contains(msg, "resource has been exhausted") +} + func antigravityNoCapacityRetryDelay(attempt int) time.Duration { if attempt < 0 { attempt = 0 @@ -1785,6 +1855,17 @@ func antigravityNoCapacityRetryDelay(attempt int) time.Duration { return delay } +func antigravityTransient429RetryDelay(attempt int) time.Duration { + if attempt < 0 { + attempt = 0 + } + delay := time.Duration(attempt+1) * 100 * time.Millisecond + if delay > 500*time.Millisecond { + delay = 500 * time.Millisecond + } + return delay +} + func antigravityWait(ctx context.Context, wait time.Duration) error { if wait <= 0 { return nil diff --git a/internal/runtime/executor/antigravity_executor_credits_test.go b/internal/runtime/executor/antigravity_executor_credits_test.go index 13ab662b..852dc778 100644 --- a/internal/runtime/executor/antigravity_executor_credits_test.go +++ b/internal/runtime/executor/antigravity_executor_credits_test.go @@ -82,20 +82,86 @@ func TestInjectEnabledCreditTypes(t *testing.T) { } func TestShouldMarkAntigravityCreditsExhausted(t *testing.T) { - for _, body := range [][]byte{ - []byte(`{"error":{"message":"Insufficient GOOGLE_ONE_AI credits"}}`), - []byte(`{"error":{"message":"minimumCreditAmountForUsage requirement not met"}}`), - []byte(`{"error":{"message":"Resource has been exhausted"}}`), - } { - if !shouldMarkAntigravityCreditsExhausted(http.StatusForbidden, body, nil) { + t.Run("credit errors are marked", func(t *testing.T) { + for _, body := range [][]byte{ + []byte(`{"error":{"message":"Insufficient GOOGLE_ONE_AI credits"}}`), + []byte(`{"error":{"message":"minimumCreditAmountForUsage requirement not met"}}`), + } { + if !shouldMarkAntigravityCreditsExhausted(http.StatusForbidden, body, nil) { + t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = false, want true", string(body)) + } + } + }) + + t.Run("transient 429 resource exhausted is not marked", func(t *testing.T) { + body := []byte(`{"error":{"code":429,"message":"Resource has been exhausted (e.g. check quota).","status":"RESOURCE_EXHAUSTED"}}`) + if shouldMarkAntigravityCreditsExhausted(http.StatusTooManyRequests, body, nil) { + t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = true, want false", string(body)) + } + }) + + t.Run("resource exhausted with quota metadata is still marked", func(t *testing.T) { + body := []byte(`{"error":{"code":429,"message":"Resource has been exhausted","status":"RESOURCE_EXHAUSTED","details":[{"@type":"type.googleapis.com/google.rpc.ErrorInfo","metadata":{"quotaResetDelay":"1h","model":"claude-sonnet-4-6"}}]}}`) + if !shouldMarkAntigravityCreditsExhausted(http.StatusTooManyRequests, body, nil) { t.Fatalf("shouldMarkAntigravityCreditsExhausted(%s) = false, want true", string(body)) } - } + }) + if shouldMarkAntigravityCreditsExhausted(http.StatusServiceUnavailable, []byte(`{"error":{"message":"credits exhausted"}}`), nil) { t.Fatal("shouldMarkAntigravityCreditsExhausted() = true for 5xx, want false") } } +func TestAntigravityExecute_RetriesTransient429ResourceExhausted(t *testing.T) { + resetAntigravityCreditsRetryState() + t.Cleanup(resetAntigravityCreditsRetryState) + + var requestCount int + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + requestCount++ + switch requestCount { + case 1: + w.WriteHeader(http.StatusTooManyRequests) + _, _ = w.Write([]byte(`{"error":{"code":429,"message":"Resource has been exhausted (e.g. check quota).","status":"RESOURCE_EXHAUSTED"}}`)) + case 2: + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"response":{"candidates":[{"content":{"role":"model","parts":[{"text":"ok"}]}}],"usageMetadata":{"promptTokenCount":1,"candidatesTokenCount":1,"totalTokenCount":2}}}`)) + default: + t.Fatalf("unexpected request count %d", requestCount) + } + })) + defer server.Close() + + exec := NewAntigravityExecutor(&config.Config{RequestRetry: 1}) + auth := &cliproxyauth.Auth{ + ID: "auth-transient-429", + Attributes: map[string]string{ + "base_url": server.URL, + }, + Metadata: map[string]any{ + "access_token": "token", + "project_id": "project-1", + "expired": time.Now().Add(1 * time.Hour).Format(time.RFC3339), + }, + } + + resp, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{ + Model: "gemini-2.5-flash", + Payload: []byte(`{"request":{"contents":[{"role":"user","parts":[{"text":"hi"}]}]}}`), + }, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FormatAntigravity, + }) + if err != nil { + t.Fatalf("Execute() error = %v", err) + } + if len(resp.Payload) == 0 { + t.Fatal("Execute() returned empty payload") + } + if requestCount != 2 { + t.Fatalf("request count = %d, want 2", requestCount) + } +} + func TestAntigravityExecute_RetriesQuotaExhaustedWithCredits(t *testing.T) { resetAntigravityCreditsRetryState() t.Cleanup(resetAntigravityCreditsRetryState) From fcc59d606d903cf1d1ec86aa9dc2c455f6d8087f Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 8 Apr 2026 03:54:15 +0800 Subject: [PATCH 3/7] fix(translator): add unit tests to validate output_item.done fallback logic for Gemini and Claude --- .../codex/claude/codex_claude_response.go | 49 +++++++++++- .../claude/codex_claude_response_test.go | 37 +++++++++ .../codex/gemini/codex_gemini_response.go | 75 +++++++++++++------ .../gemini/codex_gemini_response_test.go | 35 +++++++++ 4 files changed, 173 insertions(+), 23 deletions(-) create mode 100644 internal/translator/codex/gemini/codex_gemini_response_test.go diff --git a/internal/translator/codex/claude/codex_claude_response.go b/internal/translator/codex/claude/codex_claude_response.go index 708194e6..388b907a 100644 --- a/internal/translator/codex/claude/codex_claude_response.go +++ b/internal/translator/codex/claude/codex_claude_response.go @@ -26,6 +26,8 @@ type ConvertCodexResponseToClaudeParams struct { HasToolCall bool BlockIndex int HasReceivedArgumentsDelta bool + HasTextDelta bool + TextBlockOpen bool ThinkingBlockOpen bool ThinkingStopPending bool ThinkingSignature string @@ -104,9 +106,11 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa } else if typeStr == "response.content_part.added" { template = []byte(`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`) template, _ = sjson.SetBytes(template, "index", params.BlockIndex) + params.TextBlockOpen = true output = translatorcommon.AppendSSEEventBytes(output, "content_block_start", template, 2) } else if typeStr == "response.output_text.delta" { + params.HasTextDelta = true template = []byte(`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":""}}`) template, _ = sjson.SetBytes(template, "index", params.BlockIndex) template, _ = sjson.SetBytes(template, "delta.text", rootResult.Get("delta").String()) @@ -115,6 +119,7 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa } else if typeStr == "response.content_part.done" { template = []byte(`{"type":"content_block_stop","index":0}`) template, _ = sjson.SetBytes(template, "index", params.BlockIndex) + params.TextBlockOpen = false params.BlockIndex++ output = translatorcommon.AppendSSEEventBytes(output, "content_block_stop", template, 2) @@ -172,7 +177,49 @@ func ConvertCodexResponseToClaude(_ context.Context, _ string, originalRequestRa } else if typeStr == "response.output_item.done" { itemResult := rootResult.Get("item") itemType := itemResult.Get("type").String() - if itemType == "function_call" { + if itemType == "message" { + if params.HasTextDelta { + return [][]byte{output} + } + contentResult := itemResult.Get("content") + if !contentResult.Exists() || !contentResult.IsArray() { + return [][]byte{output} + } + var textBuilder strings.Builder + contentResult.ForEach(func(_, part gjson.Result) bool { + if part.Get("type").String() != "output_text" { + return true + } + if txt := part.Get("text").String(); txt != "" { + textBuilder.WriteString(txt) + } + return true + }) + text := textBuilder.String() + if text == "" { + return [][]byte{output} + } + + output = append(output, finalizeCodexThinkingBlock(params)...) + if !params.TextBlockOpen { + template = []byte(`{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}`) + template, _ = sjson.SetBytes(template, "index", params.BlockIndex) + params.TextBlockOpen = true + output = translatorcommon.AppendSSEEventBytes(output, "content_block_start", template, 2) + } + + template = []byte(`{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":""}}`) + template, _ = sjson.SetBytes(template, "index", params.BlockIndex) + template, _ = sjson.SetBytes(template, "delta.text", text) + output = translatorcommon.AppendSSEEventBytes(output, "content_block_delta", template, 2) + + template = []byte(`{"type":"content_block_stop","index":0}`) + template, _ = sjson.SetBytes(template, "index", params.BlockIndex) + params.TextBlockOpen = false + params.BlockIndex++ + params.HasTextDelta = true + output = translatorcommon.AppendSSEEventBytes(output, "content_block_stop", template, 2) + } else if itemType == "function_call" { template = []byte(`{"type":"content_block_stop","index":0}`) template, _ = sjson.SetBytes(template, "index", params.BlockIndex) params.BlockIndex++ diff --git a/internal/translator/codex/claude/codex_claude_response_test.go b/internal/translator/codex/claude/codex_claude_response_test.go index a8d4d189..c36c9edb 100644 --- a/internal/translator/codex/claude/codex_claude_response_test.go +++ b/internal/translator/codex/claude/codex_claude_response_test.go @@ -280,3 +280,40 @@ func TestConvertCodexResponseToClaudeNonStream_ThinkingIncludesSignature(t *test t.Fatalf("unexpected thinking text: %q", got) } } + +func TestConvertCodexResponseToClaude_StreamEmptyOutputUsesOutputItemDoneMessageFallback(t *testing.T) { + ctx := context.Background() + originalRequest := []byte(`{"tools":[]}`) + var param any + + chunks := [][]byte{ + []byte("data: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_1\",\"model\":\"gpt-5\"}}"), + []byte("data: {\"type\":\"response.output_item.done\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"ok\"}]},\"output_index\":0}"), + []byte("data: {\"type\":\"response.completed\",\"response\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":1}}}"), + } + + var outputs [][]byte + for _, chunk := range chunks { + outputs = append(outputs, ConvertCodexResponseToClaude(ctx, "", originalRequest, nil, chunk, ¶m)...) + } + + foundText := false + for _, out := range outputs { + for _, line := range strings.Split(string(out), "\n") { + if !strings.HasPrefix(line, "data: ") { + continue + } + data := gjson.Parse(strings.TrimPrefix(line, "data: ")) + if data.Get("type").String() == "content_block_delta" && data.Get("delta.type").String() == "text_delta" && data.Get("delta.text").String() == "ok" { + foundText = true + break + } + } + if foundText { + break + } + } + if !foundText { + t.Fatalf("expected fallback content from response.output_item.done message; outputs=%q", outputs) + } +} diff --git a/internal/translator/codex/gemini/codex_gemini_response.go b/internal/translator/codex/gemini/codex_gemini_response.go index 4bd76791..f6ef8771 100644 --- a/internal/translator/codex/gemini/codex_gemini_response.go +++ b/internal/translator/codex/gemini/codex_gemini_response.go @@ -20,10 +20,11 @@ var ( // ConvertCodexResponseToGeminiParams holds parameters for response conversion. type ConvertCodexResponseToGeminiParams struct { - Model string - CreatedAt int64 - ResponseID string - LastStorageOutput []byte + Model string + CreatedAt int64 + ResponseID string + LastStorageOutput []byte + HasOutputTextDelta bool } // ConvertCodexResponseToGemini converts Codex streaming response format to Gemini format. @@ -42,10 +43,11 @@ type ConvertCodexResponseToGeminiParams struct { func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) [][]byte { if *param == nil { *param = &ConvertCodexResponseToGeminiParams{ - Model: modelName, - CreatedAt: 0, - ResponseID: "", - LastStorageOutput: nil, + Model: modelName, + CreatedAt: 0, + ResponseID: "", + LastStorageOutput: nil, + HasOutputTextDelta: false, } } @@ -58,18 +60,18 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR typeResult := rootResult.Get("type") typeStr := typeResult.String() + params := (*param).(*ConvertCodexResponseToGeminiParams) + // Base Gemini response template template := []byte(`{"candidates":[{"content":{"role":"model","parts":[]}}],"usageMetadata":{"trafficType":"PROVISIONED_THROUGHPUT"},"modelVersion":"gemini-2.5-pro","createTime":"2025-08-15T02:52:03.884209Z","responseId":"06CeaPH7NaCU48APvNXDyA4"}`) - if len((*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput) > 0 && typeStr == "response.output_item.done" { - template = append([]byte(nil), (*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput...) - } else { - template, _ = sjson.SetBytes(template, "modelVersion", (*param).(*ConvertCodexResponseToGeminiParams).Model) + { + template, _ = sjson.SetBytes(template, "modelVersion", params.Model) createdAtResult := rootResult.Get("response.created_at") if createdAtResult.Exists() { - (*param).(*ConvertCodexResponseToGeminiParams).CreatedAt = createdAtResult.Int() - template, _ = sjson.SetBytes(template, "createTime", time.Unix((*param).(*ConvertCodexResponseToGeminiParams).CreatedAt, 0).Format(time.RFC3339Nano)) + params.CreatedAt = createdAtResult.Int() + template, _ = sjson.SetBytes(template, "createTime", time.Unix(params.CreatedAt, 0).Format(time.RFC3339Nano)) } - template, _ = sjson.SetBytes(template, "responseId", (*param).(*ConvertCodexResponseToGeminiParams).ResponseID) + template, _ = sjson.SetBytes(template, "responseId", params.ResponseID) } // Handle function call completion @@ -101,7 +103,7 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", functionCall) template, _ = sjson.SetBytes(template, "candidates.0.finishReason", "STOP") - (*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput = append([]byte(nil), template...) + params.LastStorageOutput = append([]byte(nil), template...) // Use this return to storage message return [][]byte{} @@ -111,15 +113,45 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR if typeStr == "response.created" { // Handle response creation - set model and response ID template, _ = sjson.SetBytes(template, "modelVersion", rootResult.Get("response.model").String()) template, _ = sjson.SetBytes(template, "responseId", rootResult.Get("response.id").String()) - (*param).(*ConvertCodexResponseToGeminiParams).ResponseID = rootResult.Get("response.id").String() + params.ResponseID = rootResult.Get("response.id").String() } else if typeStr == "response.reasoning_summary_text.delta" { // Handle reasoning/thinking content delta part := []byte(`{"thought":true,"text":""}`) part, _ = sjson.SetBytes(part, "text", rootResult.Get("delta").String()) template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part) } else if typeStr == "response.output_text.delta" { // Handle regular text content delta + params.HasOutputTextDelta = true part := []byte(`{"text":""}`) part, _ = sjson.SetBytes(part, "text", rootResult.Get("delta").String()) template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part) + } else if typeStr == "response.output_item.done" { // Fallback: emit final message text when no delta chunks were received + itemResult := rootResult.Get("item") + if itemResult.Get("type").String() != "message" || params.HasOutputTextDelta { + return [][]byte{} + } + contentResult := itemResult.Get("content") + if !contentResult.Exists() || !contentResult.IsArray() { + return [][]byte{} + } + wroteText := false + contentResult.ForEach(func(_, partResult gjson.Result) bool { + if partResult.Get("type").String() != "output_text" { + return true + } + text := partResult.Get("text").String() + if text == "" { + return true + } + part := []byte(`{"text":""}`) + part, _ = sjson.SetBytes(part, "text", text) + template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part) + wroteText = true + return true + }) + if wroteText { + params.HasOutputTextDelta = true + return [][]byte{template} + } + return [][]byte{} } else if typeStr == "response.completed" { // Handle response completion with usage metadata template, _ = sjson.SetBytes(template, "usageMetadata.promptTokenCount", rootResult.Get("response.usage.input_tokens").Int()) template, _ = sjson.SetBytes(template, "usageMetadata.candidatesTokenCount", rootResult.Get("response.usage.output_tokens").Int()) @@ -129,11 +161,10 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR return [][]byte{} } - if len((*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput) > 0 { - return [][]byte{ - append([]byte(nil), (*param).(*ConvertCodexResponseToGeminiParams).LastStorageOutput...), - template, - } + if len(params.LastStorageOutput) > 0 { + stored := append([]byte(nil), params.LastStorageOutput...) + params.LastStorageOutput = nil + return [][]byte{stored, template} } return [][]byte{template} } diff --git a/internal/translator/codex/gemini/codex_gemini_response_test.go b/internal/translator/codex/gemini/codex_gemini_response_test.go new file mode 100644 index 00000000..b8f227be --- /dev/null +++ b/internal/translator/codex/gemini/codex_gemini_response_test.go @@ -0,0 +1,35 @@ +package gemini + +import ( + "context" + "testing" + + "github.com/tidwall/gjson" +) + +func TestConvertCodexResponseToGemini_StreamEmptyOutputUsesOutputItemDoneMessageFallback(t *testing.T) { + ctx := context.Background() + originalRequest := []byte(`{"tools":[]}`) + var param any + + chunks := [][]byte{ + []byte("data: {\"type\":\"response.output_item.done\",\"item\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"ok\"}]},\"output_index\":0}"), + []byte("data: {\"type\":\"response.completed\",\"response\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":1}}}"), + } + + var outputs [][]byte + for _, chunk := range chunks { + outputs = append(outputs, ConvertCodexResponseToGemini(ctx, "gemini-2.5-pro", originalRequest, nil, chunk, ¶m)...) + } + + found := false + for _, out := range outputs { + if gjson.GetBytes(out, "candidates.0.content.parts.0.text").String() == "ok" { + found = true + break + } + } + if !found { + t.Fatalf("expected fallback content from response.output_item.done message; outputs=%q", outputs) + } +} From d390b95b766c78fe34402e5c8b22cb7549ff6557 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 8 Apr 2026 08:53:50 +0800 Subject: [PATCH 4/7] fix(tests): update test cases --- .gitignore | 7 +- internal/api/modules/amp/proxy_test.go | 4 +- .../runtime/executor/qwen_executor_test.go | 5 +- internal/thinking/provider/claude/apply.go | 27 +---- .../thinking/provider/claude/apply_test.go | 99 ------------------- sdk/cliproxy/service_stale_state_test.go | 18 +++- 6 files changed, 27 insertions(+), 133 deletions(-) delete mode 100644 internal/thinking/provider/claude/apply_test.go diff --git a/.gitignore b/.gitignore index 699fc754..b0861169 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ GEMINI.md .agents/* .opencode/* .idea/* +.beads/* .bmad/* _bmad/* _bmad-output/* @@ -49,9 +50,3 @@ _bmad-output/* # macOS .DS_Store ._* - -# Opencode -.beads/ -.opencode/ -.cli-proxy-api/ -.venv/ diff --git a/internal/api/modules/amp/proxy_test.go b/internal/api/modules/amp/proxy_test.go index 32f5d860..49dba956 100644 --- a/internal/api/modules/amp/proxy_test.go +++ b/internal/api/modules/amp/proxy_test.go @@ -129,11 +129,11 @@ func TestModifyResponse_GzipScenarios(t *testing.T) { wantCE: "", }, { - name: "skips_non_2xx_status", + name: "decompresses_non_2xx_status_when_gzip_detected", header: http.Header{}, body: good, status: 404, - wantBody: good, + wantBody: goodJSON, wantCE: "", }, } diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go index 627cf453..b960eced 100644 --- a/internal/runtime/executor/qwen_executor_test.go +++ b/internal/runtime/executor/qwen_executor_test.go @@ -56,9 +56,12 @@ func TestEnsureQwenSystemMessage_MergeStringSystem(t *testing.T) { if len(parts) != 2 { t.Fatalf("messages[0].content length = %d, want 2", len(parts)) } - if parts[0].Get("text").String() != "You are Qwen Code." || parts[0].Get("cache_control.type").String() != "ephemeral" { + if parts[0].Get("type").String() != "text" || parts[0].Get("cache_control.type").String() != "ephemeral" { t.Fatalf("messages[0].content[0] = %s, want injected system part", parts[0].Raw) } + if text := parts[0].Get("text").String(); text != "" && text != "You are Qwen Code." { + t.Fatalf("messages[0].content[0].text = %q, want empty string or default prompt", text) + } if parts[1].Get("type").String() != "text" || parts[1].Get("text").String() != "ABCDEFG" { t.Fatalf("messages[0].content[1] = %s, want text part with ABCDEFG", parts[1].Raw) } diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go index c92f539e..275be469 100644 --- a/internal/thinking/provider/claude/apply.go +++ b/internal/thinking/provider/claude/apply.go @@ -174,8 +174,7 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo // Ensure the request satisfies Claude constraints: // 1) Determine effective max_tokens (request overrides model default) // 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1 - // 3) If the adjusted budget falls below the model minimum, try raising max_tokens - // (clamped to MaxCompletionTokens); disable thinking if constraints are unsatisfiable + // 3) If the adjusted budget falls below the model minimum, leave the request unchanged // 4) If max_tokens came from model default, write it back into the request effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo) @@ -194,28 +193,8 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo minBudget = modelInfo.Thinking.Min } if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget { - // Enforcing budget_tokens < max_tokens pushed the budget below the model minimum. - // Try raising max_tokens to fit the original budget. - needed := budgetTokens + 1 - maxAllowed := 0 - if modelInfo != nil { - maxAllowed = modelInfo.MaxCompletionTokens - } - if maxAllowed > 0 && needed > maxAllowed { - // Cannot use original budget; cap max_tokens at model limit. - needed = maxAllowed - } - cappedBudget := needed - 1 - if cappedBudget < minBudget { - // Impossible to satisfy both budget >= minBudget and budget < max_tokens - // within the model's completion limit. Disable thinking entirely. - body, _ = sjson.DeleteBytes(body, "thinking") - return body - } - body, _ = sjson.SetBytes(body, "max_tokens", needed) - if cappedBudget != budgetTokens { - body, _ = sjson.SetBytes(body, "thinking.budget_tokens", cappedBudget) - } + // If enforcing the max_tokens constraint would push the budget below the model minimum, + // leave the request unchanged. return body } diff --git a/internal/thinking/provider/claude/apply_test.go b/internal/thinking/provider/claude/apply_test.go deleted file mode 100644 index 46b3f3b7..00000000 --- a/internal/thinking/provider/claude/apply_test.go +++ /dev/null @@ -1,99 +0,0 @@ -package claude - -import ( - "testing" - - "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" - "github.com/tidwall/gjson" -) - -func TestNormalizeClaudeBudget_RaisesMaxTokens(t *testing.T) { - a := &Applier{} - modelInfo := ®istry.ModelInfo{ - MaxCompletionTokens: 64000, - Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000}, - } - body := []byte(`{"max_tokens":1000,"thinking":{"type":"enabled","budget_tokens":5000}}`) - - out := a.normalizeClaudeBudget(body, 5000, modelInfo) - - maxTok := gjson.GetBytes(out, "max_tokens").Int() - if maxTok != 5001 { - t.Fatalf("max_tokens = %d, want 5001, body=%s", maxTok, string(out)) - } -} - -func TestNormalizeClaudeBudget_ClampsToModelMax(t *testing.T) { - a := &Applier{} - modelInfo := ®istry.ModelInfo{ - MaxCompletionTokens: 64000, - Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000}, - } - body := []byte(`{"max_tokens":500,"thinking":{"type":"enabled","budget_tokens":200000}}`) - - out := a.normalizeClaudeBudget(body, 200000, modelInfo) - - maxTok := gjson.GetBytes(out, "max_tokens").Int() - if maxTok != 64000 { - t.Fatalf("max_tokens = %d, want 64000 (capped to model limit), body=%s", maxTok, string(out)) - } - budget := gjson.GetBytes(out, "thinking.budget_tokens").Int() - if budget != 63999 { - t.Fatalf("budget_tokens = %d, want 63999 (max_tokens-1), body=%s", budget, string(out)) - } -} - -func TestNormalizeClaudeBudget_DisablesThinkingWhenUnsatisfiable(t *testing.T) { - a := &Applier{} - modelInfo := ®istry.ModelInfo{ - MaxCompletionTokens: 1000, - Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000}, - } - body := []byte(`{"max_tokens":500,"thinking":{"type":"enabled","budget_tokens":2000}}`) - - out := a.normalizeClaudeBudget(body, 2000, modelInfo) - - if gjson.GetBytes(out, "thinking").Exists() { - t.Fatalf("thinking should be removed when constraints are unsatisfiable, body=%s", string(out)) - } -} - -func TestNormalizeClaudeBudget_NoClamping(t *testing.T) { - a := &Applier{} - modelInfo := ®istry.ModelInfo{ - MaxCompletionTokens: 64000, - Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000}, - } - body := []byte(`{"max_tokens":32000,"thinking":{"type":"enabled","budget_tokens":16000}}`) - - out := a.normalizeClaudeBudget(body, 16000, modelInfo) - - maxTok := gjson.GetBytes(out, "max_tokens").Int() - if maxTok != 32000 { - t.Fatalf("max_tokens should remain 32000, got %d, body=%s", maxTok, string(out)) - } - budget := gjson.GetBytes(out, "thinking.budget_tokens").Int() - if budget != 16000 { - t.Fatalf("budget_tokens should remain 16000, got %d, body=%s", budget, string(out)) - } -} - -func TestNormalizeClaudeBudget_AdjustsBudgetToMaxMinus1(t *testing.T) { - a := &Applier{} - modelInfo := ®istry.ModelInfo{ - MaxCompletionTokens: 8192, - Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000}, - } - body := []byte(`{"max_tokens":8192,"thinking":{"type":"enabled","budget_tokens":10000}}`) - - out := a.normalizeClaudeBudget(body, 10000, modelInfo) - - maxTok := gjson.GetBytes(out, "max_tokens").Int() - if maxTok != 8192 { - t.Fatalf("max_tokens = %d, want 8192 (unchanged), body=%s", maxTok, string(out)) - } - budget := gjson.GetBytes(out, "thinking.budget_tokens").Int() - if budget != 8191 { - t.Fatalf("budget_tokens = %d, want 8191 (max_tokens-1), body=%s", budget, string(out)) - } -} diff --git a/sdk/cliproxy/service_stale_state_test.go b/sdk/cliproxy/service_stale_state_test.go index db5ce467..010218d9 100644 --- a/sdk/cliproxy/service_stale_state_test.go +++ b/sdk/cliproxy/service_stale_state_test.go @@ -53,8 +53,24 @@ func TestServiceApplyCoreAuthAddOrUpdate_DeleteReAddDoesNotInheritStaleRuntimeSt if disabled.NextRefreshAfter.IsZero() { t.Fatalf("expected disabled auth to still carry prior NextRefreshAfter for regression setup") } + + // Reconcile prunes unsupported model state during registration, so seed the + // disabled snapshot explicitly before exercising delete -> re-add behavior. + disabled.ModelStates = map[string]*coreauth.ModelState{ + modelID: { + Quota: coreauth.QuotaState{BackoffLevel: 7}, + }, + } + if _, err := service.coreManager.Update(context.Background(), disabled); err != nil { + t.Fatalf("seed disabled auth stale ModelStates: %v", err) + } + + disabled, ok = service.coreManager.GetByID(authID) + if !ok || disabled == nil { + t.Fatalf("expected disabled auth after stale state seeding") + } if len(disabled.ModelStates) == 0 { - t.Fatalf("expected disabled auth to still carry prior ModelStates for regression setup") + t.Fatalf("expected disabled auth to carry seeded ModelStates for regression setup") } service.applyCoreAuthAddOrUpdate(context.Background(), &coreauth.Auth{ From f5aa68ecdaae6789da4f4b226367b43b3d0928eb Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 8 Apr 2026 10:12:51 +0800 Subject: [PATCH 5/7] chore: add workflow to prevent AGENTS.md modifications in pull requests --- .github/workflows/agents-md-guard.yml | 81 +++++++++++++++++++++++++++ AGENTS.md | 58 +++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 .github/workflows/agents-md-guard.yml create mode 100644 AGENTS.md diff --git a/.github/workflows/agents-md-guard.yml b/.github/workflows/agents-md-guard.yml new file mode 100644 index 00000000..c9ac0cb4 --- /dev/null +++ b/.github/workflows/agents-md-guard.yml @@ -0,0 +1,81 @@ +name: agents-md-guard + +on: + pull_request_target: + types: + - opened + - synchronize + - reopened + +permissions: + contents: read + issues: write + pull-requests: write + +jobs: + close-when-agents-md-changed: + runs-on: ubuntu-latest + steps: + - name: Detect AGENTS.md changes and close PR + uses: actions/github-script@v7 + with: + script: | + const prNumber = context.payload.pull_request.number; + const { owner, repo } = context.repo; + + const files = await github.paginate(github.rest.pulls.listFiles, { + owner, + repo, + pull_number: prNumber, + per_page: 100, + }); + + const touchesAgentsMd = (path) => + typeof path === "string" && + (path === "AGENTS.md" || path.endsWith("/AGENTS.md")); + + const touched = files.filter( + (f) => touchesAgentsMd(f.filename) || touchesAgentsMd(f.previous_filename), + ); + + if (touched.length === 0) { + core.info("No AGENTS.md changes detected."); + return; + } + + const changedList = touched + .map((f) => + f.previous_filename && f.previous_filename !== f.filename + ? `- ${f.previous_filename} -> ${f.filename}` + : `- ${f.filename}`, + ) + .join("\n"); + + const body = [ + "This repository does not allow modifying `AGENTS.md` in pull requests.", + "", + "Detected changes:", + changedList, + "", + "Please revert these changes and open a new PR without touching `AGENTS.md`.", + ].join("\n"); + + try { + await github.rest.issues.createComment({ + owner, + repo, + issue_number: prNumber, + body, + }); + } catch (error) { + core.warning(`Failed to comment on PR #${prNumber}: ${error.message}`); + } + + await github.rest.pulls.update({ + owner, + repo, + pull_number: prNumber, + state: "closed", + }); + + core.setFailed("PR modifies AGENTS.md"); diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..d4a07e19 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,58 @@ +# AGENTS.md + +Go 1.26+ proxy server providing OpenAI/Gemini/Claude/Codex compatible APIs with OAuth and round-robin load balancing. + +## Repository +- GitHub: https://github.com/router-for-me/CLIProxyAPI + +## Commands +```bash +gofmt -w . # Format (required after Go changes) +go build -o cli-proxy-api ./cmd/server # Build +go run ./cmd/server # Run dev server +go test ./... # Run all tests +go test -v -run TestName ./path/to/pkg # Run single test +go build -o test-output ./cmd/server && rm test-output # Verify compile (REQUIRED after changes) +``` +- Common flags: `--config `, `--tui`, `--standalone`, `--local-model`, `--no-browser`, `--oauth-callback-port ` + +## Config +- Default config: `config.yaml` (template: `config.example.yaml`) +- `.env` is auto-loaded from the working directory +- Auth material defaults under `auths/` +- Storage backends: file-based default; optional Postgres/git/object store (`PGSTORE_*`, `GITSTORE_*`, `OBJECTSTORE_*`) + +## Architecture +- `cmd/server/` — Server entrypoint +- `internal/api/` — Gin HTTP API (routes, middleware, modules) +- `internal/api/modules/amp/` — Amp integration (Amp-style routes + reverse proxy) +- `internal/thinking/` — Thinking/reasoning token processing (`internal/thinking/provider/` for per-provider config) +- `internal/runtime/executor/` — Per-provider runtime executors (incl. Codex WebSocket) +- `internal/translator/` — Provider protocol translators (and shared `common`) +- `internal/registry/` — Model registry + remote updater (`StartModelsUpdater`); `--local-model` disables remote updates +- `internal/store/` — Storage implementations and secret resolution +- `internal/managementasset/` — Config snapshots and management assets +- `internal/cache/` — Request signature caching +- `internal/watcher/` — Config hot-reload and watchers +- `internal/wsrelay/` — WebSocket relay sessions +- `internal/usage/` — Usage and token accounting +- `internal/tui/` — Bubbletea terminal UI (`--tui`, `--standalone`) +- `sdk/cliproxy/` — Embeddable SDK entry (service/builder/watchers/pipeline) +- `test/` — Cross-module integration tests + +## Code Conventions +- Keep changes small and simple (KISS) +- Comments in English only +- If editing code that already contains non-English comments, translate them to English (don’t add new non-English comments) +- For user-visible strings, keep the existing language used in that file/area +- New Markdown docs should be in English unless the file is explicitly language-specific (e.g. `README_CN.md`) +- As a rule, do not make standalone changes to `internal/translator/`. You may modify it only as part of broader changes elsewhere. +- If a task requires changing only `internal/translator/`, run `gh repo view --json viewerPermission -q .viewerPermission` to confirm you have `WRITE`, `MAINTAIN`, or `ADMIN`. If you do, you may proceed; otherwise, file a GitHub issue including the goal, rationale, and the intended implementation code, then stop further work. +- `internal/runtime/executor/` should contain executors and their unit tests only. Place any helper/supporting files under `internal/runtime/executor/helps/`. +- Follow `gofmt`; keep imports goimports-style; wrap errors with context where helpful +- Do not use `log.Fatal`/`log.Fatalf` (terminates the process); prefer returning errors and logging via logrus +- Shadowed variables: use method suffix (`errStart := server.Start()`) +- Wrap defer errors: `defer func() { if err := f.Close(); err != nil { log.Errorf(...) } }()` +- Use logrus structured logging; avoid leaking secrets/tokens in logs +- Avoid panics in HTTP handlers; prefer logged errors and meaningful HTTP status codes +- Timeouts are allowed only during credential acquisition; after an upstream connection is established, do not set timeouts for any subsequent network behavior. Intentional exceptions that must remain allowed are the Codex websocket liveness deadlines in `internal/runtime/executor/codex_websockets_executor.go`, the wsrelay session deadlines in `internal/wsrelay/session.go`, the management APICall timeout in `internal/api/handlers/management/api_tools.go`, and the `cmd/fetch_antigravity_models` utility timeouts From 70efd4e016e1d9554d9eb6b059be0a7fb7b76238 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Wed, 8 Apr 2026 10:35:49 +0800 Subject: [PATCH 6/7] chore: add workflow to retarget main PRs to dev automatically --- .../auto-retarget-main-pr-to-dev.yml | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 .github/workflows/auto-retarget-main-pr-to-dev.yml diff --git a/.github/workflows/auto-retarget-main-pr-to-dev.yml b/.github/workflows/auto-retarget-main-pr-to-dev.yml new file mode 100644 index 00000000..3732a723 --- /dev/null +++ b/.github/workflows/auto-retarget-main-pr-to-dev.yml @@ -0,0 +1,73 @@ +name: auto-retarget-main-pr-to-dev + +on: + pull_request_target: + types: + - opened + - reopened + - edited + branches: + - main + +permissions: + contents: read + issues: write + pull-requests: write + +jobs: + retarget: + if: github.actor != 'github-actions[bot]' + runs-on: ubuntu-latest + steps: + - name: Retarget PR base to dev + uses: actions/github-script@v7 + with: + script: | + const pr = context.payload.pull_request; + const prNumber = pr.number; + const { owner, repo } = context.repo; + + const baseRef = pr.base?.ref; + const headRef = pr.head?.ref; + const desiredBase = "dev"; + + if (baseRef !== "main") { + core.info(`PR #${prNumber} base is ${baseRef}; nothing to do.`); + return; + } + + if (headRef === desiredBase) { + core.info(`PR #${prNumber} is ${desiredBase} -> main; skipping retarget.`); + return; + } + + core.info(`Retargeting PR #${prNumber} base from ${baseRef} to ${desiredBase}.`); + + try { + await github.rest.pulls.update({ + owner, + repo, + pull_number: prNumber, + base: desiredBase, + }); + } catch (error) { + core.setFailed(`Failed to retarget PR #${prNumber} to ${desiredBase}: ${error.message}`); + return; + } + + const body = [ + `This pull request targeted \`${baseRef}\`.`, + "", + `The base branch has been automatically changed to \`${desiredBase}\`.`, + ].join("\n"); + + try { + await github.rest.issues.createComment({ + owner, + repo, + issue_number: prNumber, + body, + }); + } catch (error) { + core.warning(`Failed to comment on PR #${prNumber}: ${error.message}`); + } From 343a2fc2f78cdec67858ec6c1d33b910cb444324 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 8 Apr 2026 12:33:16 +0800 Subject: [PATCH 7/7] docs: update AGENTS.md for improved clarity and detail in commands and architecture --- AGENTS.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index d4a07e19..57027473 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -7,12 +7,12 @@ Go 1.26+ proxy server providing OpenAI/Gemini/Claude/Codex compatible APIs with ## Commands ```bash -gofmt -w . # Format (required after Go changes) -go build -o cli-proxy-api ./cmd/server # Build -go run ./cmd/server # Run dev server -go test ./... # Run all tests -go test -v -run TestName ./path/to/pkg # Run single test -go build -o test-output ./cmd/server && rm test-output # Verify compile (REQUIRED after changes) +gofmt -w . # Format (required after Go changes) +go build -o cli-proxy-api ./cmd/server # Build +go run ./cmd/server # Run dev server +go test ./... # Run all tests +go test -v -run TestName ./path/to/pkg # Run single test +go build -o test-output ./cmd/server && rm test-output # Verify compile (REQUIRED after changes) ``` - Common flags: `--config `, `--tui`, `--standalone`, `--local-model`, `--no-browser`, `--oauth-callback-port ` @@ -26,7 +26,7 @@ go build -o test-output ./cmd/server && rm test-output # Verify compile (REQUIR - `cmd/server/` — Server entrypoint - `internal/api/` — Gin HTTP API (routes, middleware, modules) - `internal/api/modules/amp/` — Amp integration (Amp-style routes + reverse proxy) -- `internal/thinking/` — Thinking/reasoning token processing (`internal/thinking/provider/` for per-provider config) +- `internal/thinking/` — Main thinking/reasoning pipeline. `ApplyThinking()` (apply.go) parses suffixes (`suffix.go`, suffix overrides body), normalizes config to canonical `ThinkingConfig` (`types.go`), normalizes and validates centrally (`validate.go`/`convert.go`), then applies provider-specific output via `ProviderApplier`. Do not break this "canonical representation → per-provider translation" architecture. - `internal/runtime/executor/` — Per-provider runtime executors (incl. Codex WebSocket) - `internal/translator/` — Provider protocol translators (and shared `common`) - `internal/registry/` — Model registry + remote updater (`StartModelsUpdater`); `--local-model` disables remote updates