diff --git a/gitlab-duo-codex-parity-plan.md b/gitlab-duo-codex-parity-plan.md new file mode 100644 index 00000000..f4fc90d0 --- /dev/null +++ b/gitlab-duo-codex-parity-plan.md @@ -0,0 +1,278 @@ +# Plan: GitLab Duo Codex Parity + +**Generated**: 2026-03-10 +**Estimated Complexity**: High + +## Overview +Bring GitLab Duo support from the current "auth + basic executor" stage to the same practical level as `codex` inside `CLIProxyAPI`: a user logs in once, points external clients such as Claude Code at `CLIProxyAPI`, selects GitLab Duo-backed models, and gets stable streaming, multi-turn behavior, tool calling compatibility, and predictable model routing without manual provider-specific workarounds. + +The core architectural shift is to stop treating GitLab Duo as only two REST wrappers (`/api/v4/chat/completions` and `/api/v4/code_suggestions/completions`) and instead use GitLab's `direct_access` contract as the primary runtime entrypoint wherever possible. Official GitLab docs confirm that `direct_access` returns AI gateway connection details, headers, token, and expiry; that contract is the closest path to codex-like provider behavior. + +## Prerequisites +- Official GitLab Duo API references confirmed during implementation: + - `POST /api/v4/code_suggestions/direct_access` + - `POST /api/v4/code_suggestions/completions` + - `POST /api/v4/chat/completions` +- Access to at least one real GitLab Duo account for manual verification. +- One downstream client target for acceptance testing: + - Claude Code against Claude-compatible endpoint + - OpenAI-compatible client against `/v1/chat/completions` and `/v1/responses` +- Existing PR branch as starting point: + - `feat/gitlab-duo-auth` + - PR [#2028](https://github.com/router-for-me/CLIProxyAPI/pull/2028) + +## Definition Of Done +- GitLab Duo models can be used via `CLIProxyAPI` from the same client surfaces that already work for `codex`. 
+- Upstream streaming is real passthrough or faithful chunked forwarding, not synthetic whole-response replay. +- Tool/function calling survives translation layers without dropping fields or corrupting names. +- Multi-turn and session semantics are stable across `chat/completions`, `responses`, and Claude-compatible routes. +- Model exposure stays current from GitLab metadata or gateway discovery without hardcoded stale model tables. +- `go test ./...` stays green and at least one real manual end-to-end client flow is documented. + +## Sprint 1: Contract And Gap Closure +**Goal**: Replace assumptions with a hard compatibility contract between current `codex` behavior and what GitLab Duo can actually support. + +**Demo/Validation**: +- Written matrix showing `codex` features vs current GitLab Duo behavior. +- One checked-in developer note or test fixture for real GitLab Duo payload examples. + +### Task 1.1: Freeze Codex Parity Checklist +- **Location**: [internal/runtime/executor/codex_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/codex_executor.go), [internal/runtime/executor/codex_websockets_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/codex_websockets_executor.go), [sdk/api/handlers/openai/openai_responses_handlers.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_handlers.go), [sdk/api/handlers/openai/openai_responses_websocket.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_websocket.go) +- **Description**: Produce a concrete feature matrix for `codex`: HTTP execute, SSE execute, `/v1/responses`, websocket downstream path, tool calling, request IDs, session close semantics, and model registration behavior. +- **Dependencies**: None +- **Acceptance Criteria**: + - A checklist exists in repo docs or issue notes. + - Each capability is marked `required`, `optional`, or `not possible` for GitLab Duo. 
+- **Validation**: + - Review against current `codex` code paths. + +### Task 1.2: Lock GitLab Duo Runtime Contract +- **Location**: [internal/auth/gitlab/gitlab.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/auth/gitlab/gitlab.go), [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go) +- **Description**: Validate the exact upstream contract we can rely on: + - `direct_access` fields and refresh cadence + - whether AI gateway path is usable directly + - when `chat/completions` is available vs when fallback is required + - what streaming shape is returned by `code_suggestions/completions?stream=true` +- **Dependencies**: Task 1.1 +- **Acceptance Criteria**: + - GitLab transport decision is explicit: `gateway-first`, `REST-first`, or `hybrid`. + - Unknown areas are isolated behind feature flags, not spread across executor logic. +- **Validation**: + - Official docs + captured real responses from a Duo account. + +### Task 1.3: Define Client-Facing Compatibility Targets +- **Location**: [README.md](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/README.md), [gitlab-duo-codex-parity-plan.md](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/gitlab-duo-codex-parity-plan.md) +- **Description**: Define exactly which external flows must work to call GitLab Duo support "like codex". +- **Dependencies**: Task 1.2 +- **Acceptance Criteria**: + - Required surfaces are listed: + - Claude-compatible route + - OpenAI `chat/completions` + - OpenAI `responses` + - optional downstream websocket path + - Non-goals are explicit if GitLab upstream cannot support them. +- **Validation**: + - Maintainer review of stated scope. + +## Sprint 2: Primary Transport Parity +**Goal**: Move GitLab Duo execution onto a transport that supports codex-like runtime behavior. + +**Demo/Validation**: +- A GitLab Duo model works over real streaming through `/v1/chat/completions`. 
+- No synthetic "collect full body then fake stream" path remains on the primary flow. + +### Task 2.1: Refactor GitLab Executor Into Strategy Layers +- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go) +- **Description**: Split current executor into explicit strategies: + - auth refresh/direct access refresh + - gateway transport + - GitLab REST fallback transport + - downstream translation helpers +- **Dependencies**: Sprint 1 +- **Acceptance Criteria**: + - Executor no longer mixes discovery, refresh, fallback selection, and response synthesis in one path. + - Transport choice is testable in isolation. +- **Validation**: + - Unit tests for strategy selection and fallback boundaries. + +### Task 2.2: Implement Real Streaming Path +- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [internal/runtime/executor/gitlab_executor_test.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor_test.go) +- **Description**: Replace synthetic streaming with true upstream incremental forwarding: + - use gateway stream if available + - otherwise consume GitLab Code Suggestions streaming response and map chunks incrementally +- **Dependencies**: Task 2.1 +- **Acceptance Criteria**: + - `ExecuteStream` emits chunks before upstream completion. + - error handling preserves status and early failure semantics. 
+- **Validation**: + - tests with chunked upstream server + - manual curl check against `/v1/chat/completions` with `stream=true` + +### Task 2.3: Preserve Upstream Auth And Headers Correctly +- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [internal/auth/gitlab/gitlab.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/auth/gitlab/gitlab.go) +- **Description**: Use `direct_access` connection details as first-class transport state: + - gateway token + - expiry + - mandatory forwarded headers + - model metadata +- **Dependencies**: Task 2.1 +- **Acceptance Criteria**: + - executor stops ignoring gateway headers/token when transport requires them + - refresh logic never over-fetches `direct_access` +- **Validation**: + - tests verifying propagated headers and refresh interval behavior + +## Sprint 3: Request/Response Semantics Parity +**Goal**: Make GitLab Duo behave correctly under the same request shapes that current `codex` consumers send. + +**Demo/Validation**: +- OpenAI and Claude-compatible clients can do non-streaming and streaming conversations without losing structure. 
+ +### Task 3.1: Normalize Multi-Turn Message Mapping +- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [sdk/translator](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/translator) +- **Description**: Replace the current "flatten prompt into one instruction" behavior with stable multi-turn mapping: + - preserve system context + - preserve user/assistant ordering + - maintain bounded context truncation +- **Dependencies**: Sprint 2 +- **Acceptance Criteria**: + - multi-turn requests are not collapsed into a lossy single string unless fallback mode explicitly requires it + - truncation policy is deterministic and tested +- **Validation**: + - golden tests for request mapping + +### Task 3.2: Tool Calling Compatibility Layer +- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [sdk/api/handlers/openai/openai_responses_handlers.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_handlers.go) +- **Description**: Decide and implement one of two paths: + - native pass-through if GitLab gateway supports tool/function structures + - strict downgrade path with explicit unsupported errors instead of silent field loss +- **Dependencies**: Task 3.1 +- **Acceptance Criteria**: + - tool-related fields are either preserved correctly or rejected explicitly + - no silent corruption of tool names, tool calls, or tool results +- **Validation**: + - table-driven tests for tool payloads + - one manual client scenario using tools + +### Task 3.3: Token Counting And Usage Reporting Fidelity +- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), 
[internal/runtime/executor/usage_helpers.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/usage_helpers.go) +- **Description**: Improve token/usage reporting so GitLab models behave like first-class providers in logs and scheduling. +- **Dependencies**: Sprint 2 +- **Acceptance Criteria**: + - `CountTokens` uses the closest supported estimation path + - usage logging distinguishes prompt vs completion when possible +- **Validation**: + - unit tests for token estimation outputs + +## Sprint 4: Responses And Session Parity +**Goal**: Reach codex-level support for OpenAI Responses clients and long-lived sessions where GitLab upstream permits it. + +**Demo/Validation**: +- `/v1/responses` works with GitLab Duo in a realistic client flow. +- If websocket parity is not possible, the code explicitly declines it and keeps HTTP paths stable. + +### Task 4.1: Make GitLab Compatible With `/v1/responses` +- **Location**: [sdk/api/handlers/openai/openai_responses_handlers.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_handlers.go), [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go) +- **Description**: Ensure GitLab transport can safely back the Responses API path, including compact responses if applicable. 
+- **Dependencies**: Sprint 3 +- **Acceptance Criteria**: + - GitLab Duo can be selected behind `/v1/responses` + - response IDs and follow-up semantics are defined +- **Validation**: + - handler tests analogous to codex/openai responses tests + +### Task 4.2: Evaluate Downstream Websocket Parity +- **Location**: [sdk/api/handlers/openai/openai_responses_websocket.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_websocket.go), [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go) +- **Description**: Decide whether GitLab Duo can support downstream websocket sessions like codex: + - if yes, add session-aware execution path + - if no, mark GitLab auth as websocket-ineligible and keep HTTP routes first-class +- **Dependencies**: Task 4.1 +- **Acceptance Criteria**: + - websocket behavior is explicit, not accidental + - no route claims websocket support when the upstream cannot honor it +- **Validation**: + - websocket handler tests or explicit capability tests + +### Task 4.3: Add Session Cleanup And Failure Recovery Semantics +- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [sdk/cliproxy/auth/conductor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/cliproxy/auth/conductor.go) +- **Description**: Add codex-like session cleanup, retry boundaries, and model suspension/resume behavior for GitLab failures and quota events. +- **Dependencies**: Sprint 2 +- **Acceptance Criteria**: + - auth/model cooldown behavior is predictable on GitLab 4xx/5xx/quota responses + - executor cleans up per-session resources if any are introduced +- **Validation**: + - tests for quota and retry behavior + +## Sprint 5: Client UX, Model UX, And Manual E2E +**Goal**: Make GitLab Duo feel like a normal built-in provider to operators and downstream clients. 
+**Demo/Validation**: +- A documented setup exists for "login once, point Claude Code at CLIProxyAPI, use GitLab Duo-backed model". + +### Task 5.1: Model Alias And Provider UX Cleanup +- **Location**: [sdk/cliproxy/service.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/cliproxy/service.go), [README.md](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/README.md) +- **Description**: Normalize what users see: + - stable alias such as `gitlab-duo` + - discovered upstream model names + - optional prefix behavior + - account labels that clearly distinguish OAuth vs PAT +- **Dependencies**: Sprint 3 +- **Acceptance Criteria**: + - users can select a stable GitLab alias even when upstream model changes + - dynamic model discovery does not cause confusing model churn +- **Validation**: + - registry tests and manual `/v1/models` inspection + +### Task 5.2: Add Real End-To-End Acceptance Tests +- **Location**: [internal/runtime/executor/gitlab_executor_test.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor_test.go), [sdk/api/handlers/openai](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai) +- **Description**: Add higher-level tests covering the actual proxy surfaces: + - OpenAI `chat/completions` + - OpenAI `responses` + - Claude-compatible request path if GitLab is routed there +- **Dependencies**: Sprint 4 +- **Acceptance Criteria**: + - tests fail if streaming regresses into synthetic buffering again + - tests cover at least one tool-related request and one multi-turn request +- **Validation**: + - `go test ./...` + +### Task 5.3: Publish Operator Documentation +- **Location**: [README.md](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/README.md) +- **Description**: Document: + - OAuth setup requirements + - PAT requirements + - current capability matrix + - known limitations if websocket/tool parity is partial +- **Dependencies**: Task 5.1 +- **Acceptance Criteria**: + - setup instructions are enough 
for a new user to reproduce the GitLab Duo flow + - limitations are explicit +- **Validation**: + - dry-run docs review from a clean environment + +## Testing Strategy +- Keep `go test ./...` green after every committable task. +- Add table-driven tests first for request mapping, refresh behavior, and dynamic model registration. +- Add transport tests with `httptest.Server` for: + - real chunked streaming + - header propagation from `direct_access` + - upstream fallback rules +- Add at least one manual acceptance checklist: + - login via OAuth + - login via PAT + - list models + - run one streaming prompt via OpenAI route + - run one prompt from the target downstream client + +## Potential Risks & Gotchas +- GitLab public docs expose `direct_access`, but do not fully document every possible AI gateway path. We should isolate any empirically discovered gateway assumptions behind one transport layer and feature flags. +- `chat/completions` availability differs by GitLab offering and version. The executor must not assume it always exists. +- Code Suggestions is completion-oriented; lossy mapping from rich chat/tool payloads will make GitLab Duo feel worse than codex unless explicitly handled. +- Synthetic streaming is not good enough for codex parity and will cause regressions in interactive clients. +- Dynamic model discovery can create unstable UX if the stable alias and discovered model IDs are not separated cleanly. +- PAT auth may validate successfully while still lacking effective Duo permissions. Error reporting must surface this explicitly. + +## Rollback Plan +- Keep the current basic GitLab executor behind a fallback mode until the new transport path is stable. +- If parity work destabilizes existing providers, revert only GitLab-specific executor changes and leave auth support intact. +- Preserve the stable `gitlab-duo` alias so rollback does not break client configuration. 
diff --git a/internal/runtime/executor/gitlab_executor.go b/internal/runtime/executor/gitlab_executor.go index 16441cce..5b2713ae 100644 --- a/internal/runtime/executor/gitlab_executor.go +++ b/internal/runtime/executor/gitlab_executor.go @@ -1,6 +1,7 @@ package executor import ( + "bufio" "bytes" "context" "encoding/json" @@ -14,6 +15,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -26,6 +28,7 @@ const ( gitLabAuthMethodPAT = "pat" gitLabChatEndpoint = "/api/v4/chat/completions" gitLabCodeSuggestionsEndpoint = "/api/v4/code_suggestions/completions" + gitLabSSEStreamingHeader = "X-Supports-Sse-Streaming" ) type GitLabExecutor struct { @@ -40,6 +43,15 @@ type gitLabPrompt struct { CodeSuggestionContext []map[string]any } +type gitLabOpenAIStreamState struct { + ID string + Model string + Created int64 + LastFullText string + Started bool + Finished bool +} + func NewGitLabExecutor(cfg *config.Config) *GitLabExecutor { return &GitLabExecutor{cfg: cfg} } @@ -62,7 +74,7 @@ func (e *GitLabExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r return resp, err } - text, err := e.invoke(ctx, auth, prompt) + text, err := e.invokeText(ctx, auth, prompt) if err != nil { return resp, err } @@ -101,11 +113,16 @@ func (e *GitLabExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A return nil, statusErr{code: http.StatusBadRequest, msg: "gitlab duo executor: request has no usable text content"} } - text, err := e.invoke(ctx, auth, prompt) + if result, streamErr := e.requestCodeSuggestionsStream(ctx, auth, prompt, translated, req, 
opts, reporter); streamErr == nil { + return result, nil + } else if !shouldFallbackToCodeSuggestions(streamErr) { + return nil, streamErr + } + + text, err := e.invokeText(ctx, auth, prompt) if err != nil { return nil, err } - responseModel := gitLabResolvedModel(auth, req.Model) openAIResponse := buildGitLabOpenAIResponse(responseModel, text, translated) reporter.publish(ctx, parseOpenAIUsage(openAIResponse)) @@ -215,7 +232,7 @@ func (e *GitLabExecutor) translateToOpenAI(req cliproxyexecutor.Request, opts cl return sdktranslator.TranslateRequest(opts.SourceFormat, sdktranslator.FromString("openai"), baseModel, req.Payload, opts.Stream), nil } -func (e *GitLabExecutor) invoke(ctx context.Context, auth *cliproxyauth.Auth, prompt gitLabPrompt) (string, error) { +func (e *GitLabExecutor) invokeText(ctx context.Context, auth *cliproxyauth.Auth, prompt gitLabPrompt) (string, error) { if text, err := e.requestChat(ctx, auth, prompt); err == nil { return text, nil } else if !shouldFallbackToCodeSuggestions(err) { @@ -257,27 +274,189 @@ func (e *GitLabExecutor) requestCodeSuggestions(ctx context.Context, auth *clipr return e.doJSONTextRequest(ctx, auth, gitLabCodeSuggestionsEndpoint, body) } +func (e *GitLabExecutor) requestCodeSuggestionsStream( + ctx context.Context, + auth *cliproxyauth.Auth, + prompt gitLabPrompt, + translated []byte, + req cliproxyexecutor.Request, + opts cliproxyexecutor.Options, + reporter *usageReporter, +) (*cliproxyexecutor.StreamResult, error) { + contentAbove := strings.TrimSpace(prompt.ContentAboveCursor) + if contentAbove == "" { + contentAbove = prompt.Instruction + } + body := map[string]any{ + "current_file": map[string]any{ + "file_name": prompt.FileName, + "content_above_cursor": contentAbove, + "content_below_cursor": "", + }, + "intent": "generation", + "generation_type": "small_file", + "user_instruction": prompt.Instruction, + "stream": true, + } + if len(prompt.CodeSuggestionContext) > 0 { + body["context"] = 
prompt.CodeSuggestionContext + } + + httpResp, bodyRaw, err := e.doJSONRequest(ctx, auth, gitLabCodeSuggestionsEndpoint, body, "text/event-stream") + if err != nil { + return nil, err + } + if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { + defer func() { _ = httpResp.Body.Close() }() + respBody, readErr := io.ReadAll(httpResp.Body) + if readErr != nil { + recordAPIResponseError(ctx, e.cfg, readErr) + return nil, readErr + } + appendAPIResponseChunk(ctx, e.cfg, respBody) + return nil, statusErr{code: httpResp.StatusCode, msg: strings.TrimSpace(string(respBody))} + } + + responseModel := gitLabResolvedModel(auth, req.Model) + out := make(chan cliproxyexecutor.StreamChunk, 16) + go func() { + defer close(out) + defer func() { _ = httpResp.Body.Close() }() + + scanner := bufio.NewScanner(httpResp.Body) + scanner.Buffer(nil, 52_428_800) + + var ( + param any + eventName string + state gitLabOpenAIStreamState + ) + for scanner.Scan() { + line := bytes.Clone(scanner.Bytes()) + appendAPIResponseChunk(ctx, e.cfg, line) + trimmed := bytes.TrimSpace(line) + if len(trimmed) == 0 { + continue + } + if bytes.HasPrefix(trimmed, []byte("event:")) { + eventName = strings.TrimSpace(string(trimmed[len("event:"):])) + continue + } + if !bytes.HasPrefix(trimmed, []byte("data:")) { + continue + } + payload := bytes.TrimSpace(trimmed[len("data:"):]) + normalized := normalizeGitLabStreamChunk(eventName, payload, responseModel, &state) + eventName = "" + for _, item := range normalized { + if detail, ok := parseOpenAIStreamUsage(item); ok { + reporter.publish(ctx, detail) + } + chunks := sdktranslator.TranslateStream( + ctx, + sdktranslator.FromString("openai"), + opts.SourceFormat, + req.Model, + opts.OriginalRequest, + translated, + item, + ¶m, + ) + for i := range chunks { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])} + } + } + } + if errScan := scanner.Err(); errScan != nil { + recordAPIResponseError(ctx, e.cfg, errScan) + reporter.publishFailure(ctx) + 
out <- cliproxyexecutor.StreamChunk{Err: errScan} + return + } + if !state.Finished { + for _, item := range finalizeGitLabStream(responseModel, &state) { + chunks := sdktranslator.TranslateStream( + ctx, + sdktranslator.FromString("openai"), + opts.SourceFormat, + req.Model, + opts.OriginalRequest, + translated, + item, + ¶m, + ) + for i := range chunks { + out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])} + } + } + } + reporter.ensurePublished(ctx) + }() + + return &cliproxyexecutor.StreamResult{ + Headers: cloneGitLabStreamHeaders(httpResp.Header, bodyRaw), + Chunks: out, + }, nil +} + func (e *GitLabExecutor) doJSONTextRequest(ctx context.Context, auth *cliproxyauth.Auth, endpoint string, payload map[string]any) (string, error) { + resp, _, err := e.doJSONRequest(ctx, auth, endpoint, payload, "application/json") + if err != nil { + return "", err + } + defer func() { _ = resp.Body.Close() }() + + respBody, err := io.ReadAll(resp.Body) + if err != nil { + recordAPIResponseError(ctx, e.cfg, err) + return "", err + } + appendAPIResponseChunk(ctx, e.cfg, respBody) + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return "", statusErr{code: resp.StatusCode, msg: strings.TrimSpace(string(respBody))} + } + + text, err := parseGitLabTextResponse(endpoint, respBody) + if err != nil { + return "", err + } + return strings.TrimSpace(text), nil +} + +func (e *GitLabExecutor) doJSONRequest( + ctx context.Context, + auth *cliproxyauth.Auth, + endpoint string, + payload map[string]any, + accept string, +) (*http.Response, []byte, error) { token := gitLabPrimaryToken(auth) baseURL := gitLabBaseURL(auth) if token == "" || baseURL == "" { - return "", statusErr{code: http.StatusUnauthorized, msg: "gitlab duo executor: missing credentials"} + return nil, nil, statusErr{code: http.StatusUnauthorized, msg: "gitlab duo executor: missing credentials"} } body, err := json.Marshal(payload) if err != nil { - return "", fmt.Errorf("gitlab duo executor: marshal 
request failed: %w", err) + return nil, nil, fmt.Errorf("gitlab duo executor: marshal request failed: %w", err) } url := strings.TrimRight(baseURL, "/") + endpoint req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) if err != nil { - return "", err + return nil, nil, err } req.Header.Set("Authorization", "Bearer "+token) req.Header.Set("Content-Type", "application/json") - req.Header.Set("Accept", "application/json") + req.Header.Set("Accept", accept) req.Header.Set("User-Agent", "CLIProxyAPI/GitLab-Duo") + applyGitLabRequestHeaders(req, auth) + if strings.EqualFold(accept, "text/event-stream") { + req.Header.Set("Cache-Control", "no-cache") + req.Header.Set(gitLabSSEStreamingHeader, "true") + req.Header.Set("Accept-Encoding", "identity") + } var authID, authLabel, authType, authValue string if auth != nil { @@ -301,27 +480,10 @@ func (e *GitLabExecutor) doJSONTextRequest(ctx context.Context, auth *cliproxyau resp, err := httpClient.Do(req) if err != nil { recordAPIResponseError(ctx, e.cfg, err) - return "", err + return nil, body, err } - defer func() { _ = resp.Body.Close() }() recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone()) - - respBody, err := io.ReadAll(resp.Body) - if err != nil { - recordAPIResponseError(ctx, e.cfg, err) - return "", err - } - appendAPIResponseChunk(ctx, e.cfg, respBody) - - if resp.StatusCode < 200 || resp.StatusCode >= 300 { - return "", statusErr{code: resp.StatusCode, msg: strings.TrimSpace(string(respBody))} - } - - text, err := parseGitLabTextResponse(endpoint, respBody) - if err != nil { - return "", err - } - return strings.TrimSpace(text), nil + return resp, body, nil } func (e *GitLabExecutor) refreshOAuthToken(ctx context.Context, client *gitlab.AuthClient, auth *cliproxyauth.Auth, baseURL string) (*gitlab.TokenResponse, error) { @@ -455,6 +617,236 @@ func parseGitLabTextResponse(endpoint string, body []byte) (string, error) { return "", fmt.Errorf("gitlab duo 
executor: upstream returned no text payload") } +func applyGitLabRequestHeaders(req *http.Request, auth *cliproxyauth.Auth) { + if req == nil { + return + } + if auth != nil { + util.ApplyCustomHeadersFromAttrs(req, auth.Attributes) + } + for key, value := range gitLabGatewayHeaders(auth) { + if key == "" || value == "" { + continue + } + req.Header.Set(key, value) + } +} + +func gitLabGatewayHeaders(auth *cliproxyauth.Auth) map[string]string { + if auth == nil || auth.Metadata == nil { + return nil + } + raw, ok := auth.Metadata["duo_gateway_headers"] + if !ok { + return nil + } + out := make(map[string]string) + switch typed := raw.(type) { + case map[string]string: + for key, value := range typed { + key = strings.TrimSpace(key) + value = strings.TrimSpace(value) + if key != "" && value != "" { + out[key] = value + } + } + case map[string]any: + for key, value := range typed { + key = strings.TrimSpace(key) + if key == "" { + continue + } + strValue := strings.TrimSpace(fmt.Sprint(value)) + if strValue != "" { + out[key] = strValue + } + } + } + if len(out) == 0 { + return nil + } + return out +} + +func cloneGitLabStreamHeaders(headers http.Header, _ []byte) http.Header { + cloned := headers.Clone() + if cloned == nil { + cloned = make(http.Header) + } + cloned.Set("Content-Type", "text/event-stream") + return cloned +} + +func normalizeGitLabStreamChunk(eventName string, payload []byte, fallbackModel string, state *gitLabOpenAIStreamState) [][]byte { + payload = bytes.TrimSpace(payload) + if len(payload) == 0 { + return nil + } + if bytes.Equal(payload, []byte("[DONE]")) { + return finalizeGitLabStream(fallbackModel, state) + } + + root := gjson.ParseBytes(payload) + if root.Exists() { + if obj := root.Get("object").String(); obj == "chat.completion.chunk" { + return [][]byte{append([]byte("data: "), bytes.Clone(payload)...)} + } + if root.Get("choices.0.delta").Exists() || root.Get("choices.0.finish_reason").Exists() { + return [][]byte{append([]byte("data: 
"), bytes.Clone(payload)...)} + } + } + + state.ensureInitialized(fallbackModel, root) + + switch strings.TrimSpace(eventName) { + case "stream_end": + return finalizeGitLabStream(fallbackModel, state) + case "stream_start": + if text := extractGitLabStreamText(root); text != "" { + return state.emitText(text) + } + return nil + } + + if done := root.Get("done"); done.Exists() && done.Bool() { + return finalizeGitLabStream(fallbackModel, state) + } + if finishReason := strings.TrimSpace(root.Get("finish_reason").String()); finishReason != "" { + out := state.emitText(extractGitLabStreamText(root)) + return append(out, state.finish(finishReason)...) + } + + return state.emitText(extractGitLabStreamText(root)) +} + +func extractGitLabStreamText(root gjson.Result) string { + for _, key := range []string{ + "choices.0.delta.content", + "choices.0.text", + "delta.content", + "content_chunk", + "content", + "text", + "response", + "completion", + } { + if value := root.Get(key).String(); strings.TrimSpace(value) != "" { + return value + } + } + return "" +} + +func finalizeGitLabStream(fallbackModel string, state *gitLabOpenAIStreamState) [][]byte { + if state == nil { + return nil + } + state.ensureInitialized(fallbackModel, gjson.Result{}) + return state.finish("stop") +} + +func (s *gitLabOpenAIStreamState) ensureInitialized(fallbackModel string, root gjson.Result) { + if s == nil { + return + } + if s.ID == "" { + s.ID = fmt.Sprintf("gitlab-%d", time.Now().UnixNano()) + } + if s.Created == 0 { + s.Created = time.Now().Unix() + } + if s.Model == "" { + for _, key := range []string{"model.name", "model", "metadata.model_name"} { + if value := strings.TrimSpace(root.Get(key).String()); value != "" { + s.Model = value + break + } + } + } + if s.Model == "" { + s.Model = fallbackModel + } +} + +func (s *gitLabOpenAIStreamState) emitText(text string) [][]byte { + if s == nil { + return nil + } + if strings.TrimSpace(text) == "" { + return nil + } + delta := 
s.nextDelta(text) + if delta == "" { + return nil + } + out := make([][]byte, 0, 2) + if !s.Started { + out = append(out, s.buildChunk(map[string]any{"role": "assistant"}, "")) + s.Started = true + } + out = append(out, s.buildChunk(map[string]any{"content": delta}, "")) + return out +} + +func (s *gitLabOpenAIStreamState) finish(reason string) [][]byte { + if s == nil || s.Finished { + return nil + } + if !s.Started { + s.Started = true + } + s.Finished = true + return [][]byte{ + s.buildChunk(map[string]any{}, reason), + []byte("data: [DONE]"), + } +} + +func (s *gitLabOpenAIStreamState) nextDelta(text string) string { + if s == nil { + return text + } + if strings.TrimSpace(text) == "" { + return "" + } + if s.LastFullText == "" { + s.LastFullText = text + return text + } + if text == s.LastFullText { + return "" + } + if strings.HasPrefix(text, s.LastFullText) { + delta := text[len(s.LastFullText):] + s.LastFullText = text + return delta + } + s.LastFullText += text + return text +} + +func (s *gitLabOpenAIStreamState) buildChunk(delta map[string]any, finishReason string) []byte { + payload := map[string]any{ + "id": s.ID, + "object": "chat.completion.chunk", + "created": s.Created, + "model": s.Model, + "choices": []map[string]any{{ + "index": 0, + "delta": delta, + }}, + } + if finishReason != "" { + payload["choices"] = []map[string]any{{ + "index": 0, + "delta": delta, + "finish_reason": finishReason, + }} + } + raw, _ := json.Marshal(payload) + return append([]byte("data: "), raw...) 
+} + func shouldFallbackToCodeSuggestions(err error) bool { if err == nil { return false diff --git a/internal/runtime/executor/gitlab_executor_test.go b/internal/runtime/executor/gitlab_executor_test.go index 89eecaea..3836cd88 100644 --- a/internal/runtime/executor/gitlab_executor_test.go +++ b/internal/runtime/executor/gitlab_executor_test.go @@ -3,8 +3,10 @@ package executor import ( "context" "encoding/json" + "io" "net/http" "net/http/httptest" + "strings" "testing" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" @@ -153,3 +155,147 @@ func TestGitLabExecutorRefreshUpdatesMetadata(t *testing.T) { t.Fatalf("expected refreshed model metadata, got %#v", got) } } + +func TestGitLabExecutorExecuteStreamUsesCodeSuggestionsSSE(t *testing.T) { + var gotAccept, gotStreamingHeader, gotEncoding string + var gotStreamFlag bool + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != gitLabCodeSuggestionsEndpoint { + t.Fatalf("unexpected path %q", r.URL.Path) + } + gotAccept = r.Header.Get("Accept") + gotStreamingHeader = r.Header.Get(gitLabSSEStreamingHeader) + gotEncoding = r.Header.Get("Accept-Encoding") + gotStreamFlag = gjson.GetBytes(readBody(t, r), "stream").Bool() + + w.Header().Set("Content-Type", "text/event-stream") + _, _ = w.Write([]byte("event: stream_start\n")) + _, _ = w.Write([]byte("data: {\"model\":{\"name\":\"claude-sonnet-4-5\"}}\n\n")) + _, _ = w.Write([]byte("event: content_chunk\n")) + _, _ = w.Write([]byte("data: {\"content\":\"hello\"}\n\n")) + _, _ = w.Write([]byte("event: content_chunk\n")) + _, _ = w.Write([]byte("data: {\"content\":\" world\"}\n\n")) + _, _ = w.Write([]byte("event: stream_end\n")) + _, _ = w.Write([]byte("data: {}\n\n")) + })) + defer srv.Close() + + exec := NewGitLabExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{ + Provider: "gitlab", + Metadata: map[string]any{ + "base_url": srv.URL, + "access_token": "oauth-access", + "model_name": "claude-sonnet-4-5", 
+ }, + } + req := cliproxyexecutor.Request{ + Model: "gitlab-duo", + Payload: []byte(`{"model":"gitlab-duo","stream":true,"messages":[{"role":"user","content":"hello"}]}`), + } + + result, err := exec.ExecuteStream(context.Background(), auth, req, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai"), + }) + if err != nil { + t.Fatalf("ExecuteStream() error = %v", err) + } + + lines := collectStreamLines(t, result) + if gotAccept != "text/event-stream" { + t.Fatalf("Accept = %q, want text/event-stream", gotAccept) + } + if gotStreamingHeader != "true" { + t.Fatalf("%s = %q, want true", gitLabSSEStreamingHeader, gotStreamingHeader) + } + if gotEncoding != "identity" { + t.Fatalf("Accept-Encoding = %q, want identity", gotEncoding) + } + if !gotStreamFlag { + t.Fatalf("expected upstream request to set stream=true") + } + if len(lines) < 4 { + t.Fatalf("expected translated stream chunks, got %d", len(lines)) + } + if !strings.Contains(strings.Join(lines, "\n"), `"content":"hello"`) { + t.Fatalf("expected hello delta in stream, got %q", strings.Join(lines, "\n")) + } + if !strings.Contains(strings.Join(lines, "\n"), `"content":" world"`) { + t.Fatalf("expected world delta in stream, got %q", strings.Join(lines, "\n")) + } + if last := lines[len(lines)-1]; last != "data: [DONE]" { + t.Fatalf("expected stream terminator, got %q", last) + } +} + +func TestGitLabExecutorExecuteStreamFallsBackToSyntheticChat(t *testing.T) { + chatCalls := 0 + streamCalls := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case gitLabCodeSuggestionsEndpoint: + streamCalls++ + http.Error(w, "feature unavailable", http.StatusForbidden) + case gitLabChatEndpoint: + chatCalls++ + _, _ = w.Write([]byte(`"chat fallback response"`)) + default: + t.Fatalf("unexpected path %q", r.URL.Path) + } + })) + defer srv.Close() + + exec := NewGitLabExecutor(&config.Config{}) + auth := &cliproxyauth.Auth{ + Provider: 
"gitlab", + Metadata: map[string]any{ + "base_url": srv.URL, + "access_token": "oauth-access", + "model_name": "claude-sonnet-4-5", + }, + } + req := cliproxyexecutor.Request{ + Model: "gitlab-duo", + Payload: []byte(`{"model":"gitlab-duo","stream":true,"messages":[{"role":"user","content":"hello"}]}`), + } + + result, err := exec.ExecuteStream(context.Background(), auth, req, cliproxyexecutor.Options{ + SourceFormat: sdktranslator.FromString("openai"), + }) + if err != nil { + t.Fatalf("ExecuteStream() error = %v", err) + } + + lines := collectStreamLines(t, result) + if streamCalls != 1 { + t.Fatalf("expected streaming endpoint once, got %d", streamCalls) + } + if chatCalls != 1 { + t.Fatalf("expected chat fallback once, got %d", chatCalls) + } + if !strings.Contains(strings.Join(lines, "\n"), `"content":"chat fallback response"`) { + t.Fatalf("expected fallback content in stream, got %q", strings.Join(lines, "\n")) + } +} + +func collectStreamLines(t *testing.T, result *cliproxyexecutor.StreamResult) []string { + t.Helper() + lines := make([]string, 0, 8) + for chunk := range result.Chunks { + if chunk.Err != nil { + t.Fatalf("unexpected stream error: %v", chunk.Err) + } + lines = append(lines, string(chunk.Payload)) + } + return lines +} + +func readBody(t *testing.T, r *http.Request) []byte { + t.Helper() + defer func() { _ = r.Body.Close() }() + body, err := io.ReadAll(r.Body) + if err != nil { + t.Fatalf("ReadAll() error = %v", err) + } + return body +}