feat(gitlab): add duo streaming transport

LuxVTZ
2026-03-10 18:39:25 +04:00
parent 54c3eb1b1e
commit c631df8c3b
3 changed files with 843 additions and 27 deletions

gitlab-duo-codex-parity-plan.md

@@ -0,0 +1,278 @@
# Plan: GitLab Duo Codex Parity
**Generated**: 2026-03-10
**Estimated Complexity**: High
## Overview
Bring GitLab Duo support from the current "auth + basic executor" stage to the same practical level as `codex` inside `CLIProxyAPI`: a user logs in once, points external clients such as Claude Code at `CLIProxyAPI`, selects GitLab Duo-backed models, and gets stable streaming, multi-turn behavior, tool calling compatibility, and predictable model routing without manual provider-specific workarounds.
The core architectural shift is to stop treating GitLab Duo as only two REST wrappers (`/api/v4/chat/completions` and `/api/v4/code_suggestions/completions`) and instead use GitLab's `direct_access` contract as the primary runtime entrypoint wherever possible. Official GitLab docs confirm that `direct_access` returns AI gateway connection details, headers, token, and expiry; that contract is the closest path to codex-like provider behavior.
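As a concrete sketch of that contract, the snippet below models the `direct_access` grant the executor would cache. The field names (`base_url`, `token`, `expires_at`, `headers`) are assumptions drawn from this plan's description of the response, not an authoritative GitLab schema:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// directAccessGrant models the connection details this plan expects from
// POST /api/v4/code_suggestions/direct_access: gateway base URL, short-lived
// token, expiry, and headers that must be forwarded to the AI gateway.
// Field names are illustrative assumptions, not a confirmed schema.
type directAccessGrant struct {
	BaseURL   string            `json:"base_url"`
	Token     string            `json:"token"`
	ExpiresAt int64             `json:"expires_at"`
	Headers   map[string]string `json:"headers"`
}

func parseDirectAccess(raw []byte) (directAccessGrant, error) {
	var grant directAccessGrant
	err := json.Unmarshal(raw, &grant)
	return grant, err
}

func main() {
	sample := []byte(`{"base_url":"https://example-ai-gateway.invalid","token":"t0k","expires_at":1767225600,"headers":{"X-Gitlab-Instance-Id":"abc"}}`)
	grant, err := parseDirectAccess(sample)
	fmt.Println(grant.BaseURL, len(grant.Headers), err)
}
```

Treating the grant as one typed value makes "transport state" (Task 2.3) a single cacheable struct rather than loose metadata lookups scattered through the executor.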
## Prerequisites
- Official GitLab Duo API references confirmed during implementation:
- `POST /api/v4/code_suggestions/direct_access`
- `POST /api/v4/code_suggestions/completions`
- `POST /api/v4/chat/completions`
- Access to at least one real GitLab Duo account for manual verification.
- One downstream client target for acceptance testing:
- Claude Code against Claude-compatible endpoint
- OpenAI-compatible client against `/v1/chat/completions` and `/v1/responses`
- Existing PR branch as starting point:
- `feat/gitlab-duo-auth`
- PR [#2028](https://github.com/router-for-me/CLIProxyAPI/pull/2028)
## Definition Of Done
- GitLab Duo models can be used via `CLIProxyAPI` from the same client surfaces that already work for `codex`.
- Upstream streaming is real passthrough or faithful chunked forwarding, not synthetic whole-response replay.
- Tool/function calling survives translation layers without dropping fields or corrupting names.
- Multi-turn and session semantics are stable across `chat/completions`, `responses`, and Claude-compatible routes.
- Model exposure stays current via GitLab metadata or gateway discovery, with no hardcoded, stale model tables.
- `go test ./...` stays green and at least one real manual end-to-end client flow is documented.
## Sprint 1: Contract And Gap Closure
**Goal**: Replace assumptions with a hard compatibility contract between current `codex` behavior and what GitLab Duo can actually support.
**Demo/Validation**:
- Written matrix showing `codex` features vs current GitLab Duo behavior.
- One checked-in developer note or test fixture for real GitLab Duo payload examples.
### Task 1.1: Freeze Codex Parity Checklist
- **Location**: [internal/runtime/executor/codex_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/codex_executor.go), [internal/runtime/executor/codex_websockets_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/codex_websockets_executor.go), [sdk/api/handlers/openai/openai_responses_handlers.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_handlers.go), [sdk/api/handlers/openai/openai_responses_websocket.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_websocket.go)
- **Description**: Produce a concrete feature matrix for `codex`: HTTP execute, SSE execute, `/v1/responses`, websocket downstream path, tool calling, request IDs, session close semantics, and model registration behavior.
- **Dependencies**: None
- **Acceptance Criteria**:
- A checklist exists in repo docs or issue notes.
- Each capability is marked `required`, `optional`, or `not possible` for GitLab Duo.
- **Validation**:
- Review against current `codex` code paths.
### Task 1.2: Lock GitLab Duo Runtime Contract
- **Location**: [internal/auth/gitlab/gitlab.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/auth/gitlab/gitlab.go), [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go)
- **Description**: Validate the exact upstream contract we can rely on:
- `direct_access` fields and refresh cadence
- whether AI gateway path is usable directly
- when `chat/completions` is available vs when fallback is required
- what streaming shape is returned by `code_suggestions/completions?stream=true`
- **Dependencies**: Task 1.1
- **Acceptance Criteria**:
- GitLab transport decision is explicit: `gateway-first`, `REST-first`, or `hybrid`.
- Unknown areas are isolated behind feature flags, not spread across executor logic.
- **Validation**:
- Official docs + captured real responses from a Duo account.
### Task 1.3: Define Client-Facing Compatibility Targets
- **Location**: [README.md](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/README.md), [gitlab-duo-codex-parity-plan.md](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/gitlab-duo-codex-parity-plan.md)
- **Description**: Define exactly which external flows must work to call GitLab Duo support "like codex".
- **Dependencies**: Task 1.2
- **Acceptance Criteria**:
- Required surfaces are listed:
- Claude-compatible route
- OpenAI `chat/completions`
- OpenAI `responses`
- optional downstream websocket path
- Non-goals are explicit if GitLab upstream cannot support them.
- **Validation**:
- Maintainer review of stated scope.
## Sprint 2: Primary Transport Parity
**Goal**: Move GitLab Duo execution onto a transport that supports codex-like runtime behavior.
**Demo/Validation**:
- A GitLab Duo model works over real streaming through `/v1/chat/completions`.
- No synthetic "collect full body then fake stream" path remains on the primary flow.
### Task 2.1: Refactor GitLab Executor Into Strategy Layers
- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go)
- **Description**: Split current executor into explicit strategies:
- auth refresh/direct access refresh
- gateway transport
- GitLab REST fallback transport
- downstream translation helpers
- **Dependencies**: Sprint 1
- **Acceptance Criteria**:
- Executor no longer mixes discovery, refresh, fallback selection, and response synthesis in one path.
- Transport choice is testable in isolation.
- **Validation**:
- Unit tests for strategy selection and fallback boundaries.
### Task 2.2: Implement Real Streaming Path
- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [internal/runtime/executor/gitlab_executor_test.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor_test.go)
- **Description**: Replace synthetic streaming with true upstream incremental forwarding:
- use gateway stream if available
- otherwise consume GitLab Code Suggestions streaming response and map chunks incrementally
- **Dependencies**: Task 2.1
- **Acceptance Criteria**:
  - `ExecuteStream` emits chunks before upstream completion.
  - Error handling preserves upstream status codes and early-failure semantics.
- **Validation**:
- tests with chunked upstream server
- manual curl check against `/v1/chat/completions` with `stream=true`
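The "chunks before upstream completion" criterion can be exercised with a self-contained probe: a chunked `httptest` upstream that flushes SSE frames with a delay, and a client that checks whether the first frame arrives before the upstream finishes writing. The endpoint and payload shapes here are illustrative only, not GitLab's actual contract:

```go
package main

import (
	"bufio"
	"fmt"
	"net/http"
	"net/http/httptest"
	"time"
)

// streamProbe starts an SSE-style upstream that emits two frames 100ms apart,
// then reads client-side and reports whether the first frame arrived before
// the upstream wrote the last one. A synthetic "buffer then replay" transport
// would fail this check.
func streamProbe() (firstBeforeLast bool, err error) {
	lastWritten := make(chan struct{})
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		f := w.(http.Flusher)
		fmt.Fprint(w, "data: {\"content\":\"hello\"}\n\n")
		f.Flush()
		time.Sleep(100 * time.Millisecond)
		fmt.Fprint(w, "data: {\"content\":\" world\"}\n\n")
		f.Flush()
		close(lastWritten)
	}))
	defer srv.Close()

	resp, err := http.Get(srv.URL)
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()

	sc := bufio.NewScanner(resp.Body)
	for sc.Scan() {
		if len(sc.Bytes()) == 0 {
			continue
		}
		// First data line seen: has the upstream already finished writing?
		select {
		case <-lastWritten:
			return false, sc.Err()
		default:
			return true, sc.Err()
		}
	}
	return false, sc.Err()
}

func main() {
	ok, err := streamProbe()
	fmt.Println(ok, err)
}
```

The same shape works as a regression test in `gitlab_executor_test.go` once `ExecuteStream` is wired in front of the client side.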
### Task 2.3: Preserve Upstream Auth And Headers Correctly
- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [internal/auth/gitlab/gitlab.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/auth/gitlab/gitlab.go)
- **Description**: Use `direct_access` connection details as first-class transport state:
- gateway token
- expiry
- mandatory forwarded headers
- model metadata
- **Dependencies**: Task 2.1
- **Acceptance Criteria**:
- executor stops ignoring gateway headers/token when transport requires them
- refresh logic never over-fetches `direct_access`
- **Validation**:
- tests verifying propagated headers and refresh interval behavior
## Sprint 3: Request/Response Semantics Parity
**Goal**: Make GitLab Duo behave correctly under the same request shapes that current `codex` consumers send.
**Demo/Validation**:
- OpenAI and Claude-compatible clients can do non-streaming and streaming conversations without losing structure.
### Task 3.1: Normalize Multi-Turn Message Mapping
- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [sdk/translator](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/translator)
- **Description**: Replace the current "flatten prompt into one instruction" behavior with stable multi-turn mapping:
- preserve system context
- preserve user/assistant ordering
- maintain bounded context truncation
- **Dependencies**: Sprint 2
- **Acceptance Criteria**:
- multi-turn requests are not collapsed into a lossy single string unless fallback mode explicitly requires it
- truncation policy is deterministic and tested
- **Validation**:
- golden tests for request mapping
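One possible shape for the deterministic, bounded truncation policy, assuming a simple character budget; the `message` type and budget semantics are illustrative stand-ins, with the real mapping living in `sdk/translator`:

```go
package main

import "fmt"

// message is a stand-in for a translated chat turn. truncateTurns keeps every
// system message plus the newest turns that fit a character budget, so the
// same input always truncates the same way.
type message struct {
	Role    string
	Content string
}

func truncateTurns(msgs []message, budget int) []message {
	var system, turns []message
	for _, m := range msgs {
		if m.Role == "system" {
			system = append(system, m)
		} else {
			turns = append(turns, m)
		}
	}
	// Walk backwards so the most recent turns survive truncation.
	used, keepFrom := 0, len(turns)
	for i := len(turns) - 1; i >= 0; i-- {
		if used+len(turns[i].Content) > budget {
			break
		}
		used += len(turns[i].Content)
		keepFrom = i
	}
	return append(system, turns[keepFrom:]...)
}

func main() {
	msgs := []message{
		{Role: "system", Content: "be brief"},
		{Role: "user", Content: "first question"},
		{Role: "assistant", Content: "answer"},
		{Role: "user", Content: "follow-up"},
	}
	fmt.Println(len(truncateTurns(msgs, 20))) // system message plus the newest turns that fit
}
```

A real implementation would count tokens rather than characters, but the property under test is the same: truncation drops the oldest turns first and never drops system context.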
### Task 3.2: Tool Calling Compatibility Layer
- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [sdk/api/handlers/openai/openai_responses_handlers.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_handlers.go)
- **Description**: Decide and implement one of two paths:
- native pass-through if GitLab gateway supports tool/function structures
- strict downgrade path with explicit unsupported errors instead of silent field loss
- **Dependencies**: Task 3.1
- **Acceptance Criteria**:
- tool-related fields are either preserved correctly or rejected explicitly
- no silent corruption of tool names, tool calls, or tool results
- **Validation**:
- table-driven tests for tool payloads
- one manual client scenario using tools
### Task 3.3: Token Counting And Usage Reporting Fidelity
- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [internal/runtime/executor/usage_helpers.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/usage_helpers.go)
- **Description**: Improve token/usage reporting so GitLab models behave like first-class providers in logs and scheduling.
- **Dependencies**: Sprint 2
- **Acceptance Criteria**:
- `CountTokens` uses the closest supported estimation path
- usage logging distinguishes prompt vs completion when possible
- **Validation**:
- unit tests for token estimation outputs
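Where upstream reports no usage, a chars/4-style heuristic is a common "closest supported estimation path"; this is an approximation only and should yield to real usage fields whenever the GitLab response includes them:

```go
package main

import (
	"fmt"
	"unicode/utf8"
)

// estimateTokens is a rough rune-count/4 heuristic for when the provider
// returns no usage data. It is deliberately crude: good enough for logs and
// scheduling weight, never a substitute for upstream-reported usage.
func estimateTokens(text string) int {
	n := utf8.RuneCountInString(text)
	if n == 0 {
		return 0
	}
	est := n / 4
	if est == 0 {
		est = 1 // any non-empty text counts as at least one token
	}
	return est
}

func main() {
	fmt.Println(estimateTokens("hello world")) // 11 runes -> 2
}
```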
## Sprint 4: Responses And Session Parity
**Goal**: Reach codex-level support for OpenAI Responses clients and long-lived sessions where GitLab upstream permits it.
**Demo/Validation**:
- `/v1/responses` works with GitLab Duo in a realistic client flow.
- If websocket parity is not possible, the code explicitly declines it and keeps HTTP paths stable.
### Task 4.1: Make GitLab Compatible With `/v1/responses`
- **Location**: [sdk/api/handlers/openai/openai_responses_handlers.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_handlers.go), [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go)
- **Description**: Ensure GitLab transport can safely back the Responses API path, including compact responses if applicable.
- **Dependencies**: Sprint 3
- **Acceptance Criteria**:
- GitLab Duo can be selected behind `/v1/responses`
- response IDs and follow-up semantics are defined
- **Validation**:
- handler tests analogous to codex/openai responses tests
### Task 4.2: Evaluate Downstream Websocket Parity
- **Location**: [sdk/api/handlers/openai/openai_responses_websocket.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai/openai_responses_websocket.go), [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go)
- **Description**: Decide whether GitLab Duo can support downstream websocket sessions like codex:
- if yes, add session-aware execution path
- if no, mark GitLab auth as websocket-ineligible and keep HTTP routes first-class
- **Dependencies**: Task 4.1
- **Acceptance Criteria**:
- websocket behavior is explicit, not accidental
- no route claims websocket support when the upstream cannot honor it
- **Validation**:
- websocket handler tests or explicit capability tests
### Task 4.3: Add Session Cleanup And Failure Recovery Semantics
- **Location**: [internal/runtime/executor/gitlab_executor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor.go), [sdk/cliproxy/auth/conductor.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/cliproxy/auth/conductor.go)
- **Description**: Add codex-like session cleanup, retry boundaries, and model suspension/resume behavior for GitLab failures and quota events.
- **Dependencies**: Sprint 2
- **Acceptance Criteria**:
- auth/model cooldown behavior is predictable on GitLab 4xx/5xx/quota responses
- executor cleans up per-session resources if any are introduced
- **Validation**:
- tests for quota and retry behavior
## Sprint 5: Client UX, Model UX, And Manual E2E
**Goal**: Make GitLab Duo feel like a normal built-in provider to operators and downstream clients.
**Demo/Validation**:
- A documented setup exists for "login once, point Claude Code at CLIProxyAPI, use GitLab Duo-backed model".
### Task 5.1: Model Alias And Provider UX Cleanup
- **Location**: [sdk/cliproxy/service.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/cliproxy/service.go), [README.md](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/README.md)
- **Description**: Normalize what users see:
- stable alias such as `gitlab-duo`
- discovered upstream model names
- optional prefix behavior
- account labels that clearly distinguish OAuth vs PAT
- **Dependencies**: Sprint 3
- **Acceptance Criteria**:
- users can select a stable GitLab alias even when upstream model changes
- dynamic model discovery does not cause confusing model churn
- **Validation**:
- registry tests and manual `/v1/models` inspection
### Task 5.2: Add Real End-To-End Acceptance Tests
- **Location**: [internal/runtime/executor/gitlab_executor_test.go](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/internal/runtime/executor/gitlab_executor_test.go), [sdk/api/handlers/openai](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/sdk/api/handlers/openai)
- **Description**: Add higher-level tests covering the actual proxy surfaces:
- OpenAI `chat/completions`
- OpenAI `responses`
- Claude-compatible request path if GitLab is routed there
- **Dependencies**: Sprint 4
- **Acceptance Criteria**:
- tests fail if streaming regresses into synthetic buffering again
- tests cover at least one tool-related request and one multi-turn request
- **Validation**:
- `go test ./...`
### Task 5.3: Publish Operator Documentation
- **Location**: [README.md](/home/luxvtz/projects/cliproxyapi/CLIProxyAPI/README.md)
- **Description**: Document:
- OAuth setup requirements
- PAT requirements
- current capability matrix
- known limitations if websocket/tool parity is partial
- **Dependencies**: Task 5.1
- **Acceptance Criteria**:
- setup instructions are enough for a new user to reproduce the GitLab Duo flow
- limitations are explicit
- **Validation**:
- dry-run docs review from a clean environment
## Testing Strategy
- Keep `go test ./...` green after every committable task.
- Add table-driven tests first for request mapping, refresh behavior, and dynamic model registration.
- Add transport tests with `httptest.Server` for:
- real chunked streaming
- header propagation from `direct_access`
- upstream fallback rules
- Add at least one manual acceptance checklist:
- login via OAuth
- login via PAT
- list models
- run one streaming prompt via OpenAI route
- run one prompt from the target downstream client
## Potential Risks & Gotchas
- GitLab public docs expose `direct_access`, but do not fully document every possible AI gateway path. We should isolate any empirically discovered gateway assumptions behind one transport layer and feature flags.
- `chat/completions` availability differs by GitLab offering and version. The executor must not assume it always exists.
- Code Suggestions is completion-oriented; lossy mapping from rich chat/tool payloads will make GitLab Duo feel worse than codex unless explicitly handled.
- Synthetic streaming is not good enough for codex parity and will cause regressions in interactive clients.
- Dynamic model discovery can create unstable UX if the stable alias and discovered model IDs are not separated cleanly.
- PAT auth may validate successfully while still lacking effective Duo permissions. Error reporting must surface this explicitly.
## Rollback Plan
- Keep the current basic GitLab executor behind a fallback mode until the new transport path is stable.
- If parity work destabilizes existing providers, revert only GitLab-specific executor changes and leave auth support intact.
- Preserve the stable `gitlab-duo` alias so rollback does not break client configuration.

internal/runtime/executor/gitlab_executor.go

@@ -1,6 +1,7 @@
package executor
import (
"bufio"
"bytes"
"context"
"encoding/json"
@@ -14,6 +15,7 @@ import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator"
@@ -26,6 +28,7 @@ const (
gitLabAuthMethodPAT = "pat"
gitLabChatEndpoint = "/api/v4/chat/completions"
gitLabCodeSuggestionsEndpoint = "/api/v4/code_suggestions/completions"
gitLabSSEStreamingHeader = "X-Supports-Sse-Streaming"
)
type GitLabExecutor struct {
@@ -40,6 +43,15 @@ type gitLabPrompt struct {
CodeSuggestionContext []map[string]any
}
type gitLabOpenAIStreamState struct {
ID string
Model string
Created int64
LastFullText string
Started bool
Finished bool
}
func NewGitLabExecutor(cfg *config.Config) *GitLabExecutor {
return &GitLabExecutor{cfg: cfg}
}
@@ -62,7 +74,7 @@ func (e *GitLabExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
return resp, err
}
text, err := e.invoke(ctx, auth, prompt)
text, err := e.invokeText(ctx, auth, prompt)
if err != nil {
return resp, err
}
@@ -101,11 +113,16 @@ func (e *GitLabExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
return nil, statusErr{code: http.StatusBadRequest, msg: "gitlab duo executor: request has no usable text content"}
}
text, err := e.invoke(ctx, auth, prompt)
if result, streamErr := e.requestCodeSuggestionsStream(ctx, auth, prompt, translated, req, opts, reporter); streamErr == nil {
return result, nil
} else if !shouldFallbackToCodeSuggestions(streamErr) {
return nil, streamErr
}
text, err := e.invokeText(ctx, auth, prompt)
if err != nil {
return nil, err
}
responseModel := gitLabResolvedModel(auth, req.Model)
openAIResponse := buildGitLabOpenAIResponse(responseModel, text, translated)
reporter.publish(ctx, parseOpenAIUsage(openAIResponse))
@@ -215,7 +232,7 @@ func (e *GitLabExecutor) translateToOpenAI(req cliproxyexecutor.Request, opts cl
return sdktranslator.TranslateRequest(opts.SourceFormat, sdktranslator.FromString("openai"), baseModel, req.Payload, opts.Stream), nil
}
func (e *GitLabExecutor) invoke(ctx context.Context, auth *cliproxyauth.Auth, prompt gitLabPrompt) (string, error) {
func (e *GitLabExecutor) invokeText(ctx context.Context, auth *cliproxyauth.Auth, prompt gitLabPrompt) (string, error) {
if text, err := e.requestChat(ctx, auth, prompt); err == nil {
return text, nil
} else if !shouldFallbackToCodeSuggestions(err) {
@@ -257,27 +274,189 @@ func (e *GitLabExecutor) requestCodeSuggestions(ctx context.Context, auth *clipr
return e.doJSONTextRequest(ctx, auth, gitLabCodeSuggestionsEndpoint, body)
}
func (e *GitLabExecutor) requestCodeSuggestionsStream(
ctx context.Context,
auth *cliproxyauth.Auth,
prompt gitLabPrompt,
translated []byte,
req cliproxyexecutor.Request,
opts cliproxyexecutor.Options,
reporter *usageReporter,
) (*cliproxyexecutor.StreamResult, error) {
contentAbove := strings.TrimSpace(prompt.ContentAboveCursor)
if contentAbove == "" {
contentAbove = prompt.Instruction
}
body := map[string]any{
"current_file": map[string]any{
"file_name": prompt.FileName,
"content_above_cursor": contentAbove,
"content_below_cursor": "",
},
"intent": "generation",
"generation_type": "small_file",
"user_instruction": prompt.Instruction,
"stream": true,
}
if len(prompt.CodeSuggestionContext) > 0 {
body["context"] = prompt.CodeSuggestionContext
}
httpResp, bodyRaw, err := e.doJSONRequest(ctx, auth, gitLabCodeSuggestionsEndpoint, body, "text/event-stream")
if err != nil {
return nil, err
}
if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
defer func() { _ = httpResp.Body.Close() }()
respBody, readErr := io.ReadAll(httpResp.Body)
if readErr != nil {
recordAPIResponseError(ctx, e.cfg, readErr)
return nil, readErr
}
appendAPIResponseChunk(ctx, e.cfg, respBody)
return nil, statusErr{code: httpResp.StatusCode, msg: strings.TrimSpace(string(respBody))}
}
responseModel := gitLabResolvedModel(auth, req.Model)
out := make(chan cliproxyexecutor.StreamChunk, 16)
go func() {
defer close(out)
defer func() { _ = httpResp.Body.Close() }()
scanner := bufio.NewScanner(httpResp.Body)
scanner.Buffer(nil, 52_428_800)
var (
param any
eventName string
state gitLabOpenAIStreamState
)
for scanner.Scan() {
line := bytes.Clone(scanner.Bytes())
appendAPIResponseChunk(ctx, e.cfg, line)
trimmed := bytes.TrimSpace(line)
if len(trimmed) == 0 {
continue
}
if bytes.HasPrefix(trimmed, []byte("event:")) {
eventName = strings.TrimSpace(string(trimmed[len("event:"):]))
continue
}
if !bytes.HasPrefix(trimmed, []byte("data:")) {
continue
}
payload := bytes.TrimSpace(trimmed[len("data:"):])
normalized := normalizeGitLabStreamChunk(eventName, payload, responseModel, &state)
eventName = ""
for _, item := range normalized {
if detail, ok := parseOpenAIStreamUsage(item); ok {
reporter.publish(ctx, detail)
}
chunks := sdktranslator.TranslateStream(
ctx,
sdktranslator.FromString("openai"),
opts.SourceFormat,
req.Model,
opts.OriginalRequest,
translated,
item,
&param,
)
for i := range chunks {
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
}
}
}
if errScan := scanner.Err(); errScan != nil {
recordAPIResponseError(ctx, e.cfg, errScan)
reporter.publishFailure(ctx)
out <- cliproxyexecutor.StreamChunk{Err: errScan}
return
}
if !state.Finished {
for _, item := range finalizeGitLabStream(responseModel, &state) {
chunks := sdktranslator.TranslateStream(
ctx,
sdktranslator.FromString("openai"),
opts.SourceFormat,
req.Model,
opts.OriginalRequest,
translated,
item,
&param,
)
for i := range chunks {
out <- cliproxyexecutor.StreamChunk{Payload: []byte(chunks[i])}
}
}
}
reporter.ensurePublished(ctx)
}()
return &cliproxyexecutor.StreamResult{
Headers: cloneGitLabStreamHeaders(httpResp.Header, bodyRaw),
Chunks: out,
}, nil
}
func (e *GitLabExecutor) doJSONTextRequest(ctx context.Context, auth *cliproxyauth.Auth, endpoint string, payload map[string]any) (string, error) {
resp, _, err := e.doJSONRequest(ctx, auth, endpoint, payload, "application/json")
if err != nil {
return "", err
}
defer func() { _ = resp.Body.Close() }()
respBody, err := io.ReadAll(resp.Body)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
return "", err
}
appendAPIResponseChunk(ctx, e.cfg, respBody)
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return "", statusErr{code: resp.StatusCode, msg: strings.TrimSpace(string(respBody))}
}
text, err := parseGitLabTextResponse(endpoint, respBody)
if err != nil {
return "", err
}
return strings.TrimSpace(text), nil
}
func (e *GitLabExecutor) doJSONRequest(
ctx context.Context,
auth *cliproxyauth.Auth,
endpoint string,
payload map[string]any,
accept string,
) (*http.Response, []byte, error) {
token := gitLabPrimaryToken(auth)
baseURL := gitLabBaseURL(auth)
if token == "" || baseURL == "" {
return "", statusErr{code: http.StatusUnauthorized, msg: "gitlab duo executor: missing credentials"}
return nil, nil, statusErr{code: http.StatusUnauthorized, msg: "gitlab duo executor: missing credentials"}
}
body, err := json.Marshal(payload)
if err != nil {
return "", fmt.Errorf("gitlab duo executor: marshal request failed: %w", err)
return nil, nil, fmt.Errorf("gitlab duo executor: marshal request failed: %w", err)
}
url := strings.TrimRight(baseURL, "/") + endpoint
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
if err != nil {
return "", err
return nil, nil, err
}
req.Header.Set("Authorization", "Bearer "+token)
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Accept", "application/json")
req.Header.Set("Accept", accept)
req.Header.Set("User-Agent", "CLIProxyAPI/GitLab-Duo")
applyGitLabRequestHeaders(req, auth)
if strings.EqualFold(accept, "text/event-stream") {
req.Header.Set("Cache-Control", "no-cache")
req.Header.Set(gitLabSSEStreamingHeader, "true")
req.Header.Set("Accept-Encoding", "identity")
}
var authID, authLabel, authType, authValue string
if auth != nil {
@@ -301,27 +480,10 @@ func (e *GitLabExecutor) doJSONTextRequest(ctx context.Context, auth *cliproxyau
resp, err := httpClient.Do(req)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
return "", err
return nil, body, err
}
defer func() { _ = resp.Body.Close() }()
recordAPIResponseMetadata(ctx, e.cfg, resp.StatusCode, resp.Header.Clone())
respBody, err := io.ReadAll(resp.Body)
if err != nil {
recordAPIResponseError(ctx, e.cfg, err)
return "", err
}
appendAPIResponseChunk(ctx, e.cfg, respBody)
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return "", statusErr{code: resp.StatusCode, msg: strings.TrimSpace(string(respBody))}
}
text, err := parseGitLabTextResponse(endpoint, respBody)
if err != nil {
return "", err
}
return strings.TrimSpace(text), nil
return resp, body, nil
}
func (e *GitLabExecutor) refreshOAuthToken(ctx context.Context, client *gitlab.AuthClient, auth *cliproxyauth.Auth, baseURL string) (*gitlab.TokenResponse, error) {
@@ -455,6 +617,236 @@ func parseGitLabTextResponse(endpoint string, body []byte) (string, error) {
return "", fmt.Errorf("gitlab duo executor: upstream returned no text payload")
}
func applyGitLabRequestHeaders(req *http.Request, auth *cliproxyauth.Auth) {
if req == nil {
return
}
if auth != nil {
util.ApplyCustomHeadersFromAttrs(req, auth.Attributes)
}
for key, value := range gitLabGatewayHeaders(auth) {
if key == "" || value == "" {
continue
}
req.Header.Set(key, value)
}
}
func gitLabGatewayHeaders(auth *cliproxyauth.Auth) map[string]string {
if auth == nil || auth.Metadata == nil {
return nil
}
raw, ok := auth.Metadata["duo_gateway_headers"]
if !ok {
return nil
}
out := make(map[string]string)
switch typed := raw.(type) {
case map[string]string:
for key, value := range typed {
key = strings.TrimSpace(key)
value = strings.TrimSpace(value)
if key != "" && value != "" {
out[key] = value
}
}
case map[string]any:
for key, value := range typed {
key = strings.TrimSpace(key)
if key == "" {
continue
}
strValue := strings.TrimSpace(fmt.Sprint(value))
if strValue != "" {
out[key] = strValue
}
}
}
if len(out) == 0 {
return nil
}
return out
}
func cloneGitLabStreamHeaders(headers http.Header, _ []byte) http.Header {
cloned := headers.Clone()
if cloned == nil {
cloned = make(http.Header)
}
cloned.Set("Content-Type", "text/event-stream")
return cloned
}
func normalizeGitLabStreamChunk(eventName string, payload []byte, fallbackModel string, state *gitLabOpenAIStreamState) [][]byte {
payload = bytes.TrimSpace(payload)
if len(payload) == 0 {
return nil
}
if bytes.Equal(payload, []byte("[DONE]")) {
return finalizeGitLabStream(fallbackModel, state)
}
root := gjson.ParseBytes(payload)
if root.Exists() {
if obj := root.Get("object").String(); obj == "chat.completion.chunk" {
return [][]byte{append([]byte("data: "), bytes.Clone(payload)...)}
}
if root.Get("choices.0.delta").Exists() || root.Get("choices.0.finish_reason").Exists() {
return [][]byte{append([]byte("data: "), bytes.Clone(payload)...)}
}
}
state.ensureInitialized(fallbackModel, root)
switch strings.TrimSpace(eventName) {
case "stream_end":
return finalizeGitLabStream(fallbackModel, state)
case "stream_start":
if text := extractGitLabStreamText(root); text != "" {
return state.emitText(text)
}
return nil
}
if done := root.Get("done"); done.Exists() && done.Bool() {
return finalizeGitLabStream(fallbackModel, state)
}
if finishReason := strings.TrimSpace(root.Get("finish_reason").String()); finishReason != "" {
out := state.emitText(extractGitLabStreamText(root))
return append(out, state.finish(finishReason)...)
}
return state.emitText(extractGitLabStreamText(root))
}
func extractGitLabStreamText(root gjson.Result) string {
for _, key := range []string{
"choices.0.delta.content",
"choices.0.text",
"delta.content",
"content_chunk",
"content",
"text",
"response",
"completion",
} {
if value := root.Get(key).String(); strings.TrimSpace(value) != "" {
return value
}
}
return ""
}
func finalizeGitLabStream(fallbackModel string, state *gitLabOpenAIStreamState) [][]byte {
if state == nil {
return nil
}
state.ensureInitialized(fallbackModel, gjson.Result{})
return state.finish("stop")
}
func (s *gitLabOpenAIStreamState) ensureInitialized(fallbackModel string, root gjson.Result) {
if s == nil {
return
}
if s.ID == "" {
s.ID = fmt.Sprintf("gitlab-%d", time.Now().UnixNano())
}
if s.Created == 0 {
s.Created = time.Now().Unix()
}
if s.Model == "" {
for _, key := range []string{"model.name", "model", "metadata.model_name"} {
if value := strings.TrimSpace(root.Get(key).String()); value != "" {
s.Model = value
break
}
}
}
if s.Model == "" {
s.Model = fallbackModel
}
}
func (s *gitLabOpenAIStreamState) emitText(text string) [][]byte {
if s == nil {
return nil
}
if strings.TrimSpace(text) == "" {
return nil
}
delta := s.nextDelta(text)
if delta == "" {
return nil
}
out := make([][]byte, 0, 2)
if !s.Started {
out = append(out, s.buildChunk(map[string]any{"role": "assistant"}, ""))
s.Started = true
}
out = append(out, s.buildChunk(map[string]any{"content": delta}, ""))
return out
}
func (s *gitLabOpenAIStreamState) finish(reason string) [][]byte {
if s == nil || s.Finished {
return nil
}
if !s.Started {
s.Started = true
}
s.Finished = true
return [][]byte{
s.buildChunk(map[string]any{}, reason),
[]byte("data: [DONE]"),
}
}
func (s *gitLabOpenAIStreamState) nextDelta(text string) string {
if s == nil {
return text
}
if strings.TrimSpace(text) == "" {
return ""
}
if s.LastFullText == "" {
s.LastFullText = text
return text
}
if text == s.LastFullText {
return ""
}
if strings.HasPrefix(text, s.LastFullText) {
delta := text[len(s.LastFullText):]
s.LastFullText = text
return delta
}
s.LastFullText += text
return text
}
func (s *gitLabOpenAIStreamState) buildChunk(delta map[string]any, finishReason string) []byte {
payload := map[string]any{
"id": s.ID,
"object": "chat.completion.chunk",
"created": s.Created,
"model": s.Model,
"choices": []map[string]any{{
"index": 0,
"delta": delta,
}},
}
if finishReason != "" {
payload["choices"] = []map[string]any{{
"index": 0,
"delta": delta,
"finish_reason": finishReason,
}}
}
raw, _ := json.Marshal(payload)
return append([]byte("data: "), raw...)
}
func shouldFallbackToCodeSuggestions(err error) bool {
if err == nil {
return false

internal/runtime/executor/gitlab_executor_test.go

@@ -3,8 +3,10 @@ package executor
import (
"context"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"strings"
	"testing"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -153,3 +155,147 @@ func TestGitLabExecutorRefreshUpdatesMetadata(t *testing.T) {
t.Fatalf("expected refreshed model metadata, got %#v", got)
}
}
func TestGitLabExecutorExecuteStreamUsesCodeSuggestionsSSE(t *testing.T) {
var gotAccept, gotStreamingHeader, gotEncoding string
var gotStreamFlag bool
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != gitLabCodeSuggestionsEndpoint {
			// Errorf, not Fatalf: the handler runs on a server goroutine,
			// and FailNow must only be called from the test goroutine.
			t.Errorf("unexpected path %q", r.URL.Path)
			return
		}
gotAccept = r.Header.Get("Accept")
gotStreamingHeader = r.Header.Get(gitLabSSEStreamingHeader)
gotEncoding = r.Header.Get("Accept-Encoding")
gotStreamFlag = gjson.GetBytes(readBody(t, r), "stream").Bool()
w.Header().Set("Content-Type", "text/event-stream")
_, _ = w.Write([]byte("event: stream_start\n"))
_, _ = w.Write([]byte("data: {\"model\":{\"name\":\"claude-sonnet-4-5\"}}\n\n"))
_, _ = w.Write([]byte("event: content_chunk\n"))
_, _ = w.Write([]byte("data: {\"content\":\"hello\"}\n\n"))
_, _ = w.Write([]byte("event: content_chunk\n"))
_, _ = w.Write([]byte("data: {\"content\":\" world\"}\n\n"))
_, _ = w.Write([]byte("event: stream_end\n"))
_, _ = w.Write([]byte("data: {}\n\n"))
}))
defer srv.Close()
exec := NewGitLabExecutor(&config.Config{})
auth := &cliproxyauth.Auth{
Provider: "gitlab",
Metadata: map[string]any{
"base_url": srv.URL,
"access_token": "oauth-access",
"model_name": "claude-sonnet-4-5",
},
}
req := cliproxyexecutor.Request{
Model: "gitlab-duo",
Payload: []byte(`{"model":"gitlab-duo","stream":true,"messages":[{"role":"user","content":"hello"}]}`),
}
result, err := exec.ExecuteStream(context.Background(), auth, req, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("openai"),
})
if err != nil {
t.Fatalf("ExecuteStream() error = %v", err)
}
lines := collectStreamLines(t, result)
if gotAccept != "text/event-stream" {
t.Fatalf("Accept = %q, want text/event-stream", gotAccept)
}
if gotStreamingHeader != "true" {
t.Fatalf("%s = %q, want true", gitLabSSEStreamingHeader, gotStreamingHeader)
}
if gotEncoding != "identity" {
t.Fatalf("Accept-Encoding = %q, want identity", gotEncoding)
}
if !gotStreamFlag {
t.Fatalf("expected upstream request to set stream=true")
}
if len(lines) < 4 {
t.Fatalf("expected translated stream chunks, got %d", len(lines))
}
if !strings.Contains(strings.Join(lines, "\n"), `"content":"hello"`) {
t.Fatalf("expected hello delta in stream, got %q", strings.Join(lines, "\n"))
}
if !strings.Contains(strings.Join(lines, "\n"), `"content":" world"`) {
t.Fatalf("expected world delta in stream, got %q", strings.Join(lines, "\n"))
}
if last := lines[len(lines)-1]; last != "data: [DONE]" {
t.Fatalf("expected stream terminator, got %q", last)
}
}
func TestGitLabExecutorExecuteStreamFallsBackToSyntheticChat(t *testing.T) {
chatCalls := 0
streamCalls := 0
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case gitLabCodeSuggestionsEndpoint:
streamCalls++
http.Error(w, "feature unavailable", http.StatusForbidden)
case gitLabChatEndpoint:
chatCalls++
_, _ = w.Write([]byte(`"chat fallback response"`))
		default:
			// Errorf, not Fatalf: the handler runs on a server goroutine.
			t.Errorf("unexpected path %q", r.URL.Path)
		}
}))
defer srv.Close()
exec := NewGitLabExecutor(&config.Config{})
auth := &cliproxyauth.Auth{
Provider: "gitlab",
Metadata: map[string]any{
"base_url": srv.URL,
"access_token": "oauth-access",
"model_name": "claude-sonnet-4-5",
},
}
req := cliproxyexecutor.Request{
Model: "gitlab-duo",
Payload: []byte(`{"model":"gitlab-duo","stream":true,"messages":[{"role":"user","content":"hello"}]}`),
}
result, err := exec.ExecuteStream(context.Background(), auth, req, cliproxyexecutor.Options{
SourceFormat: sdktranslator.FromString("openai"),
})
if err != nil {
t.Fatalf("ExecuteStream() error = %v", err)
}
lines := collectStreamLines(t, result)
if streamCalls != 1 {
t.Fatalf("expected streaming endpoint once, got %d", streamCalls)
}
if chatCalls != 1 {
t.Fatalf("expected chat fallback once, got %d", chatCalls)
}
if !strings.Contains(strings.Join(lines, "\n"), `"content":"chat fallback response"`) {
t.Fatalf("expected fallback content in stream, got %q", strings.Join(lines, "\n"))
}
}
func collectStreamLines(t *testing.T, result *cliproxyexecutor.StreamResult) []string {
t.Helper()
lines := make([]string, 0, 8)
for chunk := range result.Chunks {
if chunk.Err != nil {
t.Fatalf("unexpected stream error: %v", chunk.Err)
}
lines = append(lines, string(chunk.Payload))
}
return lines
}
func readBody(t *testing.T, r *http.Request) []byte {
	t.Helper()
	defer func() { _ = r.Body.Close() }()
	body, err := io.ReadAll(r.Body)
	if err != nil {
		// Errorf, not Fatalf: readBody is invoked from handler goroutines.
		t.Errorf("ReadAll() error = %v", err)
		return nil
	}
	return body
}
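The prefix-diff logic in `nextDelta` above can be exercised in isolation. The sketch below is not part of the commit; the `state` type is a hypothetical stand-in that mirrors only the `LastFullText` handling, showing how cumulative snapshot payloads and plain incremental chunks both collapse into clean deltas:

```go
package main

import (
	"fmt"
	"strings"
)

// state mirrors the LastFullText bookkeeping of gitLabOpenAIStreamState.
type state struct{ last string }

// nextDelta returns the new text to emit for an upstream payload: the suffix
// when the payload is a cumulative snapshot, the payload itself when it is an
// incremental chunk, and "" for blanks or exact duplicates.
func (s *state) nextDelta(text string) string {
	if strings.TrimSpace(text) == "" {
		return ""
	}
	if s.last == "" {
		s.last = text
		return text
	}
	if text == s.last {
		return ""
	}
	if strings.HasPrefix(text, s.last) {
		delta := text[len(s.last):]
		s.last = text
		return delta
	}
	// Non-prefix payload: treat it as an incremental chunk.
	s.last += text
	return text
}

func main() {
	s := &state{}
	fmt.Printf("%q\n", s.nextDelta("hel"))    // "hel"
	fmt.Printf("%q\n", s.nextDelta("hello"))  // "lo" (cumulative upstream)
	fmt.Printf("%q\n", s.nextDelta(" world")) // " world" (incremental upstream)
}
```

Either upstream shape thus produces the same downstream chunk sequence, which is what lets the executor forward GitLab's SSE events without knowing in advance which framing a given model uses.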