From 8da3a9a92d9c2c6db6fa6caa881e9310d1792e93 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 27 Feb 2026 16:14:49 +0000 Subject: [PATCH] fix(agents): auto-enable OpenAI Responses server-side compaction (#16930, #22441, #25088) Landed from contributor PRs #16930, #22441, and #25088. Co-authored-by: liweiguang Co-authored-by: EdwardWu7 Co-authored-by: MoerAI --- CHANGELOG.md | 1 + docs/concepts/compaction.md | 12 ++ docs/providers/openai.md | 33 ++++ .../pi-embedded-runner-extraparams.test.ts | 162 +++++++++++++++++- src/agents/pi-embedded-runner/extra-params.ts | 83 ++++++++- 5 files changed, 277 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca4a4e3d7e5..8d4d54af342 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai - Update/Global npm: fallback to `--omit=optional` when global `npm update` fails so optional dependency install failures no longer abort update flows. (#24896) Thanks @xinhuagu and @vincentkoc. - Plugins/NPM spec install: fix npm-spec plugin installs when `npm pack` output is empty by detecting newly created `.tgz` archives in the pack directory. (#21039) Thanks @graysurf and @vincentkoc. - Plugins/Install: clear stale install errors when an npm package is not found so follow-up install attempts report current state correctly. (#25073) Thanks @dalefrieswthat. +- OpenAI Responses/Compaction: rewrite and unify the OpenAI Responses store patches to treat empty `baseUrl` as non-direct, honor `compat.supportsStore=false`, and auto-inject server-side compaction `context_management` for compatible direct OpenAI models (with per-model opt-out/threshold overrides). Landed from contributor PRs #16930 (@OiPunk), #22441 (@EdwardWu7), and #25088 (@MoerAI). Thanks @OiPunk, @EdwardWu7, and @MoerAI. 
## 2026.2.26 diff --git a/docs/concepts/compaction.md b/docs/concepts/compaction.md index d83f4190032..8d243bf234d 100644 --- a/docs/concepts/compaction.md +++ b/docs/concepts/compaction.md @@ -55,6 +55,18 @@ Context window is model-specific. OpenClaw uses the model definition from the co See [/concepts/session-pruning](/concepts/session-pruning) for pruning details. +## OpenAI server-side compaction + +OpenClaw also supports OpenAI Responses server-side compaction hints for +compatible direct OpenAI models. This is separate from local OpenClaw +compaction and can run alongside it. + +- Local compaction: OpenClaw summarizes and persists into session JSONL. +- Server-side compaction: OpenAI compacts context on the provider side when + `store` + `context_management` are enabled. + +See [OpenAI provider](/providers/openai) for model params and overrides. + ## Tips - Use `/compact` when sessions feel stale or context is bloated. diff --git a/docs/providers/openai.md b/docs/providers/openai.md index 1a47081a9a6..a06d5dee79d 100644 --- a/docs/providers/openai.md +++ b/docs/providers/openai.md @@ -83,6 +83,39 @@ OpenClaw uses `pi-ai` for model streaming. For `openai-codex/*` models you can s } ``` +### OpenAI Responses server-side compaction + +For direct OpenAI Responses models (`openai/*` using `api: "openai-responses"` with +`baseUrl` on `api.openai.com`), OpenClaw now auto-enables OpenAI server-side +compaction payload hints: + +- Forces `store: true` (unless model compat sets `supportsStore: false`) +- Injects `context_management: [{ type: "compaction", compact_threshold: ... }]` + +By default, `compact_threshold` is `70%` of model `contextWindow` (or `80000` +when unavailable). 
+ +You can override per model: + +```json5 +{ + agents: { + defaults: { + models: { + "openai/gpt-5": { + params: { + responsesServerCompaction: true, + responsesCompactThreshold: 120000, + }, + }, + }, + }, + }, +} +``` + +Set `responsesServerCompaction: false` to disable this injection for a model. + ## Notes - Model refs always use `provider/model` (see [/concepts/models](/concepts/models)). diff --git a/src/agents/pi-embedded-runner-extraparams.test.ts b/src/agents/pi-embedded-runner-extraparams.test.ts index 3b717d3ab96..332cb430eec 100644 --- a/src/agents/pi-embedded-runner-extraparams.test.ts +++ b/src/agents/pi-embedded-runner-extraparams.test.ts @@ -161,7 +161,7 @@ describe("applyExtraParamsToAgent", () => { }; } - function runStoreMutationCase(params: { + function runResponsesPayloadMutationCase(params: { applyProvider: string; applyModelId: string; model: | Model<"openai-responses"> | Model<"openai-codex-responses"> | Model<"openai-completions">; options?: SimpleStreamOptions; + cfg?: Record<string, unknown>; + payload?: Record<string, unknown>; }) { - const payload = { store: false }; + const payload = params.payload ?? { store: false }; const baseStreamFn: StreamFn = (_model, _context, options) => { options?.onPayload?.(payload); return {} as ReturnType<StreamFn>; }; const agent = { streamFn: baseStreamFn }; - applyExtraParamsToAgent(agent, undefined, params.applyProvider, params.applyModelId); + applyExtraParamsToAgent( + agent, + params.cfg as Parameters<typeof applyExtraParamsToAgent>[1], + params.applyProvider, + params.applyModelId, + ); const context: Context = { messages: [] }; void agent.streamFn?.(params.model, context, params.options ?? 
{}); return payload; @@ -814,7 +821,7 @@ describe("applyExtraParamsToAgent", () => { }); it("forces store=true for direct OpenAI Responses payloads", () => { - const payload = runStoreMutationCase({ + const payload = runResponsesPayloadMutationCase({ applyProvider: "openai", applyModelId: "gpt-5", model: { @@ -828,7 +835,7 @@ describe("applyExtraParamsToAgent", () => { }); it("does not force store for OpenAI Responses routed through non-OpenAI base URLs", () => { - const payload = runStoreMutationCase({ + const payload = runResponsesPayloadMutationCase({ applyProvider: "openai", applyModelId: "gpt-5", model: { @@ -841,11 +848,152 @@ describe("applyExtraParamsToAgent", () => { expect(payload.store).toBe(false); }); + it("does not force store for OpenAI Responses when baseUrl is empty", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "openai", + applyModelId: "gpt-5", + model: { + api: "openai-responses", + provider: "openai", + id: "gpt-5", + baseUrl: "", + } as Model<"openai-responses">, + }); + expect(payload.store).toBe(false); + }); + + it("does not force store for models that declare supportsStore=false", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "azure-openai-responses", + applyModelId: "gpt-4o", + model: { + api: "openai-responses", + provider: "azure-openai-responses", + id: "gpt-4o", + baseUrl: "https://example.openai.azure.com/openai/v1", + compat: { supportsStore: false }, + } as Model<"openai-responses">, + }); + expect(payload.store).toBe(false); + }); + + it("auto-injects OpenAI Responses context_management compaction for direct OpenAI models", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "openai", + applyModelId: "gpt-5", + model: { + api: "openai-responses", + provider: "openai", + id: "gpt-5", + baseUrl: "https://api.openai.com/v1", + contextWindow: 200_000, + } as Model<"openai-responses">, + }); + expect(payload.context_management).toEqual([ + { + type: 
"compaction", + compact_threshold: 140_000, + }, + ]); + }); + + it("does not auto-inject OpenAI Responses context_management for Azure by default", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "azure-openai-responses", + applyModelId: "gpt-4o", + model: { + api: "openai-responses", + provider: "azure-openai-responses", + id: "gpt-4o", + baseUrl: "https://example.openai.azure.com/openai/v1", + } as Model<"openai-responses">, + }); + expect(payload).not.toHaveProperty("context_management"); + }); + + it("allows explicitly enabling OpenAI Responses context_management compaction", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "azure-openai-responses", + applyModelId: "gpt-4o", + cfg: { + agents: { + defaults: { + models: { + "azure-openai-responses/gpt-4o": { + params: { + responsesServerCompaction: true, + responsesCompactThreshold: 42_000, + }, + }, + }, + }, + }, + }, + model: { + api: "openai-responses", + provider: "azure-openai-responses", + id: "gpt-4o", + baseUrl: "https://example.openai.azure.com/openai/v1", + } as Model<"openai-responses">, + }); + expect(payload.context_management).toEqual([ + { + type: "compaction", + compact_threshold: 42_000, + }, + ]); + }); + + it("preserves existing context_management payload values", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "openai", + applyModelId: "gpt-5", + model: { + api: "openai-responses", + provider: "openai", + id: "gpt-5", + baseUrl: "https://api.openai.com/v1", + } as Model<"openai-responses">, + payload: { + store: false, + context_management: [{ type: "compaction", compact_threshold: 12_345 }], + }, + }); + expect(payload.context_management).toEqual([{ type: "compaction", compact_threshold: 12_345 }]); + }); + + it("allows disabling OpenAI Responses context_management compaction via model params", () => { + const payload = runResponsesPayloadMutationCase({ + applyProvider: "openai", + applyModelId: "gpt-5", 
+ cfg: { + agents: { + defaults: { + models: { + "openai/gpt-5": { + params: { + responsesServerCompaction: false, + }, + }, + }, + }, + }, + }, + model: { + api: "openai-responses", + provider: "openai", + id: "gpt-5", + baseUrl: "https://api.openai.com/v1", + } as Model<"openai-responses">, + }); + expect(payload).not.toHaveProperty("context_management"); + }); + it.each([ { name: "with openai-codex provider config", run: () => - runStoreMutationCase({ + runResponsesPayloadMutationCase({ applyProvider: "openai-codex", applyModelId: "codex-mini-latest", model: { @@ -859,7 +1007,7 @@ describe("applyExtraParamsToAgent", () => { { name: "without config via provider/model hints", run: () => - runStoreMutationCase({ + runResponsesPayloadMutationCase({ applyProvider: "openai-codex", applyModelId: "codex-mini-latest", model: { diff --git a/src/agents/pi-embedded-runner/extra-params.ts b/src/agents/pi-embedded-runner/extra-params.ts index 70662760235..70678f08bb4 100644 --- a/src/agents/pi-embedded-runner/extra-params.ts +++ b/src/agents/pi-embedded-runner/extra-params.ts @@ -186,7 +186,7 @@ function createBedrockNoCacheWrapper(baseStreamFn: StreamFn | undefined): Stream function isDirectOpenAIBaseUrl(baseUrl: unknown): boolean { if (typeof baseUrl !== "string" || !baseUrl.trim()) { - return true; + return false; } try { @@ -208,7 +208,13 @@ function shouldForceResponsesStore(model: { api?: unknown; provider?: unknown; baseUrl?: unknown; + compat?: { supportsStore?: boolean }; }): boolean { + // Never force store=true when the model explicitly declares supportsStore=false + // (e.g. Azure OpenAI Responses API without server-side persistence). 
+ if (model.compat?.supportsStore === false) { + return false; + } if (typeof model.api !== "string" || typeof model.provider !== "string") { return false; } @@ -221,19 +227,82 @@ function shouldForceResponsesStore(model: { return isDirectOpenAIBaseUrl(model.baseUrl); } -function createOpenAIResponsesStoreWrapper(baseStreamFn: StreamFn | undefined): StreamFn { +function parsePositiveInteger(value: unknown): number | undefined { + if (typeof value === "number" && Number.isFinite(value) && value > 0) { + return Math.floor(value); + } + if (typeof value === "string") { + const parsed = Number.parseInt(value, 10); + if (Number.isFinite(parsed) && parsed > 0) { + return parsed; + } + } + return undefined; +} + +function resolveOpenAIResponsesCompactThreshold(model: { contextWindow?: unknown }): number { + const contextWindow = parsePositiveInteger(model.contextWindow); + if (contextWindow) { + return Math.max(1_000, Math.floor(contextWindow * 0.7)); + } + return 80_000; +} + +function shouldEnableOpenAIResponsesServerCompaction( + model: { + api?: unknown; + provider?: unknown; + baseUrl?: unknown; + compat?: { supportsStore?: boolean }; + }, + extraParams: Record<string, unknown> | undefined, +): boolean { + const configured = extraParams?.responsesServerCompaction; + if (configured === false) { + return false; + } + if (!shouldForceResponsesStore(model)) { + return false; + } + if (configured === true) { + return true; + } + // Auto-enable for direct OpenAI Responses models. + return model.provider === "openai"; +} + +function createOpenAIResponsesContextManagementWrapper( + baseStreamFn: StreamFn | undefined, + extraParams: Record<string, unknown> | undefined, +): StreamFn { + const underlying = baseStreamFn ?? 
streamSimple; return (model, context, options) => { - if (!shouldForceResponsesStore(model)) { + const forceStore = shouldForceResponsesStore(model); + const useServerCompaction = shouldEnableOpenAIResponsesServerCompaction(model, extraParams); + if (!forceStore && !useServerCompaction) { return underlying(model, context, options); } + const compactThreshold = + parsePositiveInteger(extraParams?.responsesCompactThreshold) ?? + resolveOpenAIResponsesCompactThreshold(model); const originalOnPayload = options?.onPayload; return underlying(model, context, { ...options, onPayload: (payload) => { if (payload && typeof payload === "object") { - (payload as { store?: unknown }).store = true; + const payloadObj = payload as Record<string, unknown>; + if (forceStore) { + payloadObj.store = true; + } + if (useServerCompaction && payloadObj.context_management === undefined) { + payloadObj.context_management = [ + { + type: "compaction", + compact_threshold: compactThreshold, + }, + ]; + } } originalOnPayload?.(payload); }, @@ -734,7 +803,7 @@ export function applyExtraParamsToAgent( agent.streamFn = createGoogleThinkingPayloadWrapper(agent.streamFn, thinkingLevel); // Work around upstream pi-ai hardcoding `store: false` for Responses API. - // Force `store=true` for direct OpenAI/OpenAI Codex providers so multi-turn - // server-side conversation state is preserved. - agent.streamFn = createOpenAIResponsesStoreWrapper(agent.streamFn); + // Force `store=true` for direct OpenAI Responses models and auto-enable + // server-side compaction for compatible OpenAI Responses payloads. + agent.streamFn = createOpenAIResponsesContextManagementWrapper(agent.streamFn, merged); }