Mirror of https://github.com/moltbot/moltbot.git, synced 2026-04-25 23:47:20 +00:00
refactor(providers): share payload patch helpers
@@ -1,76 +1,13 @@
import type { StreamFn } from "@mariozechner/pi-agent-core";
import { streamSimple } from "@mariozechner/pi-ai";
import { streamWithPayloadPatch } from "openclaw/plugin-sdk/provider-stream";
import {
  applyAnthropicEphemeralCacheControlMarkers,
  buildCopilotDynamicHeaders,
  hasCopilotVisionInput,
  streamWithPayloadPatch,
} from "openclaw/plugin-sdk/provider-stream";

type StreamContext = Parameters<StreamFn>[1];
type StreamMessage = StreamContext["messages"][number];

function inferCopilotInitiator(messages: StreamContext["messages"]): "agent" | "user" {
  const last = messages[messages.length - 1];
  return last && last.role !== "user" ? "agent" : "user";
}

function hasCopilotVisionInput(messages: StreamContext["messages"]): boolean {
  return messages.some((message: StreamMessage) => {
    if (message.role === "user" && Array.isArray(message.content)) {
      return message.content.some((item) => item.type === "image");
    }
    if (message.role === "toolResult" && Array.isArray(message.content)) {
      return message.content.some((item) => item.type === "image");
    }
    return false;
  });
}

function buildCopilotDynamicHeaders(params: {
  messages: StreamContext["messages"];
}): Record<string, string> {
  return {
    "X-Initiator": inferCopilotInitiator(params.messages),
    "Openai-Intent": "conversation-edits",
    ...(hasCopilotVisionInput(params.messages) ? { "Copilot-Vision-Request": "true" } : {}),
  };
}

function applyAnthropicPromptCacheMarkers(payloadObj: Record<string, unknown>): void {
  const messages = payloadObj.messages;
  if (!Array.isArray(messages)) {
    return;
  }

  for (const message of messages as Array<{ role?: string; content?: unknown }>) {
    if (message.role === "system" || message.role === "developer") {
      if (typeof message.content === "string") {
        message.content = [
          { type: "text", text: message.content, cache_control: { type: "ephemeral" } },
        ];
        continue;
      }
      if (Array.isArray(message.content) && message.content.length > 0) {
        const last = message.content[message.content.length - 1];
        if (last && typeof last === "object") {
          const record = last as Record<string, unknown>;
          if (record.type !== "thinking" && record.type !== "redacted_thinking") {
            record.cache_control = { type: "ephemeral" };
          }
        }
      }
      continue;
    }

    if (message.role === "assistant" && Array.isArray(message.content)) {
      for (const block of message.content) {
        if (!block || typeof block !== "object") {
          continue;
        }
        const record = block as Record<string, unknown>;
        if (record.type === "thinking" || record.type === "redacted_thinking") {
          delete record.cache_control;
        }
      }
    }
  }
}

export function wrapCopilotAnthropicStream(baseStreamFn: StreamFn | undefined): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
@@ -86,11 +23,14 @@ export function wrapCopilotAnthropicStream(baseStreamFn: StreamFn | undefined):
      {
        ...options,
        headers: {
          ...buildCopilotDynamicHeaders({ messages: context.messages }),
          ...buildCopilotDynamicHeaders({
            messages: context.messages as StreamContext["messages"],
            hasImages: hasCopilotVisionInput(context.messages as StreamContext["messages"]),
          }),
          ...(options?.headers ?? {}),
        },
      },
      applyAnthropicPromptCacheMarkers,
      applyAnthropicEphemeralCacheControlMarkers,
    );
  };
}
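Consumer-side illustration (editor's sketch, not part of the diff): assuming the shared buildCopilotDynamicHeaders and hasCopilotVisionInput exports keep the semantics of the local helpers removed above, a hypothetical conversation would produce headers as follows. The message literals, the hasImages argument shape (taken from the new call site above), and the expected header object are assumptions.

import {
  buildCopilotDynamicHeaders,
  hasCopilotVisionInput,
} from "openclaw/plugin-sdk/provider-stream";

// Hypothetical conversation: an earlier user turn carries an image block and
// the final turn is an assistant message.
const messages = [
  { role: "user", content: [{ type: "image", data: "<base64>" }] },
  { role: "assistant", content: [{ type: "text", text: "Looking at it." }] },
] as never;

// true: an image block appears in user (or toolResult) content.
const hasImages = hasCopilotVisionInput(messages);

// Assuming the shared helper mirrors the removed local logic, this yields:
// { "X-Initiator": "agent",              // last message is not from the user
//   "Openai-Intent": "conversation-edits",
//   "Copilot-Vision-Request": "true" }   // only when vision input is present
const headers = buildCopilotDynamicHeaders({ messages, hasImages } as never);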
src/agents/openai-responses-payload-policy.test.ts (new file, 82 lines)
@@ -0,0 +1,82 @@
import type { Model } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest";
import {
  applyOpenAIResponsesPayloadPolicy,
  resolveOpenAIResponsesPayloadPolicy,
} from "./openai-responses-payload-policy.js";

describe("openai responses payload policy", () => {
  it("forces store for native OpenAI responses payloads but keeps disable mode for transport defaults", () => {
    const model = {
      id: "gpt-5.4",
      name: "GPT-5.4",
      api: "openai-responses",
      provider: "openai",
      baseUrl: "https://api.openai.com/v1",
      reasoning: true,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 200000,
      maxTokens: 8192,
    } satisfies Model<"openai-responses">;

    expect(
      resolveOpenAIResponsesPayloadPolicy(model, { storeMode: "provider-policy" }),
    ).toMatchObject({
      explicitStore: true,
      allowsServiceTier: true,
    });
    expect(resolveOpenAIResponsesPayloadPolicy(model, { storeMode: "disable" })).toMatchObject({
      explicitStore: false,
      allowsServiceTier: true,
    });
  });

  it("strips store and prompt cache for proxy-like responses routes when requested", () => {
    const policy = resolveOpenAIResponsesPayloadPolicy(
      {
        api: "openai-responses",
        provider: "openai",
        baseUrl: "https://proxy.example.com/v1",
        compat: { supportsStore: false },
      },
      {
        enablePromptCacheStripping: true,
        storeMode: "provider-policy",
      },
    );
    const payload = {
      store: false,
      prompt_cache_key: "session-123",
      prompt_cache_retention: "24h",
    } satisfies Record<string, unknown>;

    applyOpenAIResponsesPayloadPolicy(payload, policy);

    expect(payload).not.toHaveProperty("store");
    expect(payload).not.toHaveProperty("prompt_cache_key");
    expect(payload).not.toHaveProperty("prompt_cache_retention");
  });

  it("strips disabled reasoning payloads through the shared helper", () => {
    const payload = {
      reasoning: {
        effort: "none",
      },
    } satisfies Record<string, unknown>;

    applyOpenAIResponsesPayloadPolicy(
      payload,
      resolveOpenAIResponsesPayloadPolicy(
        {
          api: "openai-responses",
          provider: "openai",
          baseUrl: "https://api.openai.com/v1",
        },
        { storeMode: "disable" },
      ),
    );

    expect(payload).not.toHaveProperty("reasoning");
  });
});
src/agents/openai-responses-payload-policy.ts (new file, 162 lines)
@@ -0,0 +1,162 @@
import { resolveProviderRequestPolicyConfig } from "./provider-request-config.js";

type OpenAIResponsesPayloadModel = {
  api?: unknown;
  baseUrl?: unknown;
  provider?: unknown;
  contextWindow?: unknown;
  compat?: { supportsStore?: boolean };
};

type OpenAIResponsesPayloadPolicyOptions = {
  extraParams?: Record<string, unknown>;
  storeMode?: "provider-policy" | "disable" | "preserve";
  enablePromptCacheStripping?: boolean;
  enableServerCompaction?: boolean;
};

export type OpenAIResponsesPayloadPolicy = {
  allowsServiceTier: boolean;
  compactThreshold: number;
  explicitStore: boolean | undefined;
  shouldStripDisabledReasoningPayload: boolean;
  shouldStripPromptCache: boolean;
  shouldStripStore: boolean;
  useServerCompaction: boolean;
};

const OPENAI_RESPONSES_APIS = new Set([
  "openai-responses",
  "azure-openai-responses",
  "openai-codex-responses",
]);

function parsePositiveInteger(value: unknown): number | undefined {
  if (typeof value === "number" && Number.isFinite(value) && value > 0) {
    return Math.floor(value);
  }
  if (typeof value === "string") {
    const parsed = Number.parseInt(value, 10);
    if (Number.isFinite(parsed) && parsed > 0) {
      return parsed;
    }
  }
  return undefined;
}

function resolveOpenAIResponsesCompactThreshold(model: { contextWindow?: unknown }): number {
  const contextWindow = parsePositiveInteger(model.contextWindow);
  if (contextWindow) {
    return Math.max(1_000, Math.floor(contextWindow * 0.7));
  }
  return 80_000;
}

function shouldEnableOpenAIResponsesServerCompaction(
  explicitStore: boolean | undefined,
  provider: unknown,
  extraParams: Record<string, unknown> | undefined,
): boolean {
  const configured = extraParams?.responsesServerCompaction;
  if (configured === false) {
    return false;
  }
  if (explicitStore !== true) {
    return false;
  }
  if (configured === true) {
    return true;
  }
  return provider === "openai";
}

function stripDisabledOpenAIReasoningPayload(payloadObj: Record<string, unknown>): void {
  const reasoning = payloadObj.reasoning;
  if (reasoning === "none") {
    delete payloadObj.reasoning;
    return;
  }
  if (!reasoning || typeof reasoning !== "object" || Array.isArray(reasoning)) {
    return;
  }

  // GPT-5 models reject `reasoning.effort: "none"`. Treat the disabled effort
  // as "reasoning omitted" instead of forwarding an unsupported value.
  const reasoningObj = reasoning as Record<string, unknown>;
  if (reasoningObj.effort === "none") {
    delete payloadObj.reasoning;
  }
}

export function resolveOpenAIResponsesPayloadPolicy(
  model: OpenAIResponsesPayloadModel,
  options: OpenAIResponsesPayloadPolicyOptions = {},
): OpenAIResponsesPayloadPolicy {
  const capabilities = resolveProviderRequestPolicyConfig({
    provider: typeof model.provider === "string" ? model.provider : undefined,
    api: typeof model.api === "string" ? model.api : undefined,
    baseUrl: typeof model.baseUrl === "string" ? model.baseUrl : undefined,
    compat: model.compat,
    capability: "llm",
    transport: "stream",
  }).capabilities;
  const storeMode = options.storeMode ?? "provider-policy";
  const explicitStore =
    storeMode === "preserve"
      ? undefined
      : storeMode === "disable"
        ? capabilities.supportsResponsesStoreField
          ? false
          : undefined
        : capabilities.allowsResponsesStore
          ? true
          : undefined;
  const isResponsesApi = typeof model.api === "string" && OPENAI_RESPONSES_APIS.has(model.api);

  return {
    allowsServiceTier: capabilities.allowsOpenAIServiceTier,
    compactThreshold:
      parsePositiveInteger(options.extraParams?.responsesCompactThreshold) ??
      resolveOpenAIResponsesCompactThreshold(model),
    explicitStore,
    shouldStripDisabledReasoningPayload: capabilities.supportsOpenAIReasoningCompatPayload,
    shouldStripPromptCache:
      options.enablePromptCacheStripping === true && capabilities.shouldStripResponsesPromptCache,
    shouldStripStore:
      explicitStore !== true && model.compat?.supportsStore === false && isResponsesApi,
    useServerCompaction:
      options.enableServerCompaction === true &&
      shouldEnableOpenAIResponsesServerCompaction(
        explicitStore,
        model.provider,
        options.extraParams,
      ),
  };
}

export function applyOpenAIResponsesPayloadPolicy(
  payloadObj: Record<string, unknown>,
  policy: OpenAIResponsesPayloadPolicy,
): void {
  if (policy.explicitStore !== undefined) {
    payloadObj.store = policy.explicitStore;
  }
  if (policy.shouldStripStore) {
    delete payloadObj.store;
  }
  if (policy.shouldStripPromptCache) {
    delete payloadObj.prompt_cache_key;
    delete payloadObj.prompt_cache_retention;
  }
  if (policy.useServerCompaction && payloadObj.context_management === undefined) {
    payloadObj.context_management = [
      {
        type: "compaction",
        compact_threshold: policy.compactThreshold,
      },
    ];
  }
  if (policy.shouldStripDisabledReasoningPayload) {
    stripDisabledOpenAIReasoningPayload(payloadObj);
  }
}
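Illustration (editor's sketch, not part of the commit): resolving and applying a policy end to end. The model literal is hypothetical; the expected values follow from resolveOpenAIResponsesCompactThreshold (floor of 0.7 x contextWindow, clamped to at least 1_000, defaulting to 80_000) and from what the new tests assert for native OpenAI endpoints. Exact capability resolution depends on resolveProviderRequestPolicyConfig.

import {
  applyOpenAIResponsesPayloadPolicy,
  resolveOpenAIResponsesPayloadPolicy,
} from "./openai-responses-payload-policy.js";

// Hypothetical native-OpenAI model with a 200k context window.
const policy = resolveOpenAIResponsesPayloadPolicy(
  {
    api: "openai-responses",
    provider: "openai",
    baseUrl: "https://api.openai.com/v1",
    contextWindow: 200000,
  },
  { enableServerCompaction: true, storeMode: "provider-policy" },
);
// Per the tests above: policy.explicitStore === true, policy.allowsServiceTier === true.
// policy.compactThreshold === 140000 (Math.floor(200000 * 0.7), min 1_000, 80_000 default).

const payload: Record<string, unknown> = { model: "gpt-5.4", input: [] };
applyOpenAIResponsesPayloadPolicy(payload, policy);
// payload.store is set to true; if useServerCompaction resolved true, a
// context_management entry with compact_threshold 140000 is added as well.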
@@ -439,6 +439,82 @@ describe("openai transport stream", () => {
    expect(params.input?.[0]).toMatchObject({ role: "developer" });
  });

  it("gates responses service_tier to native OpenAI endpoints", () => {
    const nativeParams = buildOpenAIResponsesParams(
      {
        id: "gpt-5.4",
        name: "GPT-5.4",
        api: "openai-responses",
        provider: "openai",
        baseUrl: "https://api.openai.com/v1",
        reasoning: true,
        input: ["text"],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 200000,
        maxTokens: 8192,
      } satisfies Model<"openai-responses">,
      {
        systemPrompt: "system",
        messages: [],
        tools: [],
      } as never,
      {
        serviceTier: "priority",
      },
    ) as { service_tier?: unknown };
    const proxyParams = buildOpenAIResponsesParams(
      {
        id: "custom-model",
        name: "Custom Model",
        api: "openai-responses",
        provider: "openai",
        baseUrl: "https://proxy.example.com/v1",
        reasoning: true,
        input: ["text"],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 200000,
        maxTokens: 8192,
      } satisfies Model<"openai-responses">,
      {
        systemPrompt: "system",
        messages: [],
        tools: [],
      } as never,
      {
        serviceTier: "priority",
      },
    ) as { service_tier?: unknown };

    expect(nativeParams.service_tier).toBe("priority");
    expect(proxyParams).not.toHaveProperty("service_tier");
  });

  it("strips store when responses compat disables it", () => {
    const params = buildOpenAIResponsesParams(
      {
        id: "custom-model",
        name: "Custom Model",
        api: "openai-responses",
        provider: "custom-provider",
        baseUrl: "https://proxy.example.com/v1",
        compat: { supportsStore: false },
        reasoning: true,
        input: ["text"],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 200000,
        maxTokens: 8192,
      } as never,
      {
        systemPrompt: "system",
        messages: [],
        tools: [],
      } as never,
      undefined,
    ) as { store?: unknown };

    expect(params).not.toHaveProperty("store");
  });

  it("uses system role for xAI default-route responses providers without relying on baseUrl host sniffing", () => {
    const params = buildOpenAIResponsesParams(
      {
@@ -20,6 +20,10 @@ import type {
} from "openai/resources/responses/responses.js";
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./copilot-dynamic-headers.js";
import { resolveOpenAICompletionsCompatDefaultsFromCapabilities } from "./openai-completions-compat.js";
import {
  applyOpenAIResponsesPayloadPolicy,
  resolveOpenAIResponsesPayloadPolicy,
} from "./openai-responses-payload-policy.js";
import { resolveProviderRequestCapabilities } from "./provider-attribution.js";
import { buildGuardedModelFetch } from "./provider-transport-fetch.js";
import { transformTransportMessages } from "./transport-message-transform.js";
@@ -674,13 +678,15 @@ export function buildOpenAIResponsesParams(
    { supportsDeveloperRole },
  );
  const cacheRetention = resolveCacheRetention(options?.cacheRetention);
  const payloadPolicy = resolveOpenAIResponsesPayloadPolicy(model, {
    storeMode: "disable",
  });
  const params: OpenAIResponsesRequestParams = {
    model: model.id,
    input: messages,
    stream: true,
    prompt_cache_key: cacheRetention === "none" ? undefined : options?.sessionId,
    prompt_cache_retention: getPromptCacheRetention(model.baseUrl, cacheRetention),
    store: false,
  };
  if (options?.maxTokens) {
    params.max_output_tokens = options.maxTokens;
@@ -688,7 +694,7 @@ export function buildOpenAIResponsesParams(
  if (options?.temperature !== undefined) {
    params.temperature = options.temperature;
  }
  if (options?.serviceTier !== undefined) {
  if (options?.serviceTier !== undefined && payloadPolicy.allowsServiceTier) {
    params.service_tier = options.serviceTier;
  }
  if (context.tools) {
@@ -705,6 +711,7 @@
      params.reasoning = { effort: "none" };
    }
  }
  applyOpenAIResponsesPayloadPolicy(params as Record<string, unknown>, payloadPolicy);
  return params;
}
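Worked illustration (editor's sketch, hypothetical model shape): why the hardcoded store: false above can coexist with the policy application at the end of buildOpenAIResponsesParams. The expected outcome is grounded in the "strips store when responses compat disables it" test; the assumption is that the capability resolver reports the store field unsupported for this route.

import {
  applyOpenAIResponsesPayloadPolicy,
  resolveOpenAIResponsesPayloadPolicy,
} from "./openai-responses-payload-policy.js";

const proxyPolicy = resolveOpenAIResponsesPayloadPolicy(
  {
    api: "openai-responses",
    provider: "custom-provider",
    baseUrl: "https://proxy.example.com/v1",
    compat: { supportsStore: false },
  },
  { storeMode: "disable" },
);
const params: Record<string, unknown> = { store: false };
applyOpenAIResponsesPayloadPolicy(params, proxyPolicy);
// With compat.supportsStore === false, shouldStripStore resolves true and the
// hardcoded `store` field is deleted entirely, matching the test above.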
@@ -0,0 +1,35 @@
import { describe, expect, it } from "vitest";
import { applyAnthropicEphemeralCacheControlMarkers } from "./anthropic-cache-control-payload.js";

describe("applyAnthropicEphemeralCacheControlMarkers", () => {
  it("marks system text content as ephemeral and strips thinking cache markers", () => {
    const payload = {
      messages: [
        { role: "system", content: "system prompt" },
        {
          role: "assistant",
          content: [
            { type: "thinking", text: "draft", cache_control: { type: "ephemeral" } },
            { type: "text", text: "answer" },
          ],
        },
      ],
    } satisfies Record<string, unknown>;

    applyAnthropicEphemeralCacheControlMarkers(payload);

    expect(payload.messages).toEqual([
      {
        role: "system",
        content: [{ type: "text", text: "system prompt", cache_control: { type: "ephemeral" } }],
      },
      {
        role: "assistant",
        content: [
          { type: "thinking", text: "draft" },
          { type: "text", text: "answer" },
        ],
      },
    ]);
  });
});
@@ -0,0 +1,41 @@
export function applyAnthropicEphemeralCacheControlMarkers(
  payloadObj: Record<string, unknown>,
): void {
  const messages = payloadObj.messages;
  if (!Array.isArray(messages)) {
    return;
  }

  for (const message of messages as Array<{ role?: string; content?: unknown }>) {
    if (message.role === "system" || message.role === "developer") {
      if (typeof message.content === "string") {
        message.content = [
          { type: "text", text: message.content, cache_control: { type: "ephemeral" } },
        ];
        continue;
      }
      if (Array.isArray(message.content) && message.content.length > 0) {
        const last = message.content[message.content.length - 1];
        if (last && typeof last === "object") {
          const record = last as Record<string, unknown>;
          if (record.type !== "thinking" && record.type !== "redacted_thinking") {
            record.cache_control = { type: "ephemeral" };
          }
        }
      }
      continue;
    }

    if (message.role === "assistant" && Array.isArray(message.content)) {
      for (const block of message.content) {
        if (!block || typeof block !== "object") {
          continue;
        }
        const record = block as Record<string, unknown>;
        if (record.type === "thinking" || record.type === "redacted_thinking") {
          delete record.cache_control;
        }
      }
    }
  }
}
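Why the helper is shaped this way: Anthropic-style prompt caching caches the request prefix up to each cache_control breakpoint, so marking the final system/developer block caches the whole system prompt, and thinking blocks cannot carry cache markers, hence the strip pass. A hedged sketch of a provider plugin reusing the shared helper through the SDK exports (the wrapper name is hypothetical; the composition mirrors the wrappers elsewhere in this commit):

import type { StreamFn } from "@mariozechner/pi-agent-core";
import { streamSimple } from "@mariozechner/pi-ai";
import {
  applyAnthropicEphemeralCacheControlMarkers,
  streamWithPayloadPatch,
} from "openclaw/plugin-sdk/provider-stream";

// Hypothetical plugin-side wrapper: every outgoing payload gets the ephemeral
// cache markers, with no provider-local copy of the marker logic.
export function wrapMyAnthropicCompatibleStream(baseStreamFn?: StreamFn): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) =>
    streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => {
      applyAnthropicEphemeralCacheControlMarkers(payloadObj);
    });
}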
@@ -6,6 +6,10 @@ import {
  patchCodexNativeWebSearchPayload,
  resolveCodexNativeSearchActivation,
} from "../codex-native-web-search.js";
import {
  applyOpenAIResponsesPayloadPolicy,
  resolveOpenAIResponsesPayloadPolicy,
} from "../openai-responses-payload-policy.js";
import { resolveProviderRequestPolicyConfig } from "../provider-request-config.js";
import { log } from "./logger.js";
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
@@ -13,8 +17,6 @@ import { streamWithPayloadPatch } from "./stream-payload-utils.js";
type OpenAIServiceTier = "auto" | "default" | "flex" | "priority";
type OpenAITextVerbosity = "low" | "medium" | "high";

const OPENAI_RESPONSES_APIS = new Set(["openai-responses", "azure-openai-responses"]);

function resolveOpenAIRequestCapabilities(model: {
  api?: unknown;
  provider?: unknown;
@@ -47,76 +49,7 @@ function shouldApplyOpenAIServiceTier(model: {
  provider?: unknown;
  baseUrl?: unknown;
}): boolean {
  return resolveOpenAIRequestCapabilities(model).allowsOpenAIServiceTier;
}

function shouldForceResponsesStore(model: {
  api?: unknown;
  provider?: unknown;
  baseUrl?: unknown;
  compat?: { supportsStore?: boolean };
}): boolean {
  return resolveOpenAIRequestCapabilities(model).allowsResponsesStore;
}

function parsePositiveInteger(value: unknown): number | undefined {
  if (typeof value === "number" && Number.isFinite(value) && value > 0) {
    return Math.floor(value);
  }
  if (typeof value === "string") {
    const parsed = Number.parseInt(value, 10);
    if (Number.isFinite(parsed) && parsed > 0) {
      return parsed;
    }
  }
  return undefined;
}

function resolveOpenAIResponsesCompactThreshold(model: { contextWindow?: unknown }): number {
  const contextWindow = parsePositiveInteger(model.contextWindow);
  if (contextWindow) {
    return Math.max(1_000, Math.floor(contextWindow * 0.7));
  }
  return 80_000;
}

function shouldEnableOpenAIResponsesServerCompaction(
  model: {
    api?: unknown;
    provider?: unknown;
    baseUrl?: unknown;
    compat?: { supportsStore?: boolean };
  },
  extraParams: Record<string, unknown> | undefined,
): boolean {
  const configured = extraParams?.responsesServerCompaction;
  if (configured === false) {
    return false;
  }
  if (!shouldForceResponsesStore(model)) {
    return false;
  }
  if (configured === true) {
    return true;
  }
  return model.provider === "openai";
}

function shouldStripResponsesStore(
  model: { api?: unknown; compat?: { supportsStore?: boolean } },
  forceStore: boolean,
): boolean {
  if (forceStore) {
    return false;
  }
  if (typeof model.api !== "string") {
    return false;
  }
  return OPENAI_RESPONSES_APIS.has(model.api) && model.compat?.supportsStore === false;
}

function shouldStripResponsesPromptCache(model: { api?: unknown; baseUrl?: unknown }): boolean {
  return resolveOpenAIRequestCapabilities(model).shouldStripResponsesPromptCache;
  return resolveOpenAIResponsesPayloadPolicy(model, { storeMode: "disable" }).allowsServiceTier;
}

function shouldApplyOpenAIReasoningCompatibility(model: {
@@ -130,52 +63,6 @@ function shouldApplyOpenAIReasoningCompatibility(model: {
  return resolveOpenAIRequestCapabilities(model).supportsOpenAIReasoningCompatPayload;
}

function stripDisabledOpenAIReasoningPayload(payloadObj: Record<string, unknown>): void {
  const reasoning = payloadObj.reasoning;
  if (reasoning === "none") {
    delete payloadObj.reasoning;
    return;
  }
  if (!reasoning || typeof reasoning !== "object" || Array.isArray(reasoning)) {
    return;
  }

  // GPT-5 models reject `reasoning.effort: "none"`. Treat the disabled effort
  // as "reasoning omitted" instead of forwarding an unsupported value.
  const reasoningObj = reasoning as Record<string, unknown>;
  if (reasoningObj.effort === "none") {
    delete payloadObj.reasoning;
  }
}

function applyOpenAIResponsesPayloadOverrides(params: {
  payloadObj: Record<string, unknown>;
  forceStore: boolean;
  stripStore: boolean;
  stripPromptCache: boolean;
  useServerCompaction: boolean;
  compactThreshold: number;
}): void {
  if (params.forceStore) {
    params.payloadObj.store = true;
  }
  if (params.stripStore) {
    delete params.payloadObj.store;
  }
  if (params.stripPromptCache) {
    delete params.payloadObj.prompt_cache_key;
    delete params.payloadObj.prompt_cache_retention;
  }
  if (params.useServerCompaction && params.payloadObj.context_management === undefined) {
    params.payloadObj.context_management = [
      {
        type: "compaction",
        compact_threshold: params.compactThreshold,
      },
    ];
  }
}

function normalizeOpenAIServiceTier(value: unknown): OpenAIServiceTier | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
@@ -283,30 +170,28 @@ export function createOpenAIResponsesContextManagementWrapper(
): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    const forceStore = shouldForceResponsesStore(model);
    const useServerCompaction = shouldEnableOpenAIResponsesServerCompaction(model, extraParams);
    const stripStore = shouldStripResponsesStore(model, forceStore);
    const stripPromptCache = shouldStripResponsesPromptCache(model);
    if (!forceStore && !useServerCompaction && !stripStore && !stripPromptCache) {
    const policy = resolveOpenAIResponsesPayloadPolicy(model, {
      extraParams,
      enablePromptCacheStripping: true,
      enableServerCompaction: true,
      storeMode: "provider-policy",
    });
    if (
      policy.explicitStore === undefined &&
      !policy.useServerCompaction &&
      !policy.shouldStripStore &&
      !policy.shouldStripPromptCache &&
      !policy.shouldStripDisabledReasoningPayload
    ) {
      return underlying(model, context, options);
    }

    const compactThreshold =
      parsePositiveInteger(extraParams?.responsesCompactThreshold) ??
      resolveOpenAIResponsesCompactThreshold(model);
    const originalOnPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      onPayload: (payload) => {
        if (payload && typeof payload === "object") {
          applyOpenAIResponsesPayloadOverrides({
            payloadObj: payload as Record<string, unknown>,
            forceStore,
            stripStore,
            stripPromptCache,
            useServerCompaction,
            compactThreshold,
          });
          applyOpenAIResponsesPayloadPolicy(payload as Record<string, unknown>, policy);
        }
        return originalOnPayload?.(payload, model);
      },
@@ -323,7 +208,10 @@ export function createOpenAIReasoningCompatibilityWrapper(
      return underlying(model, context, options);
    }
    return streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => {
      stripDisabledOpenAIReasoningPayload(payloadObj);
      applyOpenAIResponsesPayloadPolicy(
        payloadObj,
        resolveOpenAIResponsesPayloadPolicy(model, { storeMode: "preserve" }),
      );
    });
  };
}
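Note: stream-payload-utils.ts itself is not part of this diff. Judging from the call sites, streamWithPayloadPatch plausibly chains a payload patch in front of any caller-supplied onPayload, mirroring the onPayload wiring visible in the context-management wrapper above. A sketch under that assumption (the function name carries a "Sketch" suffix to mark it as the editor's reconstruction, not the real implementation):

import type { StreamFn } from "@mariozechner/pi-agent-core";

type StreamArgs = Parameters<StreamFn>;

// Assumed shape: run the patch on every structured payload before handing it
// to the original onPayload callback, then delegate to the underlying stream.
function streamWithPayloadPatchSketch(
  underlying: StreamFn,
  model: StreamArgs[0],
  context: StreamArgs[1],
  options: StreamArgs[2],
  patch: (payloadObj: Record<string, unknown>) => void,
): ReturnType<StreamFn> {
  const originalOnPayload = options?.onPayload;
  return underlying(model, context, {
    ...options,
    onPayload: (payload) => {
      if (payload && typeof payload === "object") {
        patch(payload as Record<string, unknown>);
      }
      return originalOnPayload?.(payload, model);
    },
  });
}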
@@ -3,6 +3,7 @@ import { streamSimple } from "@mariozechner/pi-ai";
import { isXaiModelHint } from "../../../extensions/xai/api.js";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import { resolveProviderRequestPolicyConfig } from "../provider-request-config.js";
import { applyAnthropicEphemeralCacheControlMarkers } from "./anthropic-cache-control-payload.js";
import { isOpenRouterAnthropicModelRef } from "./anthropic-family-cache-semantics.js";
import { streamWithPayloadPatch } from "./stream-payload-utils.js";
const KILOCODE_FEATURE_HEADER = "X-KILOCODE-FEATURE";
@@ -66,39 +67,7 @@ export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | unde
  }

  return streamWithPayloadPatch(underlying, model, context, options, (payloadObj) => {
    const messages = payloadObj.messages;
    if (Array.isArray(messages)) {
      for (const msg of messages as Array<{ role?: string; content?: unknown }>) {
        if (msg.role === "system" || msg.role === "developer") {
          if (typeof msg.content === "string") {
            msg.content = [
              { type: "text", text: msg.content, cache_control: { type: "ephemeral" } },
            ];
          } else if (Array.isArray(msg.content) && msg.content.length > 0) {
            const last = msg.content[msg.content.length - 1];
            if (last && typeof last === "object") {
              const record = last as Record<string, unknown>;
              if (record.type !== "thinking" && record.type !== "redacted_thinking") {
                record.cache_control = { type: "ephemeral" };
              }
            }
          }
          continue;
        }

        if (msg.role === "assistant" && Array.isArray(msg.content)) {
          for (const block of msg.content) {
            if (!block || typeof block !== "object") {
              continue;
            }
            const record = block as Record<string, unknown>;
            if (record.type === "thinking" || record.type === "redacted_thinking") {
              delete record.cache_control;
            }
          }
        }
      }
    }
    applyAnthropicEphemeralCacheControlMarkers(payloadObj);
  });
};
}
@@ -1,5 +1,10 @@
// Public stream-wrapper helpers for provider plugins.

export {
  buildCopilotDynamicHeaders,
  hasCopilotVisionInput,
} from "../agents/copilot-dynamic-headers.js";
export { applyAnthropicEphemeralCacheControlMarkers } from "../agents/pi-embedded-runner/anthropic-cache-control-payload.js";
export { createAnthropicToolPayloadCompatibilityWrapper } from "../agents/pi-embedded-runner/anthropic-family-tool-payload-compat.js";
export {
  createBedrockNoCacheWrapper,