Mirror of https://github.com/moltbot/moltbot.git
Synced 2026-03-07 22:44:16 +00:00
1496 lines · 60 KiB · TypeScript
import { randomBytes } from "node:crypto";
|
||
import fs from "node:fs/promises";
|
||
import type { ThinkLevel } from "../../auto-reply/thinking.js";
|
||
import {
|
||
ensureContextEnginesInitialized,
|
||
resolveContextEngine,
|
||
} from "../../context-engine/index.js";
|
||
import { computeBackoff, sleepWithAbort, type BackoffPolicy } from "../../infra/backoff.js";
|
||
import { generateSecureToken } from "../../infra/secure-random.js";
|
||
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
|
||
import type { PluginHookBeforeAgentStartResult } from "../../plugins/types.js";
|
||
import { enqueueCommandInLane } from "../../process/command-queue.js";
|
||
import { isMarkdownCapableMessageChannel } from "../../utils/message-channel.js";
|
||
import { resolveOpenClawAgentDir } from "../agent-paths.js";
|
||
import { hasConfiguredModelFallbacks } from "../agent-scope.js";
|
||
import {
|
||
isProfileInCooldown,
|
||
type AuthProfileFailureReason,
|
||
markAuthProfileFailure,
|
||
markAuthProfileGood,
|
||
markAuthProfileUsed,
|
||
resolveProfilesUnavailableReason,
|
||
} from "../auth-profiles.js";
|
||
import {
|
||
CONTEXT_WINDOW_HARD_MIN_TOKENS,
|
||
CONTEXT_WINDOW_WARN_BELOW_TOKENS,
|
||
evaluateContextWindowGuard,
|
||
resolveContextWindowInfo,
|
||
} from "../context-window-guard.js";
|
||
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
|
||
import { FailoverError, resolveFailoverStatus } from "../failover-error.js";
|
||
import {
|
||
ensureAuthProfileStore,
|
||
getApiKeyForModel,
|
||
resolveAuthProfileOrder,
|
||
type ResolvedProviderAuth,
|
||
} from "../model-auth.js";
|
||
import { normalizeProviderId } from "../model-selection.js";
|
||
import { ensureOpenClawModelsJson } from "../models-config.js";
|
||
import {
|
||
formatBillingErrorMessage,
|
||
classifyFailoverReason,
|
||
formatAssistantErrorText,
|
||
isAuthAssistantError,
|
||
isBillingAssistantError,
|
||
isCompactionFailureError,
|
||
isLikelyContextOverflowError,
|
||
isFailoverAssistantError,
|
||
isFailoverErrorMessage,
|
||
parseImageSizeError,
|
||
parseImageDimensionError,
|
||
isRateLimitAssistantError,
|
||
isTimeoutErrorMessage,
|
||
pickFallbackThinkingLevel,
|
||
type FailoverReason,
|
||
} from "../pi-embedded-helpers.js";
|
||
import { derivePromptTokens, normalizeUsage, type UsageLike } from "../usage.js";
|
||
import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js";
|
||
import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
|
||
import { log } from "./logger.js";
|
||
import { resolveModel } from "./model.js";
|
||
import { runEmbeddedAttempt } from "./run/attempt.js";
|
||
import type { RunEmbeddedPiAgentParams } from "./run/params.js";
|
||
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
|
||
import {
|
||
truncateOversizedToolResultsInSession,
|
||
sessionLikelyHasOversizedToolResults,
|
||
} from "./tool-result-truncation.js";
|
||
import type { EmbeddedPiAgentMeta, EmbeddedPiRunResult } from "./types.js";
|
||
import { describeUnknownError } from "./utils.js";
|
||
|
||
// Local alias for the resolved provider auth shape from model-auth.
type ApiKeyInfo = ResolvedProviderAuth;

// Mutable lifecycle state for GitHub Copilot auth: the long-lived GitHub
// token used to mint short-lived Copilot API tokens, plus the bookkeeping
// needed to refresh ahead of expiry.
type CopilotTokenState = {
  // Long-lived GitHub token; exchanged for Copilot API tokens on refresh.
  githubToken: string;
  // Epoch-ms expiry of the current Copilot API token (compared to Date.now()).
  expiresAt: number;
  // Pending scheduled-refresh timer, if any.
  refreshTimer?: ReturnType<typeof setTimeout>;
  // De-duplicates concurrent refreshes: callers await this instead of
  // starting a second token exchange.
  refreshInFlight?: Promise<void>;
};
|
||
|
||
// Refresh the Copilot API token this long before its reported expiry.
const COPILOT_REFRESH_MARGIN_MS = 5 * 60 * 1000;
// Delay before retrying after a failed scheduled refresh.
const COPILOT_REFRESH_RETRY_MS = 60 * 1000;
// Floor on the scheduled-refresh delay so an already-past expiry cannot
// schedule an immediate (tight-loop) refresh.
const COPILOT_REFRESH_MIN_DELAY_MS = 5 * 1000;
// Keep overload pacing noticeable enough to avoid tight retry bursts, but short
// enough that fallback still feels responsive within a single turn.
const OVERLOAD_FAILOVER_BACKOFF_POLICY: BackoffPolicy = {
  initialMs: 250,
  maxMs: 1_500,
  factor: 2,
  jitter: 0.2,
};

// Avoid Anthropic's refusal test token poisoning session transcripts.
const ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL = "ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL";
const ANTHROPIC_MAGIC_STRING_REPLACEMENT = "ANTHROPIC MAGIC STRING TRIGGER REFUSAL (redacted)";
|
||
|
||
function scrubAnthropicRefusalMagic(prompt: string): string {
|
||
if (!prompt.includes(ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL)) {
|
||
return prompt;
|
||
}
|
||
return prompt.replaceAll(
|
||
ANTHROPIC_MAGIC_STRING_TRIGGER_REFUSAL,
|
||
ANTHROPIC_MAGIC_STRING_REPLACEMENT,
|
||
);
|
||
}
|
||
|
||
// Running token-usage totals across every API call made within one run loop.
type UsageAccumulator = {
  // Accumulated prompt tokens across calls.
  input: number;
  // Accumulated generated tokens across calls.
  output: number;
  cacheRead: number;
  cacheWrite: number;
  total: number;
  /** Cache fields from the most recent API call (not accumulated). */
  // Kept separately because each call reports cacheRead close to the current
  // context size, so accumulated cache totals would inflate context reporting.
  lastCacheRead: number;
  lastCacheWrite: number;
  lastInput: number;
};
|
||
|
||
const createUsageAccumulator = (): UsageAccumulator => ({
|
||
input: 0,
|
||
output: 0,
|
||
cacheRead: 0,
|
||
cacheWrite: 0,
|
||
total: 0,
|
||
lastCacheRead: 0,
|
||
lastCacheWrite: 0,
|
||
lastInput: 0,
|
||
});
|
||
|
||
function createCompactionDiagId(): string {
|
||
return `ovf-${Date.now().toString(36)}-${generateSecureToken(4)}`;
|
||
}
|
||
|
||
// Defensive guard for the outer run loop across all retry branches.
const BASE_RUN_RETRY_ITERATIONS = 24;
// Additional retry budget granted per auth-profile candidate.
const RUN_RETRY_ITERATIONS_PER_PROFILE = 8;
// Clamp bounds applied to the scaled retry ceiling.
const MIN_RUN_RETRY_ITERATIONS = 32;
const MAX_RUN_RETRY_ITERATIONS = 160;
|
||
|
||
function resolveMaxRunRetryIterations(profileCandidateCount: number): number {
|
||
const scaled =
|
||
BASE_RUN_RETRY_ITERATIONS +
|
||
Math.max(1, profileCandidateCount) * RUN_RETRY_ITERATIONS_PER_PROFILE;
|
||
return Math.min(MAX_RUN_RETRY_ITERATIONS, Math.max(MIN_RUN_RETRY_ITERATIONS, scaled));
|
||
}
|
||
|
||
const hasUsageValues = (
|
||
usage: ReturnType<typeof normalizeUsage>,
|
||
): usage is NonNullable<ReturnType<typeof normalizeUsage>> =>
|
||
!!usage &&
|
||
[usage.input, usage.output, usage.cacheRead, usage.cacheWrite, usage.total].some(
|
||
(value) => typeof value === "number" && Number.isFinite(value) && value > 0,
|
||
);
|
||
|
||
const mergeUsageIntoAccumulator = (
|
||
target: UsageAccumulator,
|
||
usage: ReturnType<typeof normalizeUsage>,
|
||
) => {
|
||
if (!hasUsageValues(usage)) {
|
||
return;
|
||
}
|
||
target.input += usage.input ?? 0;
|
||
target.output += usage.output ?? 0;
|
||
target.cacheRead += usage.cacheRead ?? 0;
|
||
target.cacheWrite += usage.cacheWrite ?? 0;
|
||
target.total +=
|
||
usage.total ??
|
||
(usage.input ?? 0) + (usage.output ?? 0) + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
|
||
// Track the most recent API call's cache fields for accurate context-size reporting.
|
||
// Accumulated cache totals inflate context size when there are multiple tool-call round-trips,
|
||
// since each call reports cacheRead ≈ current_context_size.
|
||
target.lastCacheRead = usage.cacheRead ?? 0;
|
||
target.lastCacheWrite = usage.cacheWrite ?? 0;
|
||
target.lastInput = usage.input ?? 0;
|
||
};
|
||
|
||
const toNormalizedUsage = (usage: UsageAccumulator) => {
|
||
const hasUsage =
|
||
usage.input > 0 ||
|
||
usage.output > 0 ||
|
||
usage.cacheRead > 0 ||
|
||
usage.cacheWrite > 0 ||
|
||
usage.total > 0;
|
||
if (!hasUsage) {
|
||
return undefined;
|
||
}
|
||
// Use the LAST API call's cache fields for context-size calculation.
|
||
// The accumulated cacheRead/cacheWrite inflate context size because each tool-call
|
||
// round-trip reports cacheRead ≈ current_context_size, and summing N calls gives
|
||
// N × context_size which gets clamped to contextWindow (e.g. 200k).
|
||
// See: https://github.com/openclaw/openclaw/issues/13698
|
||
//
|
||
// We use lastInput/lastCacheRead/lastCacheWrite (from the most recent API call) for
|
||
// cache-related fields, but keep accumulated output (total generated text this turn).
|
||
const lastPromptTokens = usage.lastInput + usage.lastCacheRead + usage.lastCacheWrite;
|
||
return {
|
||
input: usage.lastInput || undefined,
|
||
output: usage.output || undefined,
|
||
cacheRead: usage.lastCacheRead || undefined,
|
||
cacheWrite: usage.lastCacheWrite || undefined,
|
||
total: lastPromptTokens + usage.output || undefined,
|
||
};
|
||
};
|
||
|
||
function resolveActiveErrorContext(params: {
|
||
lastAssistant: { provider?: string; model?: string } | undefined;
|
||
provider: string;
|
||
model: string;
|
||
}): { provider: string; model: string } {
|
||
return {
|
||
provider: params.lastAssistant?.provider ?? params.provider,
|
||
model: params.lastAssistant?.model ?? params.model,
|
||
};
|
||
}
|
||
|
||
/**
|
||
* Build agentMeta for error return paths, preserving accumulated usage so that
|
||
* session totalTokens reflects the actual context size rather than going stale.
|
||
* Without this, error returns omit usage and the session keeps whatever
|
||
* totalTokens was set by the previous successful run.
|
||
*/
|
||
function buildErrorAgentMeta(params: {
|
||
sessionId: string;
|
||
provider: string;
|
||
model: string;
|
||
usageAccumulator: UsageAccumulator;
|
||
lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
|
||
lastAssistant?: { usage?: unknown } | null;
|
||
/** API-reported total from the most recent call, mirroring the success path correction. */
|
||
lastTurnTotal?: number;
|
||
}): EmbeddedPiAgentMeta {
|
||
const usage = toNormalizedUsage(params.usageAccumulator);
|
||
// Apply the same lastTurnTotal correction the success path uses so
|
||
// usage.total reflects the API-reported context size, not accumulated totals.
|
||
if (usage && params.lastTurnTotal && params.lastTurnTotal > 0) {
|
||
usage.total = params.lastTurnTotal;
|
||
}
|
||
const lastCallUsage = params.lastAssistant
|
||
? normalizeUsage(params.lastAssistant.usage as UsageLike)
|
||
: undefined;
|
||
const promptTokens = derivePromptTokens(params.lastRunPromptUsage);
|
||
return {
|
||
sessionId: params.sessionId,
|
||
provider: params.provider,
|
||
model: params.model,
|
||
// Only include usage fields when we have actual data from prior API calls.
|
||
...(usage ? { usage } : {}),
|
||
...(lastCallUsage ? { lastCallUsage } : {}),
|
||
...(promptTokens ? { promptTokens } : {}),
|
||
};
|
||
}
|
||
|
||
export async function runEmbeddedPiAgent(
|
||
params: RunEmbeddedPiAgentParams,
|
||
): Promise<EmbeddedPiRunResult> {
|
||
const sessionLane = resolveSessionLane(params.sessionKey?.trim() || params.sessionId);
|
||
const globalLane = resolveGlobalLane(params.lane);
|
||
const enqueueGlobal =
|
||
params.enqueue ?? ((task, opts) => enqueueCommandInLane(globalLane, task, opts));
|
||
const enqueueSession =
|
||
params.enqueue ?? ((task, opts) => enqueueCommandInLane(sessionLane, task, opts));
|
||
const channelHint = params.messageChannel ?? params.messageProvider;
|
||
const resolvedToolResultFormat =
|
||
params.toolResultFormat ??
|
||
(channelHint
|
||
? isMarkdownCapableMessageChannel(channelHint)
|
||
? "markdown"
|
||
: "plain"
|
||
: "markdown");
|
||
const isProbeSession = params.sessionId?.startsWith("probe-") ?? false;
|
||
|
||
return enqueueSession(() =>
|
||
enqueueGlobal(async () => {
|
||
const started = Date.now();
|
||
const workspaceResolution = resolveRunWorkspaceDir({
|
||
workspaceDir: params.workspaceDir,
|
||
sessionKey: params.sessionKey,
|
||
agentId: params.agentId,
|
||
config: params.config,
|
||
});
|
||
const resolvedWorkspace = workspaceResolution.workspaceDir;
|
||
const redactedSessionId = redactRunIdentifier(params.sessionId);
|
||
const redactedSessionKey = redactRunIdentifier(params.sessionKey);
|
||
const redactedWorkspace = redactRunIdentifier(resolvedWorkspace);
|
||
if (workspaceResolution.usedFallback) {
|
||
log.warn(
|
||
`[workspace-fallback] caller=runEmbeddedPiAgent reason=${workspaceResolution.fallbackReason} run=${params.runId} session=${redactedSessionId} sessionKey=${redactedSessionKey} agent=${workspaceResolution.agentId} workspace=${redactedWorkspace}`,
|
||
);
|
||
}
|
||
const prevCwd = process.cwd();
|
||
|
||
let provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER;
|
||
let modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL;
|
||
const agentDir = params.agentDir ?? resolveOpenClawAgentDir();
|
||
const fallbackConfigured = hasConfiguredModelFallbacks({
|
||
cfg: params.config,
|
||
agentId: params.agentId,
|
||
sessionKey: params.sessionKey,
|
||
});
|
||
await ensureOpenClawModelsJson(params.config, agentDir);
|
||
|
||
// Run before_model_resolve hooks early so plugins can override the
|
||
// provider/model before resolveModel().
|
||
//
|
||
// Legacy compatibility: before_agent_start is also checked for override
|
||
// fields if present. New hook takes precedence when both are set.
|
||
let modelResolveOverride: { providerOverride?: string; modelOverride?: string } | undefined;
|
||
let legacyBeforeAgentStartResult: PluginHookBeforeAgentStartResult | undefined;
|
||
const hookRunner = getGlobalHookRunner();
|
||
const hookCtx = {
|
||
agentId: workspaceResolution.agentId,
|
||
sessionKey: params.sessionKey,
|
||
sessionId: params.sessionId,
|
||
workspaceDir: resolvedWorkspace,
|
||
messageProvider: params.messageProvider ?? undefined,
|
||
trigger: params.trigger,
|
||
channelId: params.messageChannel ?? params.messageProvider ?? undefined,
|
||
};
|
||
if (hookRunner?.hasHooks("before_model_resolve")) {
|
||
try {
|
||
modelResolveOverride = await hookRunner.runBeforeModelResolve(
|
||
{ prompt: params.prompt },
|
||
hookCtx,
|
||
);
|
||
} catch (hookErr) {
|
||
log.warn(`before_model_resolve hook failed: ${String(hookErr)}`);
|
||
}
|
||
}
|
||
if (hookRunner?.hasHooks("before_agent_start")) {
|
||
try {
|
||
legacyBeforeAgentStartResult = await hookRunner.runBeforeAgentStart(
|
||
{ prompt: params.prompt },
|
||
hookCtx,
|
||
);
|
||
modelResolveOverride = {
|
||
providerOverride:
|
||
modelResolveOverride?.providerOverride ??
|
||
legacyBeforeAgentStartResult?.providerOverride,
|
||
modelOverride:
|
||
modelResolveOverride?.modelOverride ?? legacyBeforeAgentStartResult?.modelOverride,
|
||
};
|
||
} catch (hookErr) {
|
||
log.warn(
|
||
`before_agent_start hook (legacy model resolve path) failed: ${String(hookErr)}`,
|
||
);
|
||
}
|
||
}
|
||
if (modelResolveOverride?.providerOverride) {
|
||
provider = modelResolveOverride.providerOverride;
|
||
log.info(`[hooks] provider overridden to ${provider}`);
|
||
}
|
||
if (modelResolveOverride?.modelOverride) {
|
||
modelId = modelResolveOverride.modelOverride;
|
||
log.info(`[hooks] model overridden to ${modelId}`);
|
||
}
|
||
|
||
const { model, error, authStorage, modelRegistry } = resolveModel(
|
||
provider,
|
||
modelId,
|
||
agentDir,
|
||
params.config,
|
||
);
|
||
if (!model) {
|
||
throw new FailoverError(error ?? `Unknown model: ${provider}/${modelId}`, {
|
||
reason: "model_not_found",
|
||
provider,
|
||
model: modelId,
|
||
});
|
||
}
|
||
|
||
const ctxInfo = resolveContextWindowInfo({
|
||
cfg: params.config,
|
||
provider,
|
||
modelId,
|
||
modelContextWindow: model.contextWindow,
|
||
defaultTokens: DEFAULT_CONTEXT_TOKENS,
|
||
});
|
||
// Apply contextTokens cap to model so pi-coding-agent's auto-compaction
|
||
// threshold uses the effective limit, not the native context window.
|
||
const effectiveModel =
|
||
ctxInfo.tokens < (model.contextWindow ?? Infinity)
|
||
? { ...model, contextWindow: ctxInfo.tokens }
|
||
: model;
|
||
const ctxGuard = evaluateContextWindowGuard({
|
||
info: ctxInfo,
|
||
warnBelowTokens: CONTEXT_WINDOW_WARN_BELOW_TOKENS,
|
||
hardMinTokens: CONTEXT_WINDOW_HARD_MIN_TOKENS,
|
||
});
|
||
if (ctxGuard.shouldWarn) {
|
||
log.warn(
|
||
`low context window: ${provider}/${modelId} ctx=${ctxGuard.tokens} (warn<${CONTEXT_WINDOW_WARN_BELOW_TOKENS}) source=${ctxGuard.source}`,
|
||
);
|
||
}
|
||
if (ctxGuard.shouldBlock) {
|
||
log.error(
|
||
`blocked model (context window too small): ${provider}/${modelId} ctx=${ctxGuard.tokens} (min=${CONTEXT_WINDOW_HARD_MIN_TOKENS}) source=${ctxGuard.source}`,
|
||
);
|
||
throw new FailoverError(
|
||
`Model context window too small (${ctxGuard.tokens} tokens). Minimum is ${CONTEXT_WINDOW_HARD_MIN_TOKENS}.`,
|
||
{ reason: "unknown", provider, model: modelId },
|
||
);
|
||
}
|
||
|
||
const authStore = ensureAuthProfileStore(agentDir, { allowKeychainPrompt: false });
|
||
const preferredProfileId = params.authProfileId?.trim();
|
||
let lockedProfileId = params.authProfileIdSource === "user" ? preferredProfileId : undefined;
|
||
if (lockedProfileId) {
|
||
const lockedProfile = authStore.profiles[lockedProfileId];
|
||
if (
|
||
!lockedProfile ||
|
||
normalizeProviderId(lockedProfile.provider) !== normalizeProviderId(provider)
|
||
) {
|
||
lockedProfileId = undefined;
|
||
}
|
||
}
|
||
const profileOrder = resolveAuthProfileOrder({
|
||
cfg: params.config,
|
||
store: authStore,
|
||
provider,
|
||
preferredProfile: preferredProfileId,
|
||
});
|
||
if (lockedProfileId && !profileOrder.includes(lockedProfileId)) {
|
||
throw new Error(`Auth profile "${lockedProfileId}" is not configured for ${provider}.`);
|
||
}
|
||
const profileCandidates = lockedProfileId
|
||
? [lockedProfileId]
|
||
: profileOrder.length > 0
|
||
? profileOrder
|
||
: [undefined];
|
||
let profileIndex = 0;
|
||
|
||
const initialThinkLevel = params.thinkLevel ?? "off";
|
||
let thinkLevel = initialThinkLevel;
|
||
const attemptedThinking = new Set<ThinkLevel>();
|
||
let apiKeyInfo: ApiKeyInfo | null = null;
|
||
let lastProfileId: string | undefined;
|
||
const copilotTokenState: CopilotTokenState | null =
|
||
model.provider === "github-copilot" ? { githubToken: "", expiresAt: 0 } : null;
|
||
let copilotRefreshCancelled = false;
|
||
const hasCopilotGithubToken = () => Boolean(copilotTokenState?.githubToken.trim());
|
||
|
||
const clearCopilotRefreshTimer = () => {
|
||
if (!copilotTokenState?.refreshTimer) {
|
||
return;
|
||
}
|
||
clearTimeout(copilotTokenState.refreshTimer);
|
||
copilotTokenState.refreshTimer = undefined;
|
||
};
|
||
|
||
const stopCopilotRefreshTimer = () => {
|
||
if (!copilotTokenState) {
|
||
return;
|
||
}
|
||
copilotRefreshCancelled = true;
|
||
clearCopilotRefreshTimer();
|
||
};
|
||
|
||
const refreshCopilotToken = async (reason: string): Promise<void> => {
|
||
if (!copilotTokenState) {
|
||
return;
|
||
}
|
||
if (copilotTokenState.refreshInFlight) {
|
||
await copilotTokenState.refreshInFlight;
|
||
return;
|
||
}
|
||
const { resolveCopilotApiToken } = await import("../../providers/github-copilot-token.js");
|
||
copilotTokenState.refreshInFlight = (async () => {
|
||
const githubToken = copilotTokenState.githubToken.trim();
|
||
if (!githubToken) {
|
||
throw new Error("Copilot refresh requires a GitHub token.");
|
||
}
|
||
log.debug(`Refreshing GitHub Copilot token (${reason})...`);
|
||
const copilotToken = await resolveCopilotApiToken({
|
||
githubToken,
|
||
});
|
||
authStorage.setRuntimeApiKey(model.provider, copilotToken.token);
|
||
copilotTokenState.expiresAt = copilotToken.expiresAt;
|
||
const remaining = copilotToken.expiresAt - Date.now();
|
||
log.debug(
|
||
`Copilot token refreshed; expires in ${Math.max(0, Math.floor(remaining / 1000))}s.`,
|
||
);
|
||
})()
|
||
.catch((err) => {
|
||
log.warn(`Copilot token refresh failed: ${describeUnknownError(err)}`);
|
||
throw err;
|
||
})
|
||
.finally(() => {
|
||
copilotTokenState.refreshInFlight = undefined;
|
||
});
|
||
await copilotTokenState.refreshInFlight;
|
||
};
|
||
|
||
const scheduleCopilotRefresh = (): void => {
|
||
if (!copilotTokenState || copilotRefreshCancelled) {
|
||
return;
|
||
}
|
||
if (!hasCopilotGithubToken()) {
|
||
log.warn("Skipping Copilot refresh scheduling; GitHub token missing.");
|
||
return;
|
||
}
|
||
clearCopilotRefreshTimer();
|
||
const now = Date.now();
|
||
const refreshAt = copilotTokenState.expiresAt - COPILOT_REFRESH_MARGIN_MS;
|
||
const delayMs = Math.max(COPILOT_REFRESH_MIN_DELAY_MS, refreshAt - now);
|
||
const timer = setTimeout(() => {
|
||
if (copilotRefreshCancelled) {
|
||
return;
|
||
}
|
||
refreshCopilotToken("scheduled")
|
||
.then(() => scheduleCopilotRefresh())
|
||
.catch(() => {
|
||
if (copilotRefreshCancelled) {
|
||
return;
|
||
}
|
||
const retryTimer = setTimeout(() => {
|
||
if (copilotRefreshCancelled) {
|
||
return;
|
||
}
|
||
refreshCopilotToken("scheduled-retry")
|
||
.then(() => scheduleCopilotRefresh())
|
||
.catch(() => undefined);
|
||
}, COPILOT_REFRESH_RETRY_MS);
|
||
copilotTokenState.refreshTimer = retryTimer;
|
||
if (copilotRefreshCancelled) {
|
||
clearTimeout(retryTimer);
|
||
copilotTokenState.refreshTimer = undefined;
|
||
}
|
||
});
|
||
}, delayMs);
|
||
copilotTokenState.refreshTimer = timer;
|
||
if (copilotRefreshCancelled) {
|
||
clearTimeout(timer);
|
||
copilotTokenState.refreshTimer = undefined;
|
||
}
|
||
};
|
||
|
||
const resolveAuthProfileFailoverReason = (params: {
|
||
allInCooldown: boolean;
|
||
message: string;
|
||
profileIds?: Array<string | undefined>;
|
||
}): FailoverReason => {
|
||
if (params.allInCooldown) {
|
||
const profileIds = (params.profileIds ?? profileCandidates).filter(
|
||
(id): id is string => typeof id === "string" && id.length > 0,
|
||
);
|
||
return (
|
||
resolveProfilesUnavailableReason({
|
||
store: authStore,
|
||
profileIds,
|
||
}) ?? "rate_limit"
|
||
);
|
||
}
|
||
const classified = classifyFailoverReason(params.message);
|
||
return classified ?? "auth";
|
||
};
|
||
|
||
const throwAuthProfileFailover = (params: {
|
||
allInCooldown: boolean;
|
||
message?: string;
|
||
error?: unknown;
|
||
}): never => {
|
||
const fallbackMessage = `No available auth profile for ${provider} (all in cooldown or unavailable).`;
|
||
const message =
|
||
params.message?.trim() ||
|
||
(params.error ? describeUnknownError(params.error).trim() : "") ||
|
||
fallbackMessage;
|
||
const reason = resolveAuthProfileFailoverReason({
|
||
allInCooldown: params.allInCooldown,
|
||
message,
|
||
profileIds: profileCandidates,
|
||
});
|
||
if (fallbackConfigured) {
|
||
throw new FailoverError(message, {
|
||
reason,
|
||
provider,
|
||
model: modelId,
|
||
status: resolveFailoverStatus(reason),
|
||
cause: params.error,
|
||
});
|
||
}
|
||
if (params.error instanceof Error) {
|
||
throw params.error;
|
||
}
|
||
throw new Error(message);
|
||
};
|
||
|
||
const resolveApiKeyForCandidate = async (candidate?: string) => {
|
||
return getApiKeyForModel({
|
||
model,
|
||
cfg: params.config,
|
||
profileId: candidate,
|
||
store: authStore,
|
||
agentDir,
|
||
});
|
||
};
|
||
|
||
const applyApiKeyInfo = async (candidate?: string): Promise<void> => {
|
||
apiKeyInfo = await resolveApiKeyForCandidate(candidate);
|
||
const resolvedProfileId = apiKeyInfo.profileId ?? candidate;
|
||
if (!apiKeyInfo.apiKey) {
|
||
if (apiKeyInfo.mode !== "aws-sdk") {
|
||
throw new Error(
|
||
`No API key resolved for provider "${model.provider}" (auth mode: ${apiKeyInfo.mode}).`,
|
||
);
|
||
}
|
||
lastProfileId = resolvedProfileId;
|
||
return;
|
||
}
|
||
if (model.provider === "github-copilot") {
|
||
const { resolveCopilotApiToken } =
|
||
await import("../../providers/github-copilot-token.js");
|
||
const copilotToken = await resolveCopilotApiToken({
|
||
githubToken: apiKeyInfo.apiKey,
|
||
});
|
||
authStorage.setRuntimeApiKey(model.provider, copilotToken.token);
|
||
if (copilotTokenState) {
|
||
copilotTokenState.githubToken = apiKeyInfo.apiKey;
|
||
copilotTokenState.expiresAt = copilotToken.expiresAt;
|
||
scheduleCopilotRefresh();
|
||
}
|
||
} else {
|
||
authStorage.setRuntimeApiKey(model.provider, apiKeyInfo.apiKey);
|
||
}
|
||
lastProfileId = apiKeyInfo.profileId;
|
||
};
|
||
|
||
const advanceAuthProfile = async (): Promise<boolean> => {
|
||
if (lockedProfileId) {
|
||
return false;
|
||
}
|
||
let nextIndex = profileIndex + 1;
|
||
while (nextIndex < profileCandidates.length) {
|
||
const candidate = profileCandidates[nextIndex];
|
||
if (candidate && isProfileInCooldown(authStore, candidate)) {
|
||
nextIndex += 1;
|
||
continue;
|
||
}
|
||
try {
|
||
await applyApiKeyInfo(candidate);
|
||
profileIndex = nextIndex;
|
||
thinkLevel = initialThinkLevel;
|
||
attemptedThinking.clear();
|
||
return true;
|
||
} catch (err) {
|
||
if (candidate && candidate === lockedProfileId) {
|
||
throw err;
|
||
}
|
||
nextIndex += 1;
|
||
}
|
||
}
|
||
return false;
|
||
};
|
||
|
||
try {
|
||
const autoProfileCandidates = profileCandidates.filter(
|
||
(candidate): candidate is string =>
|
||
typeof candidate === "string" && candidate.length > 0 && candidate !== lockedProfileId,
|
||
);
|
||
const allAutoProfilesInCooldown =
|
||
autoProfileCandidates.length > 0 &&
|
||
autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate));
|
||
const unavailableReason = allAutoProfilesInCooldown
|
||
? (resolveProfilesUnavailableReason({
|
||
store: authStore,
|
||
profileIds: autoProfileCandidates,
|
||
}) ?? "rate_limit")
|
||
: null;
|
||
const allowTransientCooldownProbe =
|
||
params.allowTransientCooldownProbe === true &&
|
||
allAutoProfilesInCooldown &&
|
||
(unavailableReason === "rate_limit" || unavailableReason === "overloaded");
|
||
let didTransientCooldownProbe = false;
|
||
|
||
while (profileIndex < profileCandidates.length) {
|
||
const candidate = profileCandidates[profileIndex];
|
||
const inCooldown =
|
||
candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
|
||
if (inCooldown) {
|
||
if (allowTransientCooldownProbe && !didTransientCooldownProbe) {
|
||
didTransientCooldownProbe = true;
|
||
log.warn(
|
||
`probing cooldowned auth profile for ${provider}/${modelId} due to ${unavailableReason ?? "transient"} unavailability`,
|
||
);
|
||
} else {
|
||
profileIndex += 1;
|
||
continue;
|
||
}
|
||
}
|
||
await applyApiKeyInfo(profileCandidates[profileIndex]);
|
||
break;
|
||
}
|
||
if (profileIndex >= profileCandidates.length) {
|
||
throwAuthProfileFailover({ allInCooldown: true });
|
||
}
|
||
} catch (err) {
|
||
if (err instanceof FailoverError) {
|
||
throw err;
|
||
}
|
||
if (profileCandidates[profileIndex] === lockedProfileId) {
|
||
throwAuthProfileFailover({ allInCooldown: false, error: err });
|
||
}
|
||
const advanced = await advanceAuthProfile();
|
||
if (!advanced) {
|
||
throwAuthProfileFailover({ allInCooldown: false, error: err });
|
||
}
|
||
}
|
||
|
||
const maybeRefreshCopilotForAuthError = async (
|
||
errorText: string,
|
||
retried: boolean,
|
||
): Promise<boolean> => {
|
||
if (!copilotTokenState || retried) {
|
||
return false;
|
||
}
|
||
if (!isFailoverErrorMessage(errorText)) {
|
||
return false;
|
||
}
|
||
if (classifyFailoverReason(errorText) !== "auth") {
|
||
return false;
|
||
}
|
||
try {
|
||
await refreshCopilotToken("auth-error");
|
||
scheduleCopilotRefresh();
|
||
return true;
|
||
} catch {
|
||
return false;
|
||
}
|
||
};
|
||
|
||
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
|
||
const MAX_RUN_LOOP_ITERATIONS = resolveMaxRunRetryIterations(profileCandidates.length);
|
||
let overflowCompactionAttempts = 0;
|
||
let toolResultTruncationAttempted = false;
|
||
let bootstrapPromptWarningSignaturesSeen =
|
||
params.bootstrapPromptWarningSignaturesSeen ??
|
||
(params.bootstrapPromptWarningSignature ? [params.bootstrapPromptWarningSignature] : []);
|
||
const usageAccumulator = createUsageAccumulator();
|
||
let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
|
||
let autoCompactionCount = 0;
|
||
let runLoopIterations = 0;
|
||
let overloadFailoverAttempts = 0;
|
||
const maybeMarkAuthProfileFailure = async (failure: {
|
||
profileId?: string;
|
||
reason?: AuthProfileFailureReason | null;
|
||
config?: RunEmbeddedPiAgentParams["config"];
|
||
agentDir?: RunEmbeddedPiAgentParams["agentDir"];
|
||
}) => {
|
||
const { profileId, reason } = failure;
|
||
if (!profileId || !reason || reason === "timeout") {
|
||
return;
|
||
}
|
||
await markAuthProfileFailure({
|
||
store: authStore,
|
||
profileId,
|
||
reason,
|
||
cfg: params.config,
|
||
agentDir,
|
||
});
|
||
};
|
||
const resolveAuthProfileFailureReason = (
|
||
failoverReason: FailoverReason | null,
|
||
): AuthProfileFailureReason | null => {
|
||
// Timeouts are transport/model-path failures, not auth health signals,
|
||
// so they should not persist auth-profile failure state.
|
||
if (!failoverReason || failoverReason === "timeout") {
|
||
return null;
|
||
}
|
||
return failoverReason;
|
||
};
|
||
const maybeBackoffBeforeOverloadFailover = async (reason: FailoverReason | null) => {
|
||
if (reason !== "overloaded") {
|
||
return;
|
||
}
|
||
overloadFailoverAttempts += 1;
|
||
const delayMs = computeBackoff(OVERLOAD_FAILOVER_BACKOFF_POLICY, overloadFailoverAttempts);
|
||
log.warn(
|
||
`overload backoff before failover for ${provider}/${modelId}: attempt=${overloadFailoverAttempts} delayMs=${delayMs}`,
|
||
);
|
||
try {
|
||
await sleepWithAbort(delayMs, params.abortSignal);
|
||
} catch (err) {
|
||
if (params.abortSignal?.aborted) {
|
||
const abortErr = new Error("Operation aborted", { cause: err });
|
||
abortErr.name = "AbortError";
|
||
throw abortErr;
|
||
}
|
||
throw err;
|
||
}
|
||
};
|
||
// Resolve the context engine once and reuse across retries to avoid
|
||
// repeated initialization/connection overhead per attempt.
|
||
ensureContextEnginesInitialized();
|
||
const contextEngine = await resolveContextEngine(params.config);
|
||
try {
|
||
let authRetryPending = false;
|
||
// Hoisted so the retry-limit error path can use the most recent API total.
|
||
let lastTurnTotal: number | undefined;
|
||
while (true) {
|
||
if (runLoopIterations >= MAX_RUN_LOOP_ITERATIONS) {
|
||
const message =
|
||
`Exceeded retry limit after ${runLoopIterations} attempts ` +
|
||
`(max=${MAX_RUN_LOOP_ITERATIONS}).`;
|
||
log.error(
|
||
`[run-retry-limit] sessionKey=${params.sessionKey ?? params.sessionId} ` +
|
||
`provider=${provider}/${modelId} attempts=${runLoopIterations} ` +
|
||
`maxAttempts=${MAX_RUN_LOOP_ITERATIONS}`,
|
||
);
|
||
return {
|
||
payloads: [
|
||
{
|
||
text:
|
||
"Request failed after repeated internal retries. " +
|
||
"Please try again, or use /new to start a fresh session.",
|
||
isError: true,
|
||
},
|
||
],
|
||
meta: {
|
||
durationMs: Date.now() - started,
|
||
agentMeta: buildErrorAgentMeta({
|
||
sessionId: params.sessionId,
|
||
provider,
|
||
model: model.id,
|
||
usageAccumulator,
|
||
lastRunPromptUsage,
|
||
lastTurnTotal,
|
||
}),
|
||
error: { kind: "retry_limit", message },
|
||
},
|
||
};
|
||
}
|
||
runLoopIterations += 1;
|
||
const copilotAuthRetry = authRetryPending;
|
||
authRetryPending = false;
|
||
attemptedThinking.add(thinkLevel);
|
||
await fs.mkdir(resolvedWorkspace, { recursive: true });
|
||
|
||
const prompt =
|
||
provider === "anthropic" ? scrubAnthropicRefusalMagic(params.prompt) : params.prompt;
|
||
|
||
const attempt = await runEmbeddedAttempt({
|
||
sessionId: params.sessionId,
|
||
sessionKey: params.sessionKey,
|
||
trigger: params.trigger,
|
||
messageChannel: params.messageChannel,
|
||
messageProvider: params.messageProvider,
|
||
agentAccountId: params.agentAccountId,
|
||
messageTo: params.messageTo,
|
||
messageThreadId: params.messageThreadId,
|
||
groupId: params.groupId,
|
||
groupChannel: params.groupChannel,
|
||
groupSpace: params.groupSpace,
|
||
spawnedBy: params.spawnedBy,
|
||
senderId: params.senderId,
|
||
senderName: params.senderName,
|
||
senderUsername: params.senderUsername,
|
||
senderE164: params.senderE164,
|
||
senderIsOwner: params.senderIsOwner,
|
||
currentChannelId: params.currentChannelId,
|
||
currentThreadTs: params.currentThreadTs,
|
||
currentMessageId: params.currentMessageId,
|
||
replyToMode: params.replyToMode,
|
||
hasRepliedRef: params.hasRepliedRef,
|
||
sessionFile: params.sessionFile,
|
||
workspaceDir: resolvedWorkspace,
|
||
agentDir,
|
||
config: params.config,
|
||
contextEngine,
|
||
contextTokenBudget: ctxInfo.tokens,
|
||
skillsSnapshot: params.skillsSnapshot,
|
||
prompt,
|
||
images: params.images,
|
||
disableTools: params.disableTools,
|
||
provider,
|
||
modelId,
|
||
model: effectiveModel,
|
||
authProfileId: lastProfileId,
|
||
authProfileIdSource: lockedProfileId ? "user" : "auto",
|
||
authStorage,
|
||
modelRegistry,
|
||
agentId: workspaceResolution.agentId,
|
||
legacyBeforeAgentStartResult,
|
||
thinkLevel,
|
||
verboseLevel: params.verboseLevel,
|
||
reasoningLevel: params.reasoningLevel,
|
||
toolResultFormat: resolvedToolResultFormat,
|
||
execOverrides: params.execOverrides,
|
||
bashElevated: params.bashElevated,
|
||
timeoutMs: params.timeoutMs,
|
||
runId: params.runId,
|
||
abortSignal: params.abortSignal,
|
||
shouldEmitToolResult: params.shouldEmitToolResult,
|
||
shouldEmitToolOutput: params.shouldEmitToolOutput,
|
||
onPartialReply: params.onPartialReply,
|
||
onAssistantMessageStart: params.onAssistantMessageStart,
|
||
onBlockReply: params.onBlockReply,
|
||
onBlockReplyFlush: params.onBlockReplyFlush,
|
||
blockReplyBreak: params.blockReplyBreak,
|
||
blockReplyChunking: params.blockReplyChunking,
|
||
onReasoningStream: params.onReasoningStream,
|
||
onReasoningEnd: params.onReasoningEnd,
|
||
onToolResult: params.onToolResult,
|
||
onAgentEvent: params.onAgentEvent,
|
||
extraSystemPrompt: params.extraSystemPrompt,
|
||
inputProvenance: params.inputProvenance,
|
||
streamParams: params.streamParams,
|
||
ownerNumbers: params.ownerNumbers,
|
||
enforceFinalTag: params.enforceFinalTag,
|
||
bootstrapPromptWarningSignaturesSeen,
|
||
bootstrapPromptWarningSignature:
|
||
bootstrapPromptWarningSignaturesSeen[bootstrapPromptWarningSignaturesSeen.length - 1],
|
||
});
|
||
|
||
const {
|
||
aborted,
|
||
promptError,
|
||
timedOut,
|
||
timedOutDuringCompaction,
|
||
sessionIdUsed,
|
||
lastAssistant,
|
||
} = attempt;
|
||
bootstrapPromptWarningSignaturesSeen =
|
||
attempt.bootstrapPromptWarningSignaturesSeen ??
|
||
(attempt.bootstrapPromptWarningSignature
|
||
? Array.from(
|
||
new Set([
|
||
...bootstrapPromptWarningSignaturesSeen,
|
||
attempt.bootstrapPromptWarningSignature,
|
||
]),
|
||
)
|
||
: bootstrapPromptWarningSignaturesSeen);
|
||
const lastAssistantUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
|
||
const attemptUsage = attempt.attemptUsage ?? lastAssistantUsage;
|
||
mergeUsageIntoAccumulator(usageAccumulator, attemptUsage);
|
||
// Keep prompt size from the latest model call so session totalTokens
|
||
// reflects current context usage, not accumulated tool-loop usage.
|
||
lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;
|
||
lastTurnTotal = lastAssistantUsage?.total ?? attemptUsage?.total;
|
||
const attemptCompactionCount = Math.max(0, attempt.compactionCount ?? 0);
|
||
autoCompactionCount += attemptCompactionCount;
|
||
const activeErrorContext = resolveActiveErrorContext({
|
||
lastAssistant,
|
||
provider,
|
||
model: modelId,
|
||
});
|
||
const formattedAssistantErrorText = lastAssistant
|
||
? formatAssistantErrorText(lastAssistant, {
|
||
cfg: params.config,
|
||
sessionKey: params.sessionKey ?? params.sessionId,
|
||
provider: activeErrorContext.provider,
|
||
model: activeErrorContext.model,
|
||
})
|
||
: undefined;
|
||
const assistantErrorText =
|
||
lastAssistant?.stopReason === "error"
|
||
? lastAssistant.errorMessage?.trim() || formattedAssistantErrorText
|
||
: undefined;
|
||
|
||
const contextOverflowError = !aborted
|
||
? (() => {
|
||
if (promptError) {
|
||
const errorText = describeUnknownError(promptError);
|
||
if (isLikelyContextOverflowError(errorText)) {
|
||
return { text: errorText, source: "promptError" as const };
|
||
}
|
||
// Prompt submission failed with a non-overflow error. Do not
|
||
// inspect prior assistant errors from history for this attempt.
|
||
return null;
|
||
}
|
||
if (assistantErrorText && isLikelyContextOverflowError(assistantErrorText)) {
|
||
return { text: assistantErrorText, source: "assistantError" as const };
|
||
}
|
||
return null;
|
||
})()
|
||
: null;
|
||
|
||
if (contextOverflowError) {
|
||
const overflowDiagId = createCompactionDiagId();
|
||
const errorText = contextOverflowError.text;
|
||
const msgCount = attempt.messagesSnapshot?.length ?? 0;
|
||
log.warn(
|
||
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
|
||
`provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
|
||
`messages=${msgCount} sessionFile=${params.sessionFile} ` +
|
||
`diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` +
|
||
`error=${errorText.slice(0, 200)}`,
|
||
);
|
||
const isCompactionFailure = isCompactionFailureError(errorText);
|
||
const hadAttemptLevelCompaction = attemptCompactionCount > 0;
|
||
// If this attempt already compacted (SDK auto-compaction), avoid immediately
|
||
// running another explicit compaction for the same overflow trigger.
|
||
if (
|
||
!isCompactionFailure &&
|
||
hadAttemptLevelCompaction &&
|
||
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
|
||
) {
|
||
overflowCompactionAttempts++;
|
||
log.warn(
|
||
`context overflow persisted after in-attempt compaction (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); retrying prompt without additional compaction for ${provider}/${modelId}`,
|
||
);
|
||
continue;
|
||
}
|
||
// Attempt explicit overflow compaction only when this attempt did not
|
||
// already auto-compact.
|
||
if (
|
||
!isCompactionFailure &&
|
||
!hadAttemptLevelCompaction &&
|
||
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
|
||
) {
|
||
if (log.isEnabled("debug")) {
|
||
log.debug(
|
||
`[compaction-diag] decision diagId=${overflowDiagId} branch=compact ` +
|
||
`isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=unknown ` +
|
||
`attempt=${overflowCompactionAttempts + 1} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
|
||
);
|
||
}
|
||
overflowCompactionAttempts++;
|
||
log.warn(
|
||
`context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
|
||
);
|
||
const compactResult = await contextEngine.compact({
|
||
sessionId: params.sessionId,
|
||
sessionFile: params.sessionFile,
|
||
tokenBudget: ctxInfo.tokens,
|
||
force: true,
|
||
compactionTarget: "budget",
|
||
legacyParams: {
|
||
sessionKey: params.sessionKey,
|
||
messageChannel: params.messageChannel,
|
||
messageProvider: params.messageProvider,
|
||
agentAccountId: params.agentAccountId,
|
||
authProfileId: lastProfileId,
|
||
workspaceDir: resolvedWorkspace,
|
||
agentDir,
|
||
config: params.config,
|
||
skillsSnapshot: params.skillsSnapshot,
|
||
senderIsOwner: params.senderIsOwner,
|
||
provider,
|
||
model: modelId,
|
||
runId: params.runId,
|
||
thinkLevel,
|
||
reasoningLevel: params.reasoningLevel,
|
||
bashElevated: params.bashElevated,
|
||
extraSystemPrompt: params.extraSystemPrompt,
|
||
ownerNumbers: params.ownerNumbers,
|
||
trigger: "overflow",
|
||
diagId: overflowDiagId,
|
||
attempt: overflowCompactionAttempts,
|
||
maxAttempts: MAX_OVERFLOW_COMPACTION_ATTEMPTS,
|
||
},
|
||
});
|
||
if (compactResult.compacted) {
|
||
autoCompactionCount += 1;
|
||
log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`);
|
||
continue;
|
||
}
|
||
log.warn(
|
||
`auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
|
||
);
|
||
}
|
||
// Fallback: try truncating oversized tool results in the session.
|
||
// This handles the case where a single tool result exceeds the
|
||
// context window and compaction cannot reduce it further.
|
||
if (!toolResultTruncationAttempted) {
|
||
const contextWindowTokens = ctxInfo.tokens;
|
||
const hasOversized = attempt.messagesSnapshot
|
||
? sessionLikelyHasOversizedToolResults({
|
||
messages: attempt.messagesSnapshot,
|
||
contextWindowTokens,
|
||
})
|
||
: false;
|
||
|
||
if (hasOversized) {
|
||
if (log.isEnabled("debug")) {
|
||
log.debug(
|
||
`[compaction-diag] decision diagId=${overflowDiagId} branch=truncate_tool_results ` +
|
||
`isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=${hasOversized} ` +
|
||
`attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
|
||
);
|
||
}
|
||
toolResultTruncationAttempted = true;
|
||
log.warn(
|
||
`[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
|
||
`(contextWindow=${contextWindowTokens} tokens)`,
|
||
);
|
||
const truncResult = await truncateOversizedToolResultsInSession({
|
||
sessionFile: params.sessionFile,
|
||
contextWindowTokens,
|
||
sessionId: params.sessionId,
|
||
sessionKey: params.sessionKey,
|
||
});
|
||
if (truncResult.truncated) {
|
||
log.info(
|
||
`[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
|
||
);
|
||
// Do NOT reset overflowCompactionAttempts here — the global cap must remain
|
||
// enforced across all iterations to prevent unbounded compaction cycles (OC-65).
|
||
continue;
|
||
}
|
||
log.warn(
|
||
`[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
|
||
);
|
||
} else if (log.isEnabled("debug")) {
|
||
log.debug(
|
||
`[compaction-diag] decision diagId=${overflowDiagId} branch=give_up ` +
|
||
`isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=${hasOversized} ` +
|
||
`attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
|
||
);
|
||
}
|
||
}
|
||
if (
|
||
(isCompactionFailure ||
|
||
overflowCompactionAttempts >= MAX_OVERFLOW_COMPACTION_ATTEMPTS ||
|
||
toolResultTruncationAttempted) &&
|
||
log.isEnabled("debug")
|
||
) {
|
||
log.debug(
|
||
`[compaction-diag] decision diagId=${overflowDiagId} branch=give_up ` +
|
||
`isCompactionFailure=${isCompactionFailure} hasOversizedToolResults=unknown ` +
|
||
`attempt=${overflowCompactionAttempts} maxAttempts=${MAX_OVERFLOW_COMPACTION_ATTEMPTS}`,
|
||
);
|
||
}
|
||
const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
|
||
return {
|
||
payloads: [
|
||
{
|
||
text:
|
||
"Context overflow: prompt too large for the model. " +
|
||
"Try /reset (or /new) to start a fresh session, or use a larger-context model.",
|
||
isError: true,
|
||
},
|
||
],
|
||
meta: {
|
||
durationMs: Date.now() - started,
|
||
agentMeta: buildErrorAgentMeta({
|
||
sessionId: sessionIdUsed,
|
||
provider,
|
||
model: model.id,
|
||
usageAccumulator,
|
||
lastRunPromptUsage,
|
||
lastAssistant,
|
||
lastTurnTotal,
|
||
}),
|
||
systemPromptReport: attempt.systemPromptReport,
|
||
error: { kind, message: errorText },
|
||
},
|
||
};
|
||
}
|
||
|
||
if (promptError && !aborted) {
|
||
const errorText = describeUnknownError(promptError);
|
||
if (await maybeRefreshCopilotForAuthError(errorText, copilotAuthRetry)) {
|
||
authRetryPending = true;
|
||
continue;
|
||
}
|
||
// Handle role ordering errors with a user-friendly message
|
||
if (/incorrect role information|roles must alternate/i.test(errorText)) {
|
||
return {
|
||
payloads: [
|
||
{
|
||
text:
|
||
"Message ordering conflict - please try again. " +
|
||
"If this persists, use /new to start a fresh session.",
|
||
isError: true,
|
||
},
|
||
],
|
||
meta: {
|
||
durationMs: Date.now() - started,
|
||
agentMeta: buildErrorAgentMeta({
|
||
sessionId: sessionIdUsed,
|
||
provider,
|
||
model: model.id,
|
||
usageAccumulator,
|
||
lastRunPromptUsage,
|
||
lastAssistant,
|
||
lastTurnTotal,
|
||
}),
|
||
systemPromptReport: attempt.systemPromptReport,
|
||
error: { kind: "role_ordering", message: errorText },
|
||
},
|
||
};
|
||
}
|
||
// Handle image size errors with a user-friendly message (no retry needed)
|
||
const imageSizeError = parseImageSizeError(errorText);
|
||
if (imageSizeError) {
|
||
const maxMb = imageSizeError.maxMb;
|
||
const maxMbLabel =
|
||
typeof maxMb === "number" && Number.isFinite(maxMb) ? `${maxMb}` : null;
|
||
const maxBytesHint = maxMbLabel ? ` (max ${maxMbLabel}MB)` : "";
|
||
return {
|
||
payloads: [
|
||
{
|
||
text:
|
||
`Image too large for the model${maxBytesHint}. ` +
|
||
"Please compress or resize the image and try again.",
|
||
isError: true,
|
||
},
|
||
],
|
||
meta: {
|
||
durationMs: Date.now() - started,
|
||
agentMeta: buildErrorAgentMeta({
|
||
sessionId: sessionIdUsed,
|
||
provider,
|
||
model: model.id,
|
||
usageAccumulator,
|
||
lastRunPromptUsage,
|
||
lastAssistant,
|
||
lastTurnTotal,
|
||
}),
|
||
systemPromptReport: attempt.systemPromptReport,
|
||
error: { kind: "image_size", message: errorText },
|
||
},
|
||
};
|
||
}
|
||
const promptFailoverReason = classifyFailoverReason(errorText);
|
||
const promptProfileFailureReason =
|
||
resolveAuthProfileFailureReason(promptFailoverReason);
|
||
await maybeMarkAuthProfileFailure({
|
||
profileId: lastProfileId,
|
||
reason: promptProfileFailureReason,
|
||
});
|
||
const promptFailoverFailure = isFailoverErrorMessage(errorText);
|
||
if (
|
||
promptFailoverFailure &&
|
||
promptFailoverReason !== "timeout" &&
|
||
(await advanceAuthProfile())
|
||
) {
|
||
await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
|
||
continue;
|
||
}
|
||
const fallbackThinking = pickFallbackThinkingLevel({
|
||
message: errorText,
|
||
attempted: attemptedThinking,
|
||
});
|
||
if (fallbackThinking) {
|
||
log.warn(
|
||
`unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
|
||
);
|
||
thinkLevel = fallbackThinking;
|
||
continue;
|
||
}
|
||
// Throw FailoverError for prompt-side failover reasons when fallbacks
|
||
// are configured so outer model fallback can continue on overload,
|
||
// rate-limit, auth, or billing failures.
|
||
if (fallbackConfigured && promptFailoverFailure) {
|
||
await maybeBackoffBeforeOverloadFailover(promptFailoverReason);
|
||
throw new FailoverError(errorText, {
|
||
reason: promptFailoverReason ?? "unknown",
|
||
provider,
|
||
model: modelId,
|
||
profileId: lastProfileId,
|
||
status: resolveFailoverStatus(promptFailoverReason ?? "unknown"),
|
||
});
|
||
}
|
||
throw promptError;
|
||
}
|
||
|
||
const fallbackThinking = pickFallbackThinkingLevel({
|
||
message: lastAssistant?.errorMessage,
|
||
attempted: attemptedThinking,
|
||
});
|
||
if (fallbackThinking && !aborted) {
|
||
log.warn(
|
||
`unsupported thinking level for ${provider}/${modelId}; retrying with ${fallbackThinking}`,
|
||
);
|
||
thinkLevel = fallbackThinking;
|
||
continue;
|
||
}
|
||
|
||
const authFailure = isAuthAssistantError(lastAssistant);
|
||
const rateLimitFailure = isRateLimitAssistantError(lastAssistant);
|
||
const billingFailure = isBillingAssistantError(lastAssistant);
|
||
const failoverFailure = isFailoverAssistantError(lastAssistant);
|
||
const assistantFailoverReason = classifyFailoverReason(lastAssistant?.errorMessage ?? "");
|
||
const assistantProfileFailureReason =
|
||
resolveAuthProfileFailureReason(assistantFailoverReason);
|
||
const cloudCodeAssistFormatError = attempt.cloudCodeAssistFormatError;
|
||
const imageDimensionError = parseImageDimensionError(lastAssistant?.errorMessage ?? "");
|
||
|
||
if (
|
||
authFailure &&
|
||
(await maybeRefreshCopilotForAuthError(
|
||
lastAssistant?.errorMessage ?? "",
|
||
copilotAuthRetry,
|
||
))
|
||
) {
|
||
authRetryPending = true;
|
||
continue;
|
||
}
|
||
if (imageDimensionError && lastProfileId) {
|
||
const details = [
|
||
imageDimensionError.messageIndex !== undefined
|
||
? `message=${imageDimensionError.messageIndex}`
|
||
: null,
|
||
imageDimensionError.contentIndex !== undefined
|
||
? `content=${imageDimensionError.contentIndex}`
|
||
: null,
|
||
imageDimensionError.maxDimensionPx !== undefined
|
||
? `limit=${imageDimensionError.maxDimensionPx}px`
|
||
: null,
|
||
]
|
||
.filter(Boolean)
|
||
.join(" ");
|
||
log.warn(
|
||
`Profile ${lastProfileId} rejected image payload${details ? ` (${details})` : ""}.`,
|
||
);
|
||
}
|
||
|
||
// Rotate on timeout to try another account/model path in this turn,
|
||
// but exclude post-prompt compaction timeouts (model succeeded; no profile issue).
|
||
const shouldRotate =
|
||
(!aborted && failoverFailure) || (timedOut && !timedOutDuringCompaction);
|
||
|
||
if (shouldRotate) {
|
||
if (lastProfileId) {
|
||
const reason = timedOut ? "timeout" : assistantProfileFailureReason;
|
||
// Skip cooldown for timeouts: a timeout is model/network-specific,
|
||
// not an auth issue. Marking the profile would poison fallback models
|
||
// on the same provider (e.g. gpt-5.3 timeout blocks gpt-5.2).
|
||
await maybeMarkAuthProfileFailure({
|
||
profileId: lastProfileId,
|
||
reason,
|
||
});
|
||
if (timedOut && !isProbeSession) {
|
||
log.warn(`Profile ${lastProfileId} timed out. Trying next account...`);
|
||
}
|
||
if (cloudCodeAssistFormatError) {
|
||
log.warn(
|
||
`Profile ${lastProfileId} hit Cloud Code Assist format error. Tool calls will be sanitized on retry.`,
|
||
);
|
||
}
|
||
}
|
||
|
||
const rotated = await advanceAuthProfile();
|
||
if (rotated) {
|
||
await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
|
||
continue;
|
||
}
|
||
|
||
if (fallbackConfigured) {
|
||
await maybeBackoffBeforeOverloadFailover(assistantFailoverReason);
|
||
// Prefer formatted error message (user-friendly) over raw errorMessage
|
||
const message =
|
||
(lastAssistant
|
||
? formatAssistantErrorText(lastAssistant, {
|
||
cfg: params.config,
|
||
sessionKey: params.sessionKey ?? params.sessionId,
|
||
provider: activeErrorContext.provider,
|
||
model: activeErrorContext.model,
|
||
})
|
||
: undefined) ||
|
||
lastAssistant?.errorMessage?.trim() ||
|
||
(timedOut
|
||
? "LLM request timed out."
|
||
: rateLimitFailure
|
||
? "LLM request rate limited."
|
||
: billingFailure
|
||
? formatBillingErrorMessage(
|
||
activeErrorContext.provider,
|
||
activeErrorContext.model,
|
||
)
|
||
: authFailure
|
||
? "LLM request unauthorized."
|
||
: "LLM request failed.");
|
||
const status =
|
||
resolveFailoverStatus(assistantFailoverReason ?? "unknown") ??
|
||
(isTimeoutErrorMessage(message) ? 408 : undefined);
|
||
throw new FailoverError(message, {
|
||
reason: assistantFailoverReason ?? "unknown",
|
||
provider: activeErrorContext.provider,
|
||
model: activeErrorContext.model,
|
||
profileId: lastProfileId,
|
||
status,
|
||
});
|
||
}
|
||
}
|
||
|
||
const usage = toNormalizedUsage(usageAccumulator);
|
||
if (usage && lastTurnTotal && lastTurnTotal > 0) {
|
||
usage.total = lastTurnTotal;
|
||
}
|
||
// Extract the last individual API call's usage for context-window
|
||
// utilization display. The accumulated `usage` sums input tokens
|
||
// across all calls (tool-use loops, compaction retries), which
|
||
// overstates the actual context size. `lastCallUsage` reflects only
|
||
// the final call, giving an accurate snapshot of current context.
|
||
const lastCallUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
|
||
const promptTokens = derivePromptTokens(lastRunPromptUsage);
|
||
const agentMeta: EmbeddedPiAgentMeta = {
|
||
sessionId: sessionIdUsed,
|
||
provider: lastAssistant?.provider ?? provider,
|
||
model: lastAssistant?.model ?? model.id,
|
||
usage,
|
||
lastCallUsage: lastCallUsage ?? undefined,
|
||
promptTokens,
|
||
compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
|
||
};
|
||
|
||
const payloads = buildEmbeddedRunPayloads({
|
||
assistantTexts: attempt.assistantTexts,
|
||
toolMetas: attempt.toolMetas,
|
||
lastAssistant: attempt.lastAssistant,
|
||
lastToolError: attempt.lastToolError,
|
||
config: params.config,
|
||
sessionKey: params.sessionKey ?? params.sessionId,
|
||
provider: activeErrorContext.provider,
|
||
model: activeErrorContext.model,
|
||
verboseLevel: params.verboseLevel,
|
||
reasoningLevel: params.reasoningLevel,
|
||
toolResultFormat: resolvedToolResultFormat,
|
||
suppressToolErrorWarnings: params.suppressToolErrorWarnings,
|
||
inlineToolResultsAllowed: false,
|
||
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
|
||
});
|
||
|
||
// Timeout aborts can leave the run without any assistant payloads.
|
||
// Emit an explicit timeout error instead of silently completing, so
|
||
// callers do not lose the turn as an orphaned user message.
|
||
if (timedOut && !timedOutDuringCompaction && payloads.length === 0) {
|
||
return {
|
||
payloads: [
|
||
{
|
||
text:
|
||
"Request timed out before a response was generated. " +
|
||
"Please try again, or increase `agents.defaults.timeoutSeconds` in your config.",
|
||
isError: true,
|
||
},
|
||
],
|
||
meta: {
|
||
durationMs: Date.now() - started,
|
||
agentMeta,
|
||
aborted,
|
||
systemPromptReport: attempt.systemPromptReport,
|
||
},
|
||
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
|
||
messagingToolSentTexts: attempt.messagingToolSentTexts,
|
||
messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
|
||
messagingToolSentTargets: attempt.messagingToolSentTargets,
|
||
successfulCronAdds: attempt.successfulCronAdds,
|
||
};
|
||
}
|
||
|
||
log.debug(
|
||
`embedded run done: runId=${params.runId} sessionId=${params.sessionId} durationMs=${Date.now() - started} aborted=${aborted}`,
|
||
);
|
||
if (lastProfileId) {
|
||
await markAuthProfileGood({
|
||
store: authStore,
|
||
provider,
|
||
profileId: lastProfileId,
|
||
agentDir: params.agentDir,
|
||
});
|
||
await markAuthProfileUsed({
|
||
store: authStore,
|
||
profileId: lastProfileId,
|
||
agentDir: params.agentDir,
|
||
});
|
||
}
|
||
return {
|
||
payloads: payloads.length ? payloads : undefined,
|
||
meta: {
|
||
durationMs: Date.now() - started,
|
||
agentMeta,
|
||
aborted,
|
||
systemPromptReport: attempt.systemPromptReport,
|
||
// Handle client tool calls (OpenResponses hosted tools)
|
||
// Propagate the LLM stop reason so callers (lifecycle events,
|
||
// ACP bridge) can distinguish end_turn from max_tokens.
|
||
stopReason: attempt.clientToolCall
|
||
? "tool_calls"
|
||
: (lastAssistant?.stopReason as string | undefined),
|
||
pendingToolCalls: attempt.clientToolCall
|
||
? [
|
||
{
|
||
id: randomBytes(5).toString("hex").slice(0, 9),
|
||
name: attempt.clientToolCall.name,
|
||
arguments: JSON.stringify(attempt.clientToolCall.params),
|
||
},
|
||
]
|
||
: undefined,
|
||
},
|
||
didSendViaMessagingTool: attempt.didSendViaMessagingTool,
|
||
messagingToolSentTexts: attempt.messagingToolSentTexts,
|
||
messagingToolSentMediaUrls: attempt.messagingToolSentMediaUrls,
|
||
messagingToolSentTargets: attempt.messagingToolSentTargets,
|
||
successfulCronAdds: attempt.successfulCronAdds,
|
||
};
|
||
}
|
||
} finally {
|
||
await contextEngine.dispose?.();
|
||
stopCopilotRefreshTimer();
|
||
process.chdir(prevCwd);
|
||
}
|
||
}),
|
||
);
|
||
}
|