diff --git a/CHANGELOG.md b/CHANGELOG.md index 6192fa29f8c..11665fa5b4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Docs: https://docs.openclaw.ai - Docs/Mermaid: remove hardcoded Mermaid init theme blocks from four docs diagrams so dark mode inherits readable theme defaults. (#15157) Thanks @heytulsiprasad. - Outbound/Threading: pass `replyTo` and `threadId` from `message send` tool actions through the core outbound send path to channel adapters, preserving thread/reply routing. (#14948) Thanks @mcaxtr. - Sessions/Agents: pass `agentId` when resolving existing transcript paths in reply runs so non-default agents and heartbeat/chat handlers no longer fail with `Session file path must be within sessions directory`. (#15141) Thanks @Goldenmonstew. +- Status/Sessions: stop clamping derived `totalTokens` to context-window size, keep prompt-token snapshots wired through session accounting, and surface context usage as unknown when fresh snapshot data is missing to avoid false 100% reports. (#15114) Thanks @echoVic. ## 2026.2.12 diff --git a/src/agents/usage.test.ts b/src/agents/usage.test.ts index 02f24c22212..d3ebbe70daf 100644 --- a/src/agents/usage.test.ts +++ b/src/agents/usage.test.ts @@ -47,7 +47,7 @@ describe("normalizeUsage", () => { expect(hasNonzeroUsage({ total: 1 })).toBe(true); }); - it("caps derived session total tokens to the context window", () => { + it("does not clamp derived session total tokens to the context window", () => { expect( deriveSessionTotalTokens({ usage: { @@ -58,7 +58,7 @@ describe("normalizeUsage", () => { }, contextTokens: 200_000, }), - ).toBe(200_000); + ).toBe(2_400_027); }); it("uses prompt tokens when within context window", () => { diff --git a/src/agents/usage.ts b/src/agents/usage.ts index 7e8a4f2ecc9..eaf48d5f1ac 100644 --- a/src/agents/usage.ts +++ b/src/agents/usage.ts @@ -134,9 +134,10 @@ export function deriveSessionTotalTokens(params: { return undefined; } - const contextTokens = params.contextTokens; - if (typeof contextTokens === "number" && Number.isFinite(contextTokens) && contextTokens > 0) { - total = Math.min(total, contextTokens); - } + // NOTE: Do NOT clamp total to contextTokens here. The stored totalTokens + // should reflect the actual token count (or best estimate). Clamping causes + // /status to display contextTokens/contextTokens (100%) when the accumulated + // input exceeds the context window, hiding the real usage. The display layer + // (formatTokens in status.ts) already caps the percentage at 999%. return total; } diff --git a/src/auto-reply/reply/agent-runner.messaging-tools.test.ts b/src/auto-reply/reply/agent-runner.messaging-tools.test.ts index 7cdb9286e5c..d09c970db32 100644 --- a/src/auto-reply/reply/agent-runner.messaging-tools.test.ts +++ b/src/auto-reply/reply/agent-runner.messaging-tools.test.ts @@ -151,7 +151,7 @@ describe("runReplyAgent messaging tool suppression", () => { expect(result).toMatchObject({ text: "hello world!" }); }); - it("persists usage even when replies are suppressed", async () => { + it("persists usage fields even when replies are suppressed", async () => { const storePath = path.join( await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-session-store-")), "sessions.json", @@ -177,7 +177,42 @@ describe("runReplyAgent messaging tool suppression", () => { expect(result).toBeUndefined(); const store = loadSessionStore(storePath, { skipCache: true }); - expect(store[sessionKey]?.totalTokens ?? 0).toBeGreaterThan(0); + expect(store[sessionKey]?.inputTokens).toBe(10); + expect(store[sessionKey]?.outputTokens).toBe(5); + expect(store[sessionKey]?.totalTokens).toBeUndefined(); + expect(store[sessionKey]?.totalTokensFresh).toBe(false); + expect(store[sessionKey]?.model).toBe("claude-opus-4-5"); + }); + + it("persists totalTokens from promptTokens when snapshot is available", async () => { + const storePath = path.join( + await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-session-store-")), + "sessions.json", + ); + const sessionKey = "main"; + const entry: SessionEntry = { sessionId: "session", updatedAt: Date.now() }; + await saveSessionStore(storePath, { [sessionKey]: entry }); + + runEmbeddedPiAgentMock.mockResolvedValueOnce({ + payloads: [{ text: "hello world!" }], + messagingToolSentTexts: ["different message"], + messagingToolSentTargets: [{ tool: "slack", provider: "slack", to: "channel:C1" }], + meta: { + agentMeta: { + usage: { input: 10, output: 5 }, + promptTokens: 42_000, + model: "claude-opus-4-5", + provider: "anthropic", + }, + }, + }); + + const result = await createRun("slack", { storePath, sessionKey }); + + expect(result).toBeUndefined(); + const store = loadSessionStore(storePath, { skipCache: true }); + expect(store[sessionKey]?.totalTokens).toBe(42_000); + expect(store[sessionKey]?.totalTokensFresh).toBe(true); expect(store[sessionKey]?.model).toBe("claude-opus-4-5"); }); }); diff --git a/src/auto-reply/reply/commands-compact.ts b/src/auto-reply/reply/commands-compact.ts index 232c2e7b3b2..00b00e7edea 100644 --- a/src/auto-reply/reply/commands-compact.ts +++ b/src/auto-reply/reply/commands-compact.ts @@ -6,7 +6,11 @@ import { isEmbeddedPiRunActive, waitForEmbeddedPiRunEnd, } from "../../agents/pi-embedded.js"; -import { resolveSessionFilePath, resolveSessionFilePathOptions } from "../../config/sessions.js"; +import { + resolveFreshSessionTotalTokens, + resolveSessionFilePath, + resolveSessionFilePathOptions, +} from "../../config/sessions.js"; import { logVerbose } from "../../globals.js"; import { enqueueSystemEvent } from "../../infra/system-events.js"; import { formatContextUsageShort, formatTokenCount } from "../status.js"; @@ -124,12 +128,9 @@ export const handleCompactCommand: CommandHandler = async (params) => { } // Use the post-compaction token count for context summary if available const tokensAfterCompaction = result.result?.tokensAfter; - const totalTokens = - tokensAfterCompaction ?? - params.sessionEntry.totalTokens ?? - (params.sessionEntry.inputTokens ?? 0) + (params.sessionEntry.outputTokens ?? 0); + const totalTokens = tokensAfterCompaction ?? resolveFreshSessionTotalTokens(params.sessionEntry); const contextSummary = formatContextUsageShort( - totalTokens > 0 ? totalTokens : null, + typeof totalTokens === "number" && totalTokens > 0 ? totalTokens : null, params.contextTokens ?? params.sessionEntry.contextTokens ?? null, ); const reason = result.reason?.trim(); diff --git a/src/auto-reply/reply/memory-flush.test.ts b/src/auto-reply/reply/memory-flush.test.ts index ce3a7929528..e3dcc124e18 100644 --- a/src/auto-reply/reply/memory-flush.test.ts +++ b/src/auto-reply/reply/memory-flush.test.ts @@ -113,6 +113,17 @@ describe("shouldRunMemoryFlush", () => { }), ).toBe(true); }); + + it("ignores stale cached totals", () => { + expect( + shouldRunMemoryFlush({ + entry: { totalTokens: 96_000, totalTokensFresh: false, compactionCount: 1 }, + contextWindowTokens: 100_000, + reserveTokensFloor: 5_000, + softThresholdTokens: 2_000, + }), + ).toBe(false); + }); }); describe("resolveMemoryFlushContextWindowTokens", () => { diff --git a/src/auto-reply/reply/memory-flush.ts b/src/auto-reply/reply/memory-flush.ts index b291111ca73..8ff6f1b1b6f 100644 --- a/src/auto-reply/reply/memory-flush.ts +++ b/src/auto-reply/reply/memory-flush.ts @@ -1,8 +1,8 @@ import type { OpenClawConfig } from "../../config/config.js"; -import type { SessionEntry } from "../../config/sessions.js"; import { lookupContextTokens } from "../../agents/context.js"; import { DEFAULT_CONTEXT_TOKENS } from "../../agents/defaults.js"; import { DEFAULT_PI_COMPACTION_RESERVE_TOKENS_FLOOR } from "../../agents/pi-settings.js"; +import { resolveFreshSessionTotalTokens, type SessionEntry } from "../../config/sessions.js"; import { SILENT_REPLY_TOKEN } from "../tokens.js"; export const DEFAULT_MEMORY_FLUSH_SOFT_TOKENS = 4000; @@ -76,12 +76,15 @@ export function resolveMemoryFlushContextWindowTokens(params: { } export function shouldRunMemoryFlush(params: { - entry?: Pick; + entry?: Pick< + SessionEntry, + "totalTokens" | "totalTokensFresh" | "compactionCount" | "memoryFlushCompactionCount" + >; contextWindowTokens: number; reserveTokensFloor: number; softThresholdTokens: number; }): boolean { - const totalTokens = params.entry?.totalTokens; + const totalTokens = resolveFreshSessionTotalTokens(params.entry); if (!totalTokens || totalTokens <= 0) { return false; } diff --git a/src/auto-reply/reply/session-run-accounting.ts b/src/auto-reply/reply/session-run-accounting.ts index 4316a6573ed..d1d17ad93dd 100644 --- a/src/auto-reply/reply/session-run-accounting.ts +++ b/src/auto-reply/reply/session-run-accounting.ts @@ -18,6 +18,7 @@ export async function persistRunSessionUsage(params: PersistRunSessionUsageParam sessionKey: params.sessionKey, usage: params.usage, lastCallUsage: params.lastCallUsage, + promptTokens: params.promptTokens, modelUsed: params.modelUsed, providerUsed: params.providerUsed, contextTokensUsed: params.contextTokensUsed, diff --git a/src/auto-reply/reply/session-updates.ts b/src/auto-reply/reply/session-updates.ts index 556ac9bbdde..45556950ee8 100644 --- a/src/auto-reply/reply/session-updates.ts +++ b/src/auto-reply/reply/session-updates.ts @@ -255,6 +255,7 @@ export async function incrementCompactionCount(params: { // If tokensAfter is provided, update the cached token counts to reflect post-compaction state if (tokensAfter != null && tokensAfter > 0) { updates.totalTokens = tokensAfter; + updates.totalTokensFresh = true; // Clear input/output breakdown since we only have the total estimate after compaction updates.inputTokens = undefined; updates.outputTokens = undefined; diff --git a/src/auto-reply/reply/session-usage.test.ts b/src/auto-reply/reply/session-usage.test.ts index d592cad21ef..ab44c53ed29 100644 --- a/src/auto-reply/reply/session-usage.test.ts +++ b/src/auto-reply/reply/session-usage.test.ts @@ -44,12 +44,13 @@ describe("persistSessionUsageUpdate", () => { const stored = JSON.parse(await fs.readFile(storePath, "utf-8")); // totalTokens should reflect lastCallUsage (12_000 input), not accumulated (180_000) expect(stored[sessionKey].totalTokens).toBe(12_000); + expect(stored[sessionKey].totalTokensFresh).toBe(true); // inputTokens/outputTokens still reflect accumulated usage for cost tracking expect(stored[sessionKey].inputTokens).toBe(180_000); expect(stored[sessionKey].outputTokens).toBe(10_000); }); - it("falls back to accumulated usage for totalTokens when lastCallUsage not provided", async () => { + it("marks totalTokens as unknown when no fresh context snapshot is available", async () => { const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-usage-")); const storePath = path.join(tmp, "sessions.json"); const sessionKey = "main"; @@ -67,10 +68,34 @@ describe("persistSessionUsageUpdate", () => { }); const stored = JSON.parse(await fs.readFile(storePath, "utf-8")); - expect(stored[sessionKey].totalTokens).toBe(50_000); + expect(stored[sessionKey].totalTokens).toBeUndefined(); + expect(stored[sessionKey].totalTokensFresh).toBe(false); }); - it("caps totalTokens at context window even with lastCallUsage", async () => { + it("uses promptTokens when available without lastCallUsage", async () => { + const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-usage-")); + const storePath = path.join(tmp, "sessions.json"); + const sessionKey = "main"; + await seedSessionStore({ + storePath, + sessionKey, + entry: { sessionId: "s1", updatedAt: Date.now() }, + }); + + await persistSessionUsageUpdate({ + storePath, + sessionKey, + usage: { input: 50_000, output: 5_000, total: 55_000 }, + promptTokens: 42_000, + contextTokensUsed: 200_000, + }); + + const stored = JSON.parse(await fs.readFile(storePath, "utf-8")); + expect(stored[sessionKey].totalTokens).toBe(42_000); + expect(stored[sessionKey].totalTokensFresh).toBe(true); + }); + + it("keeps non-clamped lastCallUsage totalTokens when exceeding context window", async () => { const tmp = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-usage-")); const storePath = path.join(tmp, "sessions.json"); const sessionKey = "main"; @@ -89,7 +114,7 @@ describe("persistSessionUsageUpdate", () => { }); const stored = JSON.parse(await fs.readFile(storePath, "utf-8")); - // Capped at context window - expect(stored[sessionKey].totalTokens).toBe(200_000); + expect(stored[sessionKey].totalTokens).toBe(250_000); + expect(stored[sessionKey].totalTokensFresh).toBe(true); }); }); diff --git a/src/auto-reply/reply/session-usage.ts b/src/auto-reply/reply/session-usage.ts index d5408870e37..3d4a1c40531 100644 --- a/src/auto-reply/reply/session-usage.ts +++ b/src/auto-reply/reply/session-usage.ts @@ -45,20 +45,29 @@ export async function persistSessionUsageUpdate(params: { const input = params.usage?.input ?? 0; const output = params.usage?.output ?? 0; const resolvedContextTokens = params.contextTokensUsed ?? entry.contextTokens; + const hasPromptTokens = + typeof params.promptTokens === "number" && + Number.isFinite(params.promptTokens) && + params.promptTokens > 0; + const hasFreshContextSnapshot = Boolean(params.lastCallUsage) || hasPromptTokens; // Use last-call usage for totalTokens when available. The accumulated // `usage.input` sums input tokens from every API call in the run // (tool-use loops, compaction retries), overstating actual context. // `lastCallUsage` reflects only the final API call — the true context. const usageForContext = params.lastCallUsage ?? params.usage; - const patch: Partial = { - inputTokens: input, - outputTokens: output, - totalTokens: - deriveSessionTotalTokens({ + const totalTokens = hasFreshContextSnapshot + ? deriveSessionTotalTokens({ usage: usageForContext, contextTokens: resolvedContextTokens, promptTokens: params.promptTokens, - }) ?? input, + }) + : undefined; + const patch: Partial = { + inputTokens: input, + outputTokens: output, + // Missing a last-call snapshot means context utilization is stale/unknown. + totalTokens, + totalTokensFresh: typeof totalTokens === "number", modelProvider: params.providerUsed ?? entry.modelProvider, model: params.modelUsed ?? entry.model, contextTokens: resolvedContextTokens, diff --git a/src/auto-reply/reply/session.ts b/src/auto-reply/reply/session.ts index 04481d89d2f..1c5fcbe17c3 100644 --- a/src/auto-reply/reply/session.ts +++ b/src/auto-reply/reply/session.ts @@ -358,6 +358,7 @@ export async function initSessionState(params: { // Clear stale token metrics from previous session so /status doesn't // display the old session's context usage after /new or /reset. sessionEntry.totalTokens = undefined; + sessionEntry.totalTokensFresh = false; sessionEntry.inputTokens = undefined; sessionEntry.outputTokens = undefined; sessionEntry.contextTokens = undefined; diff --git a/src/auto-reply/status.test.ts b/src/auto-reply/status.test.ts index 5298c90e883..90746c7752f 100644 --- a/src/auto-reply/status.test.ts +++ b/src/auto-reply/status.test.ts @@ -258,6 +258,25 @@ describe("buildStatusMessage", () => { expect(normalized).toContain("Queue: collect"); }); + it("treats stale cached totals as unknown context usage", () => { + const text = buildStatusMessage({ + agent: { model: "anthropic/claude-opus-4-5", contextTokens: 32_000 }, + sessionEntry: { + sessionId: "stale-1", + updatedAt: 0, + totalTokens: 12_345, + totalTokensFresh: false, + contextTokens: 32_000, + }, + sessionKey: "agent:main:main", + sessionScope: "per-sender", + queue: { mode: "collect", depth: 0 }, + modelAuth: "api-key", + }); + + expect(normalizeTestText(text)).toContain("Context: ?/32k"); + }); + it("includes group activation for group sessions", () => { const text = buildStatusMessage({ agent: {}, diff --git a/src/auto-reply/status.ts b/src/auto-reply/status.ts index 858411e5e2f..ab266f5ae05 100644 --- a/src/auto-reply/status.ts +++ b/src/auto-reply/status.ts @@ -12,6 +12,7 @@ import { resolveSandboxRuntimeStatus } from "../agents/sandbox.js"; import { derivePromptTokens, normalizeUsage, type UsageLike } from "../agents/usage.js"; import { resolveMainSessionKey, + resolveFreshSessionTotalTokens, resolveSessionFilePath, resolveSessionFilePathOptions, type SessionEntry, @@ -343,7 +344,7 @@ export function buildStatusMessage(args: StatusArgs): string { let inputTokens = entry?.inputTokens; let outputTokens = entry?.outputTokens; - let totalTokens = entry?.totalTokens ?? (entry?.inputTokens ?? 0) + (entry?.outputTokens ?? 0); + let totalTokens = resolveFreshSessionTotalTokens(entry); // Prefer prompt-size tokens from the session transcript when it looks larger // (cached prompt tokens are often missing from agent meta/store). diff --git a/src/commands/agent/session-store.ts b/src/commands/agent/session-store.ts index 48657bba197..4b4a27fd6a2 100644 --- a/src/commands/agent/session-store.ts +++ b/src/commands/agent/session-store.ts @@ -66,14 +66,16 @@ export async function updateSessionStoreAfterAgentRun(params: { if (hasNonzeroUsage(usage)) { const input = usage.input ?? 0; const output = usage.output ?? 0; - next.inputTokens = input; - next.outputTokens = output; - next.totalTokens = + const totalTokens = deriveSessionTotalTokens({ usage, contextTokens, promptTokens, }) ?? input; + next.inputTokens = input; + next.outputTokens = output; + next.totalTokens = totalTokens; + next.totalTokensFresh = true; } if (compactionsThisRun > 0) { next.compactionCount = (entry.compactionCount ?? 0) + compactionsThisRun; diff --git a/src/commands/sessions.test.ts b/src/commands/sessions.test.ts index 4d181d0d6a9..61f89889022 100644 --- a/src/commands/sessions.test.ts +++ b/src/commands/sessions.test.ts @@ -66,6 +66,8 @@ describe("sessionsCommand", () => { updatedAt: Date.now() - 45 * 60_000, inputTokens: 1200, outputTokens: 800, + totalTokens: 2000, + totalTokensFresh: true, model: "pi:opus", }, }); @@ -99,8 +101,48 @@ describe("sessionsCommand", () => { fs.rmSync(store); const row = logs.find((line) => line.includes("discord:group:demo")) ?? ""; - expect(row).toContain("-".padEnd(20)); + expect(row).toContain("unknown/32k (?%)"); expect(row).toContain("think:high"); expect(row).toContain("5m ago"); }); + + it("exports freshness metadata in JSON output", async () => { + const store = writeStore({ + main: { + sessionId: "abc123", + updatedAt: Date.now() - 10 * 60_000, + inputTokens: 1200, + outputTokens: 800, + totalTokens: 2000, + totalTokensFresh: true, + model: "pi:opus", + }, + "discord:group:demo": { + sessionId: "xyz", + updatedAt: Date.now() - 5 * 60_000, + inputTokens: 20, + outputTokens: 10, + model: "pi:opus", + }, + }); + + const { runtime, logs } = makeRuntime(); + await sessionsCommand({ store, json: true }, runtime); + + fs.rmSync(store); + + const payload = JSON.parse(logs[0] ?? "{}") as { + sessions?: Array<{ + key: string; + totalTokens: number | null; + totalTokensFresh: boolean; + }>; + }; + const main = payload.sessions?.find((row) => row.key === "main"); + const group = payload.sessions?.find((row) => row.key === "discord:group:demo"); + expect(main?.totalTokens).toBe(2000); + expect(main?.totalTokensFresh).toBe(true); + expect(group?.totalTokens).toBeNull(); + expect(group?.totalTokensFresh).toBe(false); + }); }); diff --git a/src/commands/sessions.ts b/src/commands/sessions.ts index 849fecb7592..deb22a3814c 100644 --- a/src/commands/sessions.ts +++ b/src/commands/sessions.ts @@ -3,7 +3,12 @@ import { lookupContextTokens } from "../agents/context.js"; import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js"; import { resolveConfiguredModelRef } from "../agents/model-selection.js"; import { loadConfig } from "../config/config.js"; -import { loadSessionStore, resolveStorePath, type SessionEntry } from "../config/sessions.js"; +import { + loadSessionStore, + resolveFreshSessionTotalTokens, + resolveStorePath, + type SessionEntry, +} from "../config/sessions.js"; import { info } from "../globals.js"; import { formatTimeAgo } from "../infra/format-time/format-relative.ts"; import { isRich, theme } from "../terminal/theme.js"; @@ -25,6 +30,7 @@ type SessionRow = { inputTokens?: number; outputTokens?: number; totalTokens?: number; + totalTokensFresh?: boolean; model?: string; contextTokens?: number; }; @@ -61,9 +67,15 @@ const colorByPct = (label: string, pct: number | null, rich: boolean) => { return theme.muted(label); }; -const formatTokensCell = (total: number, contextTokens: number | null, rich: boolean) => { - if (!total) { - return "-".padEnd(TOKENS_PAD); +const formatTokensCell = ( + total: number | undefined, + contextTokens: number | null, + rich: boolean, +) => { + if (total === undefined) { + const ctxLabel = contextTokens ? formatKTokens(contextTokens) : "?"; + const label = `unknown/${ctxLabel} (?%)`; + return rich ? theme.muted(label.padEnd(TOKENS_PAD)) : label.padEnd(TOKENS_PAD); } const totalLabel = formatKTokens(total); const ctxLabel = contextTokens ? formatKTokens(contextTokens) : "?"; @@ -154,6 +166,7 @@ function toRows(store: Record): SessionRow[] { inputTokens: entry?.inputTokens, outputTokens: entry?.outputTokens, totalTokens: entry?.totalTokens, + totalTokensFresh: entry?.totalTokensFresh, model: entry?.model, contextTokens: entry?.contextTokens, } satisfies SessionRow; @@ -209,6 +222,9 @@ export async function sessionsCommand( activeMinutes: activeMinutes ?? null, sessions: rows.map((r) => ({ ...r, + totalTokens: resolveFreshSessionTotalTokens(r) ?? null, + totalTokensFresh: + typeof r.totalTokens === "number" ? r.totalTokensFresh !== false : false, contextTokens: r.contextTokens ?? lookupContextTokens(r.model) ?? configContextTokens ?? null, model: r.model ?? configModel ?? null, @@ -246,9 +262,7 @@ export async function sessionsCommand( for (const row of rows) { const model = row.model ?? configModel; const contextTokens = row.contextTokens ?? lookupContextTokens(model) ?? configContextTokens; - const input = row.inputTokens ?? 0; - const output = row.outputTokens ?? 0; - const total = row.totalTokens ?? input + output; + const total = resolveFreshSessionTotalTokens(row); const keyLabel = truncateKey(row.key).padEnd(KEY_PAD); const keyCell = rich ? theme.accent(keyLabel) : keyLabel; diff --git a/src/commands/status.format.ts b/src/commands/status.format.ts index 9c4a7a59b22..bab43209386 100644 --- a/src/commands/status.format.ts +++ b/src/commands/status.format.ts @@ -22,8 +22,11 @@ export const shortenText = (value: string, maxLen: number) => { export const formatTokensCompact = ( sess: Pick, ) => { - const used = sess.totalTokens ?? 0; + const used = sess.totalTokens; const ctx = sess.contextTokens; + if (used == null) { + return ctx ? `unknown/${formatKTokens(ctx)} (?%)` : "unknown used"; + } if (!ctx) { return `${formatKTokens(used)} used`; } diff --git a/src/commands/status.summary.ts b/src/commands/status.summary.ts index 2e865d654a4..3c74e1a7b5d 100644 --- a/src/commands/status.summary.ts +++ b/src/commands/status.summary.ts @@ -5,6 +5,7 @@ import { resolveConfiguredModelRef } from "../agents/model-selection.js"; import { loadConfig } from "../config/config.js"; import { loadSessionStore, + resolveFreshSessionTotalTokens, resolveMainSessionKey, resolveStorePath, type SessionEntry, @@ -120,12 +121,13 @@ export async function getStatusSummary(): Promise { const model = entry?.model ?? configModel ?? null; const contextTokens = entry?.contextTokens ?? lookupContextTokens(model) ?? configContextTokens ?? null; - const input = entry?.inputTokens ?? 0; - const output = entry?.outputTokens ?? 0; - const total = entry?.totalTokens ?? input + output; - const remaining = contextTokens != null ? Math.max(0, contextTokens - total) : null; + const total = resolveFreshSessionTotalTokens(entry); + const totalTokensFresh = + typeof entry?.totalTokens === "number" ? entry?.totalTokensFresh !== false : false; + const remaining = + contextTokens != null && total !== undefined ? Math.max(0, contextTokens - total) : null; const pct = - contextTokens && contextTokens > 0 + contextTokens && contextTokens > 0 && total !== undefined ? Math.min(999, Math.round((total / contextTokens) * 100)) : null; const parsedAgentId = parseAgentSessionKey(key)?.agentId; @@ -147,6 +149,7 @@ export async function getStatusSummary(): Promise { inputTokens: entry?.inputTokens, outputTokens: entry?.outputTokens, totalTokens: total ?? null, + totalTokensFresh, remainingTokens: remaining, percentUsed: pct, model, diff --git a/src/commands/status.test.ts b/src/commands/status.test.ts index 0641f7eedbb..d5a8dcb0944 100644 --- a/src/commands/status.test.ts +++ b/src/commands/status.test.ts @@ -23,6 +23,7 @@ const mocks = vi.hoisted(() => ({ thinkingLevel: "low", inputTokens: 2_000, outputTokens: 3_000, + totalTokens: 5_000, contextTokens: 10_000, model: "pi:opus", sessionId: "abc123", @@ -120,6 +121,12 @@ vi.mock("../config/sessions.js", () => ({ loadSessionStore: mocks.loadSessionStore, resolveMainSessionKey: mocks.resolveMainSessionKey, resolveStorePath: mocks.resolveStorePath, + resolveFreshSessionTotalTokens: vi.fn( + (entry?: { totalTokens?: number; totalTokensFresh?: boolean }) => + typeof entry?.totalTokens === "number" && entry?.totalTokensFresh !== false + ? entry.totalTokens + : undefined, + ), readSessionUpdatedAt: vi.fn(() => undefined), recordSessionMetaFromInbound: vi.fn().mockResolvedValue(undefined), })); @@ -303,6 +310,7 @@ describe("statusCommand", () => { expect(payload.sessions.defaults.model).toBeTruthy(); expect(payload.sessions.defaults.contextTokens).toBeGreaterThan(0); expect(payload.sessions.recent[0].percentUsed).toBe(50); + expect(payload.sessions.recent[0].totalTokensFresh).toBe(true); expect(payload.sessions.recent[0].remainingTokens).toBe(5000); expect(payload.sessions.recent[0].flags).toContain("verbose:on"); expect(payload.securityAudit.summary.critical).toBe(1); @@ -311,6 +319,55 @@ describe("statusCommand", () => { expect(payload.nodeService.label).toBe("LaunchAgent"); }); + it("surfaces unknown usage when totalTokens is missing", async () => { + const originalLoadSessionStore = mocks.loadSessionStore.getMockImplementation(); + mocks.loadSessionStore.mockReturnValue({ + "+1000": { + updatedAt: Date.now() - 60_000, + inputTokens: 2_000, + outputTokens: 3_000, + contextTokens: 10_000, + model: "pi:opus", + }, + }); + + (runtime.log as vi.Mock).mockClear(); + await statusCommand({ json: true }, runtime as never); + const payload = JSON.parse((runtime.log as vi.Mock).mock.calls.at(-1)?.[0]); + expect(payload.sessions.recent[0].totalTokens).toBeNull(); + expect(payload.sessions.recent[0].totalTokensFresh).toBe(false); + expect(payload.sessions.recent[0].percentUsed).toBeNull(); + expect(payload.sessions.recent[0].remainingTokens).toBeNull(); + + if (originalLoadSessionStore) { + mocks.loadSessionStore.mockImplementation(originalLoadSessionStore); + } + }); + + it("prints unknown usage in formatted output when totalTokens is missing", async () => { + const originalLoadSessionStore = mocks.loadSessionStore.getMockImplementation(); + mocks.loadSessionStore.mockReturnValue({ + "+1000": { + updatedAt: Date.now() - 60_000, + inputTokens: 2_000, + outputTokens: 3_000, + contextTokens: 10_000, + model: "pi:opus", + }, + }); + + try { + (runtime.log as vi.Mock).mockClear(); + await statusCommand({}, runtime as never); + const logs = (runtime.log as vi.Mock).mock.calls.map((c) => String(c[0])); + expect(logs.some((line) => line.includes("unknown/") && line.includes("(?%)"))).toBe(true); + } finally { + if (originalLoadSessionStore) { + mocks.loadSessionStore.mockImplementation(originalLoadSessionStore); + } + } + }); + it("prints formatted lines otherwise", async () => { (runtime.log as vi.Mock).mockClear(); await statusCommand({}, runtime as never); @@ -439,6 +496,7 @@ describe("statusCommand", () => { updatedAt: Date.now() - 120_000, inputTokens: 1_000, outputTokens: 1_000, + totalTokens: 2_000, contextTokens: 10_000, model: "pi:opus", }, @@ -451,6 +509,7 @@ describe("statusCommand", () => { thinkingLevel: "low", inputTokens: 2_000, outputTokens: 3_000, + totalTokens: 5_000, contextTokens: 10_000, model: "pi:opus", sessionId: "abc123", diff --git a/src/commands/status.types.ts b/src/commands/status.types.ts index dba1e19e845..af8d8941e0c 100644 --- a/src/commands/status.types.ts +++ b/src/commands/status.types.ts @@ -16,6 +16,7 @@ export type SessionStatus = { inputTokens?: number; outputTokens?: number; totalTokens: number | null; + totalTokensFresh: boolean; remainingTokens: number | null; percentUsed: number | null; model: string | null; diff --git a/src/config/sessions/types.ts b/src/config/sessions/types.ts index fdd77233cc2..0eabe9334c8 100644 --- a/src/config/sessions/types.ts +++ b/src/config/sessions/types.ts @@ -70,6 +70,12 @@ export type SessionEntry = { inputTokens?: number; outputTokens?: number; totalTokens?: number; + /** + * Whether totalTokens reflects a fresh context snapshot for the latest run. + * Undefined means legacy/unknown freshness; false forces consumers to treat + * totalTokens as stale/unknown for context-utilization displays. + */ + totalTokensFresh?: boolean; modelProvider?: string; model?: string; contextTokens?: number; @@ -107,6 +113,25 @@ export function mergeSessionEntry( return { ...existing, ...patch, sessionId, updatedAt }; } +export function resolveFreshSessionTotalTokens( + entry?: Pick | null, +): number | undefined { + const total = entry?.totalTokens; + if (typeof total !== "number" || !Number.isFinite(total) || total < 0) { + return undefined; + } + if (entry?.totalTokensFresh === false) { + return undefined; + } + return total; +} + +export function isSessionTotalTokensFresh( + entry?: Pick | null, +): boolean { + return resolveFreshSessionTotalTokens(entry) !== undefined; +} + export type GroupKeyResolution = { key: string; channel?: string; diff --git a/src/cron/isolated-agent/run.ts b/src/cron/isolated-agent/run.ts index 9029ae29f64..a329ef0e88e 100644 --- a/src/cron/isolated-agent/run.ts +++ b/src/cron/isolated-agent/run.ts @@ -474,14 +474,16 @@ export async function runCronIsolatedAgentTurn(params: { if (hasNonzeroUsage(usage)) { const input = usage.input ?? 0; const output = usage.output ?? 0; - cronSession.sessionEntry.inputTokens = input; - cronSession.sessionEntry.outputTokens = output; - cronSession.sessionEntry.totalTokens = + const totalTokens = deriveSessionTotalTokens({ usage, contextTokens, promptTokens, }) ?? input; + cronSession.sessionEntry.inputTokens = input; + cronSession.sessionEntry.outputTokens = output; + cronSession.sessionEntry.totalTokens = totalTokens; + cronSession.sessionEntry.totalTokensFresh = true; } await persistSessionEntry(); } diff --git a/src/gateway/server-methods/sessions.ts b/src/gateway/server-methods/sessions.ts index b62a952d75a..5c3c4fe30ff 100644 --- a/src/gateway/server-methods/sessions.ts +++ b/src/gateway/server-methods/sessions.ts @@ -264,6 +264,7 @@ export const sessionsHandlers: GatewayRequestHandlers = { inputTokens: 0, outputTokens: 0, totalTokens: 0, + totalTokensFresh: true, }; store[primaryKey] = nextEntry; return nextEntry; @@ -464,6 +465,7 @@ export const sessionsHandlers: GatewayRequestHandlers = { delete entryToUpdate.inputTokens; delete entryToUpdate.outputTokens; delete entryToUpdate.totalTokens; + delete entryToUpdate.totalTokensFresh; entryToUpdate.updatedAt = Date.now(); }); diff --git a/src/gateway/server.sessions.gateway-server-sessions-a.e2e.test.ts b/src/gateway/server.sessions.gateway-server-sessions-a.e2e.test.ts index 90cd4dcc517..aad712f8c06 100644 --- a/src/gateway/server.sessions.gateway-server-sessions-a.e2e.test.ts +++ b/src/gateway/server.sessions.gateway-server-sessions-a.e2e.test.ts @@ -157,6 +157,7 @@ describe("gateway server sessions", () => { sessions: Array<{ key: string; totalTokens?: number; + totalTokensFresh?: boolean; thinkingLevel?: string; verboseLevel?: string; lastAccountId?: string; @@ -169,7 +170,8 @@ describe("gateway server sessions", () => { expect(list1.payload?.sessions.some((s) => s.key === "global")).toBe(false); expect(list1.payload?.defaults?.modelProvider).toBe(DEFAULT_PROVIDER); const main = list1.payload?.sessions.find((s) => s.key === "agent:main:main"); - expect(main?.totalTokens).toBe(30); + expect(main?.totalTokens).toBeUndefined(); + expect(main?.totalTokensFresh).toBe(false); expect(main?.thinkingLevel).toBe("low"); expect(main?.verboseLevel).toBe("on"); expect(main?.lastAccountId).toBe("work"); diff --git a/src/gateway/session-utils.test.ts b/src/gateway/session-utils.test.ts index 2fb51153d49..db1d0928f9e 100644 --- a/src/gateway/session-utils.test.ts +++ b/src/gateway/session-utils.test.ts @@ -356,4 +356,45 @@ describe("listSessionsFromStore search", () => { expect(result.sessions.map((session) => session.key)).toEqual(["agent:main:cron:job-1"]); }); + + test("exposes unknown totals when freshness is stale or missing", () => { + const now = Date.now(); + const store: Record = { + "agent:main:fresh": { + sessionId: "sess-fresh", + updatedAt: now, + totalTokens: 1200, + totalTokensFresh: true, + } as SessionEntry, + "agent:main:stale": { + sessionId: "sess-stale", + updatedAt: now - 1000, + totalTokens: 2200, + totalTokensFresh: false, + } as SessionEntry, + "agent:main:missing": { + sessionId: "sess-missing", + updatedAt: now - 2000, + inputTokens: 100, + outputTokens: 200, + } as SessionEntry, + }; + + const result = listSessionsFromStore({ + cfg: baseCfg, + storePath: "/tmp/sessions.json", + store, + opts: {}, + }); + + const fresh = result.sessions.find((row) => row.key === "agent:main:fresh"); + const stale = result.sessions.find((row) => row.key === "agent:main:stale"); + const missing = result.sessions.find((row) => row.key === "agent:main:missing"); + expect(fresh?.totalTokens).toBe(1200); + expect(fresh?.totalTokensFresh).toBe(true); + expect(stale?.totalTokens).toBeUndefined(); + expect(stale?.totalTokensFresh).toBe(false); + expect(missing?.totalTokens).toBeUndefined(); + expect(missing?.totalTokensFresh).toBe(false); + }); }); diff --git a/src/gateway/session-utils.ts b/src/gateway/session-utils.ts index f2bd97874e0..16299c6a11f 100644 --- a/src/gateway/session-utils.ts +++ b/src/gateway/session-utils.ts @@ -19,6 +19,7 @@ import { buildGroupDisplayName, canonicalizeMainSessionAlias, loadSessionStore, + resolveFreshSessionTotalTokens, resolveMainSessionKey, resolveStorePath, type SessionEntry, @@ -607,9 +608,9 @@ export function listSessionsFromStore(params: { }) .map(([key, entry]) => { const updatedAt = entry?.updatedAt ?? null; - const input = entry?.inputTokens ?? 0; - const output = entry?.outputTokens ?? 0; - const total = entry?.totalTokens ?? input + output; + const total = resolveFreshSessionTotalTokens(entry); + const totalTokensFresh = + typeof entry?.totalTokens === "number" ? entry?.totalTokensFresh !== false : false; const parsed = parseGroupKey(key); const channel = entry?.channel ?? parsed?.channel; const subject = entry?.subject; @@ -662,6 +663,7 @@ export function listSessionsFromStore(params: { inputTokens: entry?.inputTokens, outputTokens: entry?.outputTokens, totalTokens: total, + totalTokensFresh, responseUsage: entry?.responseUsage, modelProvider, model, diff --git a/src/gateway/session-utils.types.ts b/src/gateway/session-utils.types.ts index a7939bd1e5b..233a3d7c782 100644 --- a/src/gateway/session-utils.types.ts +++ b/src/gateway/session-utils.types.ts @@ -33,6 +33,7 @@ export type GatewaySessionRow = { inputTokens?: number; outputTokens?: number; totalTokens?: number; + totalTokensFresh?: boolean; responseUsage?: "on" | "off" | "tokens" | "full"; modelProvider?: string; model?: string;