fix(status): account for cached prompt tokens

2026-04-28 08:52:45 +00:00 · 2025-12-12 23:22:05 +00:00
parent e502ad13f9
commit c3aed2543e
6 changed files with 171 additions and 27 deletions
--- a/src/auto-reply/reply.ts
+++ b/src/auto-reply/reply.ts
@@ -822,12 +822,15 @@ export async function getReplyFromConfig(
          if (entry) {
            const input = usage.input ?? 0;
            const output = usage.output ?? 0;
-            const total = usage.total ?? input + output;
+            const promptTokens =
+              input + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
            sessionEntry = {
              ...entry,
-              inputTokens: (entry.inputTokens ?? 0) + input,
-              outputTokens: (entry.outputTokens ?? 0) + output,
-              totalTokens: (entry.totalTokens ?? 0) + total,
+              inputTokens: input,
+              outputTokens: output,
+              // Track the effective prompt/context size (cached + uncached input).
+              totalTokens:
+                promptTokens > 0 ? promptTokens : (usage.total ?? input),
              model,
              contextTokens: contextTokens ?? entry.contextTokens,
              updatedAt: Date.now(),
--- a/src/auto-reply/status.test.ts
+++ b/src/auto-reply/status.test.ts
@@ -1,5 +1,7 @@
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
 import { afterEach, describe, expect, it, vi } from "vitest";
-
 import { buildStatusMessage } from "./status.js";

 afterEach(() => {
@@ -60,4 +62,55 @@ describe("buildStatusMessage", () => {
    expect(text).toContain("Context:");
    expect(text).toContain("Web: not linked");
  });
+
+  it("prefers cached prompt tokens from the session log", () => {
+    const dir = fs.mkdtempSync(path.join(os.tmpdir(), "clawdis-status-"));
+    const storePath = path.join(dir, "sessions.json");
+    const sessionId = "sess-1";
+    const logPath = path.join(dir, `${sessionId}.jsonl`);
+
+    fs.writeFileSync(
+      logPath,
+      [
+        JSON.stringify({
+          type: "message",
+          message: {
+            role: "assistant",
+            model: "claude-opus-4-5",
+            usage: {
+              input: 1,
+              output: 2,
+              cacheRead: 1000,
+              cacheWrite: 0,
+              totalTokens: 1003,
+            },
+          },
+        }),
+      ].join("\n"),
+      "utf-8",
+    );
+
+    const text = buildStatusMessage({
+      reply: {
+        mode: "command",
+        command: ["echo", "{{Body}}"],
+        agent: { kind: "pi", model: "claude-opus-4-5", contextTokens: 32_000 },
+        session: { scope: "per-sender" },
+      },
+      sessionEntry: {
+        sessionId,
+        updatedAt: 0,
+        totalTokens: 3, // would be wrong if cached prompt tokens exist
+        contextTokens: 32_000,
+      },
+      sessionKey: "main",
+      sessionScope: "per-sender",
+      storePath,
+      webLinked: true,
+    });
+
+    expect(text).toContain("Context: 1.0k/32k");
+
+    fs.rmSync(dir, { recursive: true, force: true });
+  });
 });
--- a/src/auto-reply/status.ts
+++ b/src/auto-reply/status.ts
@@ -5,6 +5,11 @@ import path from "node:path";

 import { lookupContextTokens } from "../agents/context.js";
 import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL } from "../agents/defaults.js";
+import {
+  derivePromptTokens,
+  normalizeUsage,
+  type UsageLike,
+} from "../agents/usage.js";
 import type { ClawdisConfig } from "../config/config.js";
 import type { SessionEntry, SessionScope } from "../config/sessions.js";
 import type { ThinkLevel, VerboseLevel } from "./thinking.js";
@@ -117,6 +122,7 @@ const readUsageFromSessionLog = (
  | {
      input: number;
      output: number;
+      promptTokens: number;
      total: number;
      model?: string;
    }
@@ -144,33 +150,38 @@ const readUsageFromSessionLog = (
    const lines = fs.readFileSync(logPath, "utf-8").split(/\n+/);
    let input = 0;
    let output = 0;
+    let promptTokens = 0;
    let model: string | undefined;
+    let lastUsage: ReturnType<typeof normalizeUsage> | undefined;

    for (const line of lines) {
      if (!line.trim()) continue;
      try {
        const parsed = JSON.parse(line) as {
          message?: {
-            usage?: { input?: number; output?: number; total?: number };
+            usage?: UsageLike;
            model?: string;
          };
-          usage?: { input?: number; output?: number; total?: number };
+          usage?: UsageLike;
          model?: string;
        };
-        const usage = parsed.message?.usage ?? parsed.usage;
-        if (usage) {
-          input += usage.input ?? 0;
-          output += usage.output ?? 0;
-        }
+        const usageRaw = parsed.message?.usage ?? parsed.usage;
+        const usage = normalizeUsage(usageRaw);
+        if (usage) lastUsage = usage;
        model = parsed.message?.model ?? parsed.model ?? model;
      } catch {
        // ignore bad lines
      }
    }

-    const total = input + output;
-    if (total === 0) return undefined;
-    return { input, output, total, model };
+    if (!lastUsage) return undefined;
+    input = lastUsage.input ?? 0;
+    output = lastUsage.output ?? 0;
+    promptTokens =
+      derivePromptTokens(lastUsage) ?? lastUsage.total ?? input + output;
+    const total = lastUsage.total ?? promptTokens + output;
+    if (promptTokens === 0 && total === 0) return undefined;
+    return { input, output, promptTokens, total, model };
  } catch {
    return undefined;
  }
@@ -190,15 +201,17 @@ export function buildStatusMessage(args: StatusArgs): string {
    entry?.totalTokens ??
    (entry?.inputTokens ?? 0) + (entry?.outputTokens ?? 0);

-  // Fallback: derive usage from the session transcript if the store lacks it
-  if (!totalTokens || totalTokens === 0) {
-    const logUsage = readUsageFromSessionLog(entry?.sessionId, args.storePath);
-    if (logUsage) {
-      totalTokens = logUsage.total;
-      if (!model) model = logUsage.model ?? model;
-      if (!contextTokens && logUsage.model) {
-        contextTokens = lookupContextTokens(logUsage.model) ?? contextTokens;
-      }
+  // Prefer the transcript's prompt token count when it exceeds the stored total
+  // (cached prompt tokens are often missing from agent meta/store).
+  const logUsage = readUsageFromSessionLog(entry?.sessionId, args.storePath);
+  if (logUsage) {
+    const candidate = logUsage.promptTokens || logUsage.total;
+    if (!totalTokens || totalTokens === 0 || candidate > totalTokens) {
+      totalTokens = candidate;
+    }
+    if (!model) model = logUsage.model ?? model;
+    if (!contextTokens && logUsage.model) {
+      contextTokens = lookupContextTokens(logUsage.model) ?? contextTokens;
    }
  }
  const agentProbe = probeAgentCommand(args.reply?.command);