From f85f8df64ea08e1206c5fe280ea7a5033bf7f4d1 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 9 May 2026 05:46:26 +0100 Subject: [PATCH] refactor: remove transcript file mapping table --- docs/refactor/database-first.md | 18 +++++++------- docs/refactor/piless.md | 1 - .../session-management-compaction.md | 6 ++--- src/auto-reply/reply/agent-runner-memory.ts | 4 ++-- .../sessions/transcript-store.sqlite.test.ts | 23 +----------------- .../sessions/transcript-store.sqlite.ts | 20 ++++++++++------ src/config/sessions/transcript.test.ts | 10 +------- src/infra/session-cost-usage.test.ts | 24 ++++++++++++++++--- src/infra/session-cost-usage.ts | 4 ++-- src/state/openclaw-state-db.generated.d.ts | 9 ------- src/state/openclaw-state-schema.generated.ts | 16 ------------- src/state/openclaw-state-schema.sql | 16 ------------- 12 files changed, 52 insertions(+), 99 deletions(-) diff --git a/docs/refactor/database-first.md b/docs/refactor/database-first.md index 767a9b940a1..7f9db53a6ad 100644 --- a/docs/refactor/database-first.md +++ b/docs/refactor/database-first.md @@ -103,16 +103,16 @@ The branch already has a real shared SQLite base: file-to-database import remains in doctor code, and branch-local database upgrade helpers have been deleted. - Relational ownership is enforced where the ownership boundary is canonical: - transcript-file mappings cascade from `agent_databases`, source migration - rows cascade from `migration_runs`, task delivery state cascades from - `task_runs`, and transcript identity rows cascade from transcript events. + source migration rows cascade from `migration_runs`, task delivery state + cascades from `task_runs`, and transcript identity rows cascade from + transcript events. - Current shared tables include `kv`, `agents`, `agent_databases`, - `plugin_state_entries`, `plugin_blob_entries`, `transcript_files`, - `capture_sessions`, `capture_events`, `capture_blobs`, - `sandbox_registry_entries`, `cron_run_logs`, `cron_jobs`, `commitments`, - `delivery_queue_entries`, `current_conversation_bindings`, - `tui_last_sessions`, `task_runs`, `task_delivery_state`, `flow_runs`, - `subagent_runs`, `migration_runs`, and `backup_runs`. + `plugin_state_entries`, `plugin_blob_entries`, `capture_sessions`, + `capture_events`, `capture_blobs`, `sandbox_registry_entries`, + `cron_run_logs`, `cron_jobs`, `commitments`, `delivery_queue_entries`, + `current_conversation_bindings`, `tui_last_sessions`, `task_runs`, + `task_delivery_state`, `flow_runs`, `subagent_runs`, `migration_runs`, and + `backup_runs`. - `src/state/openclaw-agent-db.ts` opens `agents//agent/openclaw-agent.sqlite`, registers the database in the global DB, and owns agent-local session, transcript, VFS, artifact, and cache diff --git a/docs/refactor/piless.md b/docs/refactor/piless.md index 8d9482359db..9327a9e697f 100644 --- a/docs/refactor/piless.md +++ b/docs/refactor/piless.md @@ -364,7 +364,6 @@ kv(scope, key, value_json, updated_at) agents(agent_id, config_json, created_at, updated_at) session_entries(agent_id, session_key, entry_json, updated_at) transcript_events(agent_id, session_id, seq, event_json, created_at) -transcript_files(agent_id, session_id, path, imported_at, exported_at) vfs_entries(agent_id, namespace, path, kind, content_blob, metadata_json, updated_at) tool_artifacts(agent_id, run_id, artifact_id, kind, metadata_json, blob, created_at) ``` diff --git a/docs/reference/session-management-compaction.md b/docs/reference/session-management-compaction.md index b140ccb5651..e659e0b84a4 100644 --- a/docs/reference/session-management-compaction.md +++ b/docs/reference/session-management-compaction.md @@ -83,9 +83,9 @@ Per agent, on the Gateway host: sources after durable verification. Gateway startup leaves legacy indexes alone. - Transcripts: runtime transcript events live in the per-agent database - (`transcript_events` and `transcript_event_identities`). The global - `transcript_files` table maps legacy/export/debug path-shaped locators to - `{ agentId, sessionId }`; JSONL files are not runtime sidecars. + (`transcript_events` and `transcript_event_identities`). Session file values + are canonical `sqlite-transcript:///.jsonl` locators; + JSONL files are doctor migration inputs, not runtime sidecars. - Telegram topic handles: `.../-topic-.jsonl` OpenClaw resolves these via `src/config/sessions/*`. diff --git a/src/auto-reply/reply/agent-runner-memory.ts b/src/auto-reply/reply/agent-runner-memory.ts index 02b9eae89de..18a7be93b44 100644 --- a/src/auto-reply/reply/agent-runner-memory.ts +++ b/src/auto-reply/reply/agent-runner-memory.ts @@ -18,7 +18,7 @@ import { type SessionEntry, } from "../../config/sessions.js"; import { - listSqliteSessionTranscriptFiles, + listSqliteSessionTranscriptLocators, loadSqliteSessionTranscriptEvents, resolveSqliteSessionTranscriptScope, } from "../../config/sessions/transcript-store.sqlite.js"; @@ -233,7 +233,7 @@ function resolveSqliteSessionTranscriptPath(params: { return undefined; } const agentId = params.sessionKey ? resolveAgentIdFromSessionKey(params.sessionKey) : undefined; - const candidates = listSqliteSessionTranscriptFiles().filter( + const candidates = listSqliteSessionTranscriptLocators().filter( (entry) => (!agentId || entry.agentId === agentId) && entry.sessionId === sessionId, ); if (candidates.length === 0) { diff --git a/src/config/sessions/transcript-store.sqlite.test.ts b/src/config/sessions/transcript-store.sqlite.test.ts index 10b05fe10ae..79aa1df4989 100644 --- a/src/config/sessions/transcript-store.sqlite.test.ts +++ b/src/config/sessions/transcript-store.sqlite.test.ts @@ -6,10 +6,7 @@ import { closeOpenClawAgentDatabasesForTest, openOpenClawAgentDatabase, } from "../../state/openclaw-agent-db.js"; -import { - closeOpenClawStateDatabaseForTest, - openOpenClawStateDatabase, -} from "../../state/openclaw-state-db.js"; +import { closeOpenClawStateDatabaseForTest } from "../../state/openclaw-state-db.js"; import { createSqliteSessionTranscriptLocator } from "./paths.js"; import { appendSqliteSessionTranscriptEvent, @@ -206,24 +203,6 @@ describe("SQLite session transcript store", () => { ]); }); - it("does not write runtime transcript file mappings", () => { - const stateDir = createTempDir(); - appendSqliteSessionTranscriptEvent({ - env: { OPENCLAW_STATE_DIR: stateDir }, - agentId: "main", - sessionId: "session-1", - transcriptPath: path.join(stateDir, "session.jsonl"), - event: { type: "session", id: "session-1" }, - }); - - const stateDatabase = openOpenClawStateDatabase({ - env: { OPENCLAW_STATE_DIR: stateDir }, - }); - expect( - stateDatabase.db.prepare("SELECT COUNT(*) AS count FROM transcript_files").get(), - ).toEqual({ count: 0 }); - }); - it("deletes transcript snapshots with the transcript", () => { const stateDir = createTempDir(); const env = { OPENCLAW_STATE_DIR: stateDir }; diff --git a/src/config/sessions/transcript-store.sqlite.ts b/src/config/sessions/transcript-store.sqlite.ts index 212763c8516..be16107496b 100644 --- a/src/config/sessions/transcript-store.sqlite.ts +++ b/src/config/sessions/transcript-store.sqlite.ts @@ -57,7 +57,7 @@ export type SqliteSessionTranscriptScope = { sessionId: string; }; -export type SqliteSessionTranscriptFile = SqliteSessionTranscriptScope & { +export type SqliteSessionTranscriptLocator = SqliteSessionTranscriptScope & { path: string; updatedAt: number; }; @@ -116,6 +116,12 @@ function getAgentTranscriptKysely(db: import("node:sqlite").DatabaseSync) { return getNodeSqliteKysely(db); } +function openTranscriptAgentDatabase( + options: SqliteSessionTranscriptStoreOptions, +): OpenClawAgentDatabase { + return openOpenClawAgentDatabase({ env: options.env, agentId: options.agentId }); +} + function bindTranscriptEvent(params: { sessionId: string; seq: number; @@ -265,9 +271,9 @@ export function resolveSqliteSessionTranscriptScope( }; } -export function listSqliteSessionTranscriptFiles( +export function listSqliteSessionTranscriptLocators( options: OpenClawStateDatabaseOptions = {}, -): SqliteSessionTranscriptFile[] { +): SqliteSessionTranscriptLocator[] { return listSqliteSessionTranscripts(options).map((transcript) => ({ agentId: transcript.agentId, sessionId: transcript.sessionId, @@ -352,7 +358,7 @@ export function getSqliteSessionTranscriptStats( options: SqliteSessionTranscriptStoreOptions, ): Pick | null { const { sessionId } = normalizeTranscriptScope(options); - const database = openOpenClawAgentDatabase(options); + const database = openTranscriptAgentDatabase(options); const row = executeSqliteQueryTakeFirstSync( database.db, getAgentTranscriptKysely(database.db) @@ -519,7 +525,7 @@ export function loadSqliteSessionTranscriptEvents( options: SqliteSessionTranscriptStoreOptions, ): SqliteSessionTranscriptEvent[] { const { sessionId } = normalizeTranscriptScope(options); - const database = openOpenClawAgentDatabase(options); + const database = openTranscriptAgentDatabase(options); return executeSqliteQuerySync( database.db, getAgentTranscriptKysely(database.db) @@ -542,7 +548,7 @@ export function hasSqliteSessionTranscriptEvents( options: SqliteSessionTranscriptStoreOptions, ): boolean { const { sessionId } = normalizeTranscriptScope(options); - const database = openOpenClawAgentDatabase(options); + const database = openTranscriptAgentDatabase(options); const row = executeSqliteQueryTakeFirstSync( database.db, getAgentTranscriptKysely(database.db) @@ -598,7 +604,7 @@ export function hasSqliteSessionTranscriptSnapshot( ): boolean { const { sessionId } = normalizeTranscriptScope(options); const snapshotId = normalizeSessionId(options.snapshotId); - const database = openOpenClawAgentDatabase(options); + const database = openTranscriptAgentDatabase(options); const row = executeSqliteQueryTakeFirstSync( database.db, getAgentTranscriptKysely(database.db) diff --git a/src/config/sessions/transcript.test.ts b/src/config/sessions/transcript.test.ts index e7ef19781a3..990134704bd 100644 --- a/src/config/sessions/transcript.test.ts +++ b/src/config/sessions/transcript.test.ts @@ -4,10 +4,7 @@ import path from "node:path"; import { afterEach, describe, expect, it, vi } from "vitest"; import * as transcriptEvents from "../../sessions/transcript-events.js"; import { closeOpenClawAgentDatabasesForTest } from "../../state/openclaw-agent-db.js"; -import { - closeOpenClawStateDatabaseForTest, - openOpenClawStateDatabase, -} from "../../state/openclaw-state-db.js"; +import { closeOpenClawStateDatabaseForTest } from "../../state/openclaw-state-db.js"; import { createSqliteSessionTranscriptLocator } from "./paths.js"; import { upsertSessionEntry } from "./store.js"; import { useTempSessionsFixture } from "./test-helpers.js"; @@ -632,11 +629,6 @@ describe("appendAssistantMessageToSessionTranscript", () => { }).map((entry) => entry.event as { type?: string; message?: unknown }); expect(events.map((event) => event.type)).toEqual(["session", "message"]); - const stateDatabase = openOpenClawStateDatabase({ env }); - expect( - stateDatabase.db.prepare("SELECT COUNT(*) AS count FROM transcript_files").get(), - ).toEqual({ count: 0 }); - fs.rmSync(stateDir, { recursive: true, force: true }); }); diff --git a/src/infra/session-cost-usage.test.ts b/src/infra/session-cost-usage.test.ts index a45412491cc..fb4b45cd6a7 100644 --- a/src/infra/session-cost-usage.test.ts +++ b/src/infra/session-cost-usage.test.ts @@ -1,6 +1,7 @@ import os from "node:os"; import path from "node:path"; import { afterAll, beforeAll, describe, expect, it } from "vitest"; +import { createSqliteSessionTranscriptLocator } from "../config/sessions/paths.js"; import { replaceSqliteSessionTranscriptEvents } from "../config/sessions/transcript-store.sqlite.js"; import { closeOpenClawStateDatabaseForTest } from "../state/openclaw-state-db.js"; import { createSuiteTempRootTracker } from "../test-helpers/temp-dir.js"; @@ -32,8 +33,8 @@ describe("session cost usage", () => { const makeRoot = async (prefix: string): Promise => await suiteRootTracker.make(prefix); - const sessionPath = (root: string, sessionId: string, agentId = "main") => - path.join(root, "agents", agentId, "sessions", `${sessionId}.jsonl`); + const sessionPath = (_root: string, sessionId: string, agentId = "main") => + createSqliteSessionTranscriptLocator({ agentId, sessionId }); const writeTranscript = (params: { agentId?: string; @@ -41,11 +42,20 @@ describe("session cost usage", () => { transcriptPath?: string; events: unknown[]; }) => { + const eventTimestamp = params.events + .map((event) => + event && typeof event === "object" + ? Date.parse(String((event as { timestamp?: unknown }).timestamp ?? "")) + : NaN, + ) + .find((value) => Number.isFinite(value)); replaceSqliteSessionTranscriptEvents({ agentId: params.agentId ?? "main", sessionId: params.sessionId, - transcriptPath: params.transcriptPath, + transcriptPath: + params.transcriptPath ?? sessionPath("", params.sessionId, params.agentId ?? "main"), events: [{ type: "session", version: 1, id: params.sessionId }, ...params.events], + ...(eventTimestamp !== undefined ? { now: () => eventTimestamp } : {}), }); }; @@ -61,6 +71,14 @@ describe("session cost usage", () => { }) => ({ type: "message", timestamp: params.timestamp, + provider: params.provider ?? "openai", + model: params.model ?? "gpt-5.4", + usage: { + input: params.input, + output: params.output, + totalTokens: params.totalTokens ?? params.input + params.output, + ...(params.cost === undefined ? {} : { cost: { total: params.cost } }), + }, message: { role: "assistant", provider: params.provider ?? "openai", diff --git a/src/infra/session-cost-usage.ts b/src/infra/session-cost-usage.ts index 246b8a2211a..ab6b84aea72 100644 --- a/src/infra/session-cost-usage.ts +++ b/src/infra/session-cost-usage.ts @@ -3,7 +3,7 @@ import { normalizeUsage } from "../agents/usage.js"; import { stripInboundMetadata } from "../auto-reply/reply/strip-inbound-meta.js"; import { createSqliteSessionTranscriptLocator } from "../config/sessions/paths.js"; import { - listSqliteSessionTranscriptFiles, + listSqliteSessionTranscriptLocators, listSqliteSessionTranscripts, loadSqliteSessionTranscriptEvents, resolveSqliteSessionTranscriptScopeForPath, @@ -263,7 +263,7 @@ const applyCostTotal = (totals: CostUsageTotals, costTotal: number | undefined) }; function getRememberedTranscriptPath(agentId: string, sessionId: string): string | undefined { - return listSqliteSessionTranscriptFiles().find( + return listSqliteSessionTranscriptLocators().find( (entry) => entry.agentId === agentId && entry.sessionId === sessionId, )?.path; } diff --git a/src/state/openclaw-state-db.generated.d.ts b/src/state/openclaw-state-db.generated.d.ts index d2b9d907928..3eaf5e84108 100644 --- a/src/state/openclaw-state-db.generated.d.ts +++ b/src/state/openclaw-state-db.generated.d.ts @@ -329,14 +329,6 @@ export interface TaskRuns { terminal_summary: string | null; } -export interface TranscriptFiles { - agent_id: string; - exported_at: number | null; - imported_at: number | null; - path: string; - session_id: string; -} - export interface TuiLastSessions { scope_key: string; session_key: string; @@ -368,6 +360,5 @@ export interface DB { subagent_runs: SubagentRuns; task_delivery_state: TaskDeliveryState; task_runs: TaskRuns; - transcript_files: TranscriptFiles; tui_last_sessions: TuiLastSessions; } diff --git a/src/state/openclaw-state-schema.generated.ts b/src/state/openclaw-state-schema.generated.ts index 35f277313dc..e652ec7f1a8 100644 --- a/src/state/openclaw-state-schema.generated.ts +++ b/src/state/openclaw-state-schema.generated.ts @@ -188,22 +188,6 @@ CREATE INDEX IF NOT EXISTS idx_commitments_scope_due CREATE INDEX IF NOT EXISTS idx_commitments_status_due ON commitments(status, due_earliest_ms, due_latest_ms); -CREATE TABLE IF NOT EXISTS transcript_files ( - agent_id TEXT NOT NULL, - session_id TEXT NOT NULL, - path TEXT NOT NULL, - imported_at INTEGER, - exported_at INTEGER, - PRIMARY KEY (agent_id, session_id, path), - FOREIGN KEY (agent_id) REFERENCES agent_databases(agent_id) ON DELETE CASCADE -); - -CREATE INDEX IF NOT EXISTS idx_transcript_files_path_updated - ON transcript_files(path, imported_at DESC, exported_at DESC, agent_id, session_id); - -CREATE INDEX IF NOT EXISTS idx_transcript_files_session_updated - ON transcript_files(agent_id, session_id, imported_at DESC, exported_at DESC, path); - CREATE TABLE IF NOT EXISTS cron_run_logs ( store_key TEXT NOT NULL, job_id TEXT NOT NULL, diff --git a/src/state/openclaw-state-schema.sql b/src/state/openclaw-state-schema.sql index 7d333ba9268..37ab457f053 100644 --- a/src/state/openclaw-state-schema.sql +++ b/src/state/openclaw-state-schema.sql @@ -183,22 +183,6 @@ CREATE INDEX IF NOT EXISTS idx_commitments_scope_due CREATE INDEX IF NOT EXISTS idx_commitments_status_due ON commitments(status, due_earliest_ms, due_latest_ms); -CREATE TABLE IF NOT EXISTS transcript_files ( - agent_id TEXT NOT NULL, - session_id TEXT NOT NULL, - path TEXT NOT NULL, - imported_at INTEGER, - exported_at INTEGER, - PRIMARY KEY (agent_id, session_id, path), - FOREIGN KEY (agent_id) REFERENCES agent_databases(agent_id) ON DELETE CASCADE -); - -CREATE INDEX IF NOT EXISTS idx_transcript_files_path_updated - ON transcript_files(path, imported_at DESC, exported_at DESC, agent_id, session_id); - -CREATE INDEX IF NOT EXISTS idx_transcript_files_session_updated - ON transcript_files(agent_id, session_id, imported_at DESC, exported_at DESC, path); - CREATE TABLE IF NOT EXISTS cron_run_logs ( store_key TEXT NOT NULL, job_id TEXT NOT NULL,