refactor: store default memory index in agent database

This commit is contained in:
Peter Steinberger
2026-05-08 20:49:09 +01:00
parent 4993d796d5
commit 3fd49cd6fe
11 changed files with 126 additions and 27 deletions

View File

@@ -76,9 +76,10 @@ order shown. Set `memorySearch.provider` to override.
## How indexing works
OpenClaw indexes `MEMORY.md` and `memory/*.md` into chunks (~400 tokens with
80-token overlap) and stores them in a per-agent SQLite database.
80-token overlap) and stores them in each agent's `openclaw-agent.sqlite`
database.
- **Index location:** `~/.openclaw/memory/<agentId>.sqlite`
- **Index location:** `~/.openclaw/agents/<agentId>/agent/openclaw-agent.sqlite`
- **Storage maintenance:** SQLite WAL sidecars are bounded with periodic and
shutdown checkpoints.
- **File watching:** changes to memory files trigger a debounced reindex (1.5s).

View File

@@ -57,7 +57,8 @@ proceed with these assumptions:
The current branch is already past the proof-of-concept stage. The shared
database exists, Node `node:sqlite` is wired through a small runtime helper, and
several former sidecars now write to `state/openclaw.sqlite`.
former sidecars now write to `state/openclaw.sqlite` or the owning
`openclaw-agent.sqlite` database.
The remaining work is not choosing SQLite; it is deleting compatibility-shaped
interfaces that still look like the old file world:
@@ -599,6 +600,9 @@ Move these into the global database:
the shared database; legacy sidecar import remains.
- Plugin state from `plugin-state/state.sqlite`. Runtime writes now use the
shared database; legacy sidecar import remains.
- Builtin memory search no longer defaults to `memory/<agentId>.sqlite`; its
index tables live in the owning agent database unless `memorySearch.store.path`
explicitly asks for a sidecar.
- Sandbox container/browser registries from monolithic and sharded JSON. Runtime
writes now use the shared database; legacy JSON import remains.
- Cron job definitions, schedule state, and run history now use shared SQLite;
@@ -885,6 +889,9 @@ is newer than the backup.
writes; doctor imports the legacy sidecar.
- Move Task Flow tables into the global database. Done for runtime writes;
doctor imports the legacy sidecar.
- Move builtin memory-search tables into each agent database by default.
Done for the default path; explicit custom `memorySearch.store.path`
remains a sidecar opt-in.
- Delete duplicate database openers, WAL setup, permission helpers, and
close paths from those subsystems.

View File

@@ -464,10 +464,10 @@ When sqlite-vec is unavailable, OpenClaw falls back to in-process cosine similar
## Index storage
| Key | Type | Default | Description |
| --------------------- | -------- | ------------------------------------- | ------------------------------------------- |
| `store.path` | `string` | `~/.openclaw/memory/{agentId}.sqlite` | Index location (supports `{agentId}` token) |
| `store.fts.tokenizer` | `string` | `unicode61` | FTS5 tokenizer (`unicode61` or `trigram`) |
| Key | Type | Default | Description |
| --------------------- | -------- | ----------------------------- | ------------------------------------------------------------ |
| `store.path` | `string` | agent `openclaw-agent.sqlite` | Optional sidecar index location (supports `{agentId}` token) |
| `store.fts.tokenizer` | `string` | `unicode61` | FTS5 tokenizer (`unicode61` or `trigram`) |
---

View File

@@ -9,7 +9,11 @@ import {
} from "openclaw/plugin-sdk/memory-core-host-engine-embeddings";
import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
import { replaceSqliteSessionTranscriptEvents } from "../../../../src/config/sessions/transcript-store.sqlite.js";
import { closeOpenClawAgentDatabasesForTest } from "../../../../src/state/openclaw-agent-db.js";
import {
closeOpenClawAgentDatabasesForTest,
openOpenClawAgentDatabase,
resolveOpenClawAgentSqlitePath,
} from "../../../../src/state/openclaw-agent-db.js";
import { closeOpenClawStateDatabaseForTest } from "../../../../src/state/openclaw-state-db.js";
import "./test-runtime-mocks.js";
import type { MemoryIndexManager } from "./index.js";
@@ -380,6 +384,50 @@ describe("memory index", () => {
}
});
// Verifies that, with no `memorySearch.store.path` configured, the memory
// index reports the per-agent SQLite file as its database path and that a
// forced sync leaves unrelated rows in that same database intact.
it("stores the default memory index inside the per-agent database", async () => {
// Point the state dir at a directory under the test workspace before
// resolving the agent database path, so both helpers below see the stub.
const stateDir = path.join(workspaceDir, "managed-memory-state");
vi.stubEnv("OPENCLAW_STATE_DIR", stateDir);
const agentDbPath = resolveOpenClawAgentSqlitePath({ agentId: "main" });
// Seed a non-memory row ("keep-me") that must survive the reindex below.
const agentDb = openOpenClawAgentDatabase({ agentId: "main" });
agentDb.db
.prepare("INSERT INTO session_entries (session_key, entry_json, updated_at) VALUES (?, ?, ?)")
.run("agent:main:test", JSON.stringify({ sessionId: "keep-me", updatedAt: 1 }), 1);
// Release the seeded handle before the memory manager is created.
// NOTE(review): presumably this closes every cached agent DB handle — confirm
// against the helper in src/state/openclaw-agent-db.
closeOpenClawAgentDatabasesForTest();
// `store` only disables the vector extension; no `store.path` is set, so the
// default store location is exercised.
const cfg: TestCfg = {
agents: {
defaults: {
workspace: workspaceDir,
memorySearch: {
provider: "openai",
model: "mock-embed",
store: { vector: { enabled: false } },
chunking: { tokens: 4000, overlap: 0 },
sync: { watch: false, onSessionStart: false, onSearch: true },
query: { minScore: 0, hybrid: { enabled: false } },
},
},
list: [{ id: "main", default: true }],
},
};
const manager = await getFreshManager(cfg);
try {
// Forced sync, then check the manager reports the agent database path.
await manager.sync({ reason: "test", force: true });
expect(manager.status().dbPath).toBe(agentDbPath);
} finally {
// Always close the manager, even when the expectation above fails.
await manager.close?.();
}
// Reopen the agent database and confirm the seeded session entry is still
// present, byte-for-byte, after the forced sync.
const reopened = openOpenClawAgentDatabase({ agentId: "main" });
expect(
reopened.db
.prepare("SELECT entry_json FROM session_entries WHERE session_key = ?")
.get("agent:main:test"),
).toEqual({
entry_json: JSON.stringify({ sessionId: "keep-me", updatedAt: 1 }),
});
});
it("indexes multimodal image and audio files from extra paths with Gemini structured inputs", async () => {
const mediaDir = path.join(workspaceDir, "media-memory");
await fs.mkdir(mediaDir, { recursive: true });

View File

@@ -972,8 +972,9 @@ export abstract class MemoryManagerSyncOps {
reason: params?.reason,
progress: progress ?? undefined,
useUnsafeReindex:
process.env.OPENCLAW_TEST_FAST === "1" &&
process.env.OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX === "1",
this.settings.store.managedAgentDatabase ||
(process.env.OPENCLAW_TEST_FAST === "1" &&
process.env.OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX === "1"),
dirtySessionTranscripts: this.dirtySessionTranscripts,
syncSessionTranscripts: async (targetedParams) => {
await this.syncSessionTranscripts(targetedParams);
@@ -1269,8 +1270,10 @@ export abstract class MemoryManagerSyncOps {
force?: boolean;
progress?: MemorySyncProgressState;
}): Promise<void> {
// Perf: for test runs, skip atomic temp-db swapping. The index is isolated
// under the per-test HOME anyway, and this cuts substantial fs+sqlite churn.
// Managed per-agent DBs cannot use whole-file swaps because the same
// database also owns sessions, VFS rows, and runtime state. Reset only the
// memory tables in place; explicit custom store paths still use the safer
// sidecar DB swap above.
this.resetIndex();
const shouldSyncMemory = this.sources.has("memory");

View File

@@ -197,6 +197,26 @@ describe("memory search config", () => {
const resolved = resolveMemorySearchConfig(cfg, "main");
expect(resolved?.provider).toBe("auto");
expect(resolved?.fallback).toBe("none");
expect(resolved?.store.path).toMatch(/agents[/\\]main[/\\]agent[/\\]openclaw-agent\.sqlite$/);
expect(resolved?.store.managedAgentDatabase).toBe(true);
});
it("keeps explicit memory store paths as sidecar indexes", () => {
  // An explicit `store.path` (using the `{agentId}` token) is configured, so
  // the resolved store is expected to be a sidecar file with the token
  // expanded, not the managed per-agent database.
  const resolved = resolveMemorySearchConfig(
    asConfig({
      agents: {
        defaults: {
          memorySearch: {
            enabled: true,
            store: { path: "/tmp/openclaw-memory-{agentId}.sqlite" },
          },
        },
      },
    }),
    "main",
  );
  const store = resolved?.store;
  expect(store?.path).toBe("/tmp/openclaw-memory-main.sqlite");
  expect(store?.managedAgentDatabase).toBe(false);
});
it("resolves custom provider ids through their configured api owner", () => {

View File

@@ -1,7 +1,4 @@
import os from "node:os";
import path from "node:path";
import type { OpenClawConfig, MemorySearchConfig } from "../config/config.js";
import { resolveStateDir } from "../config/paths.js";
import type { SecretInput } from "../config/types.secrets.js";
import {
isMemoryMultimodalEnabled,
@@ -9,6 +6,7 @@ import {
type MemoryMultimodalSettings,
} from "../memory-host-sdk/multimodal.js";
import { getMemoryEmbeddingProvider } from "../plugins/memory-embedding-providers.js";
import { resolveOpenClawAgentSqlitePath } from "../state/openclaw-agent-db.js";
import { clampInt, clampNumber, resolveUserPath } from "../utils.js";
import { resolveAgentConfig } from "./agent-scope.js";
import { findNormalizedProviderValue, normalizeProviderId } from "./provider-id.js";
@@ -49,6 +47,7 @@ export type ResolvedMemorySearchConfig = {
store: {
driver: "sqlite";
path: string;
managedAgentDatabase: boolean;
fts: {
tokenizer: "unicode61" | "trigram";
};
@@ -138,14 +137,24 @@ function normalizeSources(
return Array.from(normalized);
}
function resolveMemoryStorePath(agentId: string, raw?: string): string {
const stateDir = resolveStateDir(process.env, os.homedir);
const fallback = path.join(stateDir, "memory", `${agentId}.sqlite`);
function resolveMemoryStore(
agentId: string,
raw?: string,
): {
path: string;
managedAgentDatabase: boolean;
} {
if (!raw) {
return fallback;
return {
path: resolveOpenClawAgentSqlitePath({ agentId, env: process.env }),
managedAgentDatabase: true,
};
}
const withToken = raw.includes("{agentId}") ? raw.replaceAll("{agentId}", agentId) : raw;
return resolveUserPath(withToken);
return {
path: resolveUserPath(withToken),
managedAgentDatabase: false,
};
}
function getConfiguredMemoryEmbeddingProvider(
@@ -256,9 +265,14 @@ function mergeConfig(
const fts = {
tokenizer: overrides?.store?.fts?.tokenizer ?? defaults?.store?.fts?.tokenizer ?? "unicode61",
};
const resolvedStore = resolveMemoryStore(
agentId,
overrides?.store?.path ?? defaults?.store?.path,
);
const store = {
driver: overrides?.store?.driver ?? defaults?.store?.driver ?? "sqlite",
path: resolveMemoryStorePath(agentId, overrides?.store?.path ?? defaults?.store?.path),
path: resolvedStore.path,
managedAgentDatabase: resolvedStore.managedAgentDatabase,
fts,
vector,
};

View File

@@ -64,4 +64,12 @@ describe("status.scan-memory", () => {
requireDefaultStore,
});
});
it("uses the per-agent runtime database as the default memory store", async () => {
  // Import lazily inside the test, as the original does, then check that the
  // default store path ends in the agent's `openclaw-agent.sqlite` file
  // (either path separator is accepted).
  const scanMemoryModule = await import("./status.scan-memory.ts");
  const defaultStorePath = scanMemoryModule.resolveDefaultMemoryStorePath("main");
  expect(defaultStorePath).toMatch(/agents[/\\]main[/\\]agent[/\\]openclaw-agent\.sqlite$/);
});
});

View File

@@ -1,9 +1,7 @@
import os from "node:os";
import path from "node:path";
import { resolveMemorySearchConfig } from "../agents/memory-search.js";
import { resolveStateDir } from "../config/paths.js";
import type { OpenClawConfig } from "../config/types.js";
import { createLazyImportLoader } from "../shared/lazy-promise.js";
import { resolveOpenClawAgentSqlitePath } from "../state/openclaw-agent-db.js";
import type { getAgentLocalStatuses as getAgentLocalStatusesFn } from "./status.agent-local.js";
import {
resolveSharedMemoryStatusSnapshot,
@@ -20,7 +18,7 @@ function loadStatusScanDepsRuntimeModule() {
}
export function resolveDefaultMemoryStorePath(agentId: string): string {
return path.join(resolveStateDir(process.env, os.homedir), "memory", `${agentId}.sqlite`);
return resolveOpenClawAgentSqlitePath({ agentId });
}
export async function resolveStatusMemoryStatusSnapshot(params: {

View File

@@ -662,7 +662,7 @@ describe("config help copy quality", () => {
expect(FIELD_HELP["memory.qmd.update.interval"].includes("5m")).toBe(true);
expect(FIELD_HELP["memory.qmd.update.embedInterval"].includes("60m")).toBe(true);
expect(FIELD_HELP["agents.defaults.memorySearch.store.path"]).toContain(
"~/.openclaw/memory/{agentId}.sqlite",
"openclaw-agent.sqlite",
);
});

View File

@@ -1136,7 +1136,7 @@ export const FIELD_HELP: Record<string, string> = {
"agents.defaults.memorySearch.fallback":
'Backup provider used when primary embeddings fail: "openai", "gemini", "voyage", "mistral", "bedrock", "lmstudio", "ollama", "local", or "none". Set a real fallback for production reliability; use "none" only if you prefer explicit failures.',
"agents.defaults.memorySearch.store.path":
"Sets where the SQLite memory index is stored on disk for each agent. Keep the default `~/.openclaw/memory/{agentId}.sqlite` unless you need custom storage placement or backup policy alignment.",
"Sets where the SQLite memory index is stored. By default it lives in each agent's `openclaw-agent.sqlite` database; set this only when you intentionally want a separate sidecar index path, with `{agentId}` supported.",
"agents.defaults.memorySearch.store.vector.enabled":
"Enables the sqlite-vec extension used for vector similarity queries in memory search (default: true). Keep this enabled for normal semantic recall; disable only for debugging or fallback-only operation.",
"agents.defaults.memorySearch.store.vector.extensionPath":