fix(memory): reindex when sources change

This commit is contained in:
Vignesh Natarajan
2026-02-22 15:12:01 -08:00
parent 44727dc3a1
commit d7747148d0
3 changed files with 119 additions and 0 deletions

View File

@@ -118,6 +118,7 @@ Docs: https://docs.openclaw.ai
- Memory/Remote HTTP: centralize remote memory HTTP calls behind a shared guarded helper (`withRemoteHttpResponse`) so embeddings and batch flows use one request/release path.
- Memory/Embeddings: apply configured remote-base host pinning (`allowedHostnames`) across OpenAI/Voyage/Gemini embedding requests to keep private/self-hosted endpoints working without cross-host drift. (#18198) Thanks @ianpcook.
- Memory/Batch: route OpenAI/Voyage/Gemini batch upload/create/status/download requests through the same guarded HTTP path for consistent SSRF policy enforcement.
- Memory/Index: detect memory source-set changes (for example, enabling `sessions` on an existing memory-only index) and trigger a full reindex so existing session transcripts are indexed without requiring `--force`. (#17576) Thanks @TarsAI-Agent.
- Memory/QMD: on Windows, resolve bare `qmd`/`mcporter` command names to npm shim executables (`.cmd`) before spawning, so qmd boot updates and mcporter-backed searches no longer fail with `spawn ... ENOENT` on default npm installs. (#23899) Thanks @arcbuilder-ai.
- Memory/QMD: parse plain-text `qmd collection list --json` output when older qmd builds ignore JSON mode, and retry memory searches once after re-ensuring managed collections when qmd returns `Collection not found ...`. (#23613) Thanks @leozhucn.
- Signal/RPC: guard malformed Signal RPC JSON responses with a clear status-scoped error and add regression coverage for invalid JSON responses. (#22995) Thanks @adhitShet.

View File

@@ -93,6 +93,8 @@ describe("memory index", () => {
function createCfg(params: {
storePath: string;
extraPaths?: string[];
sources?: Array<"memory" | "sessions">;
sessionMemory?: boolean;
model?: string;
vectorEnabled?: boolean;
cacheEnabled?: boolean;
@@ -115,6 +117,8 @@ describe("memory index", () => {
},
cache: params.cacheEnabled ? { enabled: true } : undefined,
extraPaths: params.extraPaths,
sources: params.sources,
experimental: { sessionMemory: params.sessionMemory ?? false },
},
},
list: [{ id: "main", default: true }],
@@ -195,6 +199,85 @@ describe("memory index", () => {
await statusOnly.manager.close?.();
});
it("reindexes sessions when source config adds sessions to an existing index", async () => {
  // Scenario: the index is first synced with only the "memory" source, then the
  // same store is reopened with "sessions" added. The second sync must report the
  // session transcript as indexed.
  const storePath = path.join(workspaceDir, `index-source-change-${Date.now()}.sqlite`);
  const stateDir = path.join(fixtureRoot, `state-source-change-${Date.now()}`);
  const sessionDir = path.join(stateDir, "agents", "main", "sessions");
  await fs.mkdir(sessionDir, { recursive: true });

  // One JSONL transcript holding a user and an assistant message, newline-terminated.
  const transcriptLines = [
    { role: "user", text: "session change test user line" },
    { role: "assistant", text: "session change test assistant line" },
  ].map(({ role, text }) =>
    JSON.stringify({
      type: "message",
      message: {
        role,
        content: [{ type: "text", text }],
      },
    }),
  );
  await fs.writeFile(
    path.join(sessionDir, "session-source-change.jsonl"),
    `${transcriptLines.join("\n")}\n`,
  );

  // Redirect the state directory to the fixture; the previous value is restored in `finally`.
  const savedStateDir = process.env.OPENCLAW_STATE_DIR;
  process.env.OPENCLAW_STATE_DIR = stateDir;

  const memoryOnlyCfg = createCfg({
    storePath,
    sources: ["memory"],
    sessionMemory: false,
  });
  const withSessionsCfg = createCfg({
    storePath,
    sources: ["memory", "sessions"],
    sessionMemory: true,
  });

  try {
    // Pass 1: memory-only config, so no session files should be counted.
    const { manager: memoryOnly } = await getMemorySearchManager({
      cfg: memoryOnlyCfg,
      agentId: "main",
    });
    expect(memoryOnly).not.toBeNull();
    if (!memoryOnly) {
      throw new Error("manager missing");
    }
    await memoryOnly.sync?.({ reason: "test" });
    const sessionsBefore = memoryOnly
      .status()
      .sourceCounts?.find((entry) => entry.source === "sessions");
    expect(sessionsBefore?.files ?? 0).toBe(0);
    await memoryOnly.close?.();

    // Pass 2: same store, sessions enabled, so the transcript must now be indexed.
    const { manager: withSessions } = await getMemorySearchManager({
      cfg: withSessionsCfg,
      agentId: "main",
    });
    expect(withSessions).not.toBeNull();
    if (!withSessions) {
      throw new Error("manager missing");
    }
    await withSessions.sync?.({ reason: "test" });
    const sessionsAfter = withSessions
      .status()
      .sourceCounts?.find((entry) => entry.source === "sessions");
    expect(sessionsAfter?.files).toBe(1);
    expect(sessionsAfter?.chunks ?? 0).toBeGreaterThan(0);
    await withSessions.close?.();
  } finally {
    if (savedStateDir !== undefined) {
      process.env.OPENCLAW_STATE_DIR = savedStateDir;
    } else {
      delete process.env.OPENCLAW_STATE_DIR;
    }
    await fs.rm(stateDir, { recursive: true, force: true });
  }
});
it("reindexes when the embedding model changes", async () => {
const indexModelPath = path.join(workspaceDir, `index-model-change-${Date.now()}.sqlite`);
const base = createCfg({ storePath: indexModelPath });

View File

@@ -45,6 +45,7 @@ type MemoryIndexMeta = {
model: string;
provider: string;
providerKey?: string;
sources?: MemorySource[];
chunkTokens: number;
chunkOverlap: number;
vectorDims?: number;
@@ -851,12 +852,14 @@ export abstract class MemoryManagerSyncOps {
}
const vectorReady = await this.ensureVectorReady();
const meta = this.readMeta();
const configuredSources = this.resolveConfiguredSourcesForMeta();
const needsFullReindex =
params?.force ||
!meta ||
(this.provider && meta.model !== this.provider.model) ||
(this.provider && meta.provider !== this.provider.id) ||
meta.providerKey !== this.providerKey ||
this.metaSourcesDiffer(meta, configuredSources) ||
meta.chunkTokens !== this.settings.chunking.tokens ||
meta.chunkOverlap !== this.settings.chunking.overlap ||
(vectorReady && !meta?.vectorDims);
@@ -1056,6 +1059,7 @@ export abstract class MemoryManagerSyncOps {
model: this.provider?.model ?? "fts-only",
provider: this.provider?.id ?? "none",
providerKey: this.providerKey!,
sources: this.resolveConfiguredSourcesForMeta(),
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
};
@@ -1126,6 +1130,7 @@ export abstract class MemoryManagerSyncOps {
model: this.provider?.model ?? "fts-only",
provider: this.provider?.id ?? "none",
providerKey: this.providerKey!,
sources: this.resolveConfiguredSourcesForMeta(),
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
};
@@ -1172,4 +1177,34 @@ export abstract class MemoryManagerSyncOps {
)
.run(META_KEY, value);
}
/**
 * Builds the source list that is stored in (and compared against) the index meta.
 *
 * Takes the entries of `this.sources`, keeps only `"memory"` and `"sessions"`,
 * and returns them sorted. Falls back to `["memory"]` when nothing remains.
 */
private resolveConfiguredSourcesForMeta(): MemorySource[] {
  const recognized = Array.from(this.sources).filter(
    (candidate): candidate is MemorySource => candidate === "memory" || candidate === "sessions",
  );
  if (recognized.length === 0) {
    return ["memory"];
  }
  return recognized.toSorted();
}
/**
 * Normalizes the sources recorded in persisted index meta.
 *
 * @param meta - Index meta as read from the store.
 * @returns The recorded sources, restricted to `"memory"`/`"sessions"`,
 *   de-duplicated and sorted; `["memory"]` when the field is absent, is not an
 *   array, or contains no recognized entry.
 */
private normalizeMetaSources(meta: MemoryIndexMeta): MemorySource[] {
  const persisted = meta.sources;
  if (!Array.isArray(persisted)) {
    // Backward compatibility for older indexes that did not persist sources.
    return ["memory"];
  }
  const unique = new Set<MemorySource>();
  for (const entry of persisted) {
    if (entry === "memory" || entry === "sessions") {
      unique.add(entry);
    }
  }
  if (unique.size === 0) {
    return ["memory"];
  }
  return Array.from(unique).toSorted();
}
/**
 * Reports whether the sources recorded in `meta` differ from the configured ones.
 *
 * The comparison is positional: the meta side is normalized (and therefore sorted)
 * first, so `configuredSources` is expected to be sorted as well.
 *
 * @param meta - Persisted index meta.
 * @param configuredSources - Sources currently configured for the index.
 * @returns `true` when the lengths differ or any position holds a different source.
 */
private metaSourcesDiffer(meta: MemoryIndexMeta, configuredSources: MemorySource[]): boolean {
  const persisted = this.normalizeMetaSources(meta);
  return (
    persisted.length !== configuredSources.length ||
    !persisted.every((source, position) => source === configuredSources[position])
  );
}
}