From 42590106ab6ea29106ee522e4e1acfb0f0ec880c Mon Sep 17 00:00:00 2001 From: Anonymous Amit <134582556+AnonAmit@users.noreply.github.com> Date: Sun, 12 Apr 2026 23:06:28 +0530 Subject: [PATCH] improve memory fallback lexical ranking (#65395) * improve memory fallback lexical ranking * use neutral lexical fallback fixtures * fix(memory-core): keep lexical boosts out of hybrid search --------- Co-authored-by: Vincent Koc --- docs/concepts/memory-search.md | 2 + .../src/memory/manager-search.test.ts | 62 +++++++++++++++++++ .../memory-core/src/memory/manager-search.ts | 47 +++++++++++++- extensions/memory-core/src/memory/manager.ts | 10 ++- 4 files changed, 118 insertions(+), 3 deletions(-) diff --git a/docs/concepts/memory-search.md b/docs/concepts/memory-search.md index 795a7a0a21f..944c006e118 100644 --- a/docs/concepts/memory-search.md +++ b/docs/concepts/memory-search.md @@ -67,6 +67,8 @@ flowchart LR If only one path is available (no embeddings or no FTS), the other runs alone. +When embeddings are unavailable, OpenClaw still uses lexical ranking over FTS results instead of falling back to raw exact-match ordering only. That degraded mode boosts chunks with stronger query-term coverage and relevant file paths, which keeps recall useful even without `sqlite-vec` or an embedding provider. + ## Improving search quality Two optional features help when you have a large note history: diff --git a/extensions/memory-core/src/memory/manager-search.test.ts b/extensions/memory-core/src/memory/manager-search.test.ts index 32eee675820..2256e32c08f 100644 --- a/extensions/memory-core/src/memory/manager-search.test.ts +++ b/extensions/memory-core/src/memory/manager-search.test.ts @@ -25,6 +25,7 @@ describe("searchKeyword trigram fallback", () => { async function runSearch(params: { rows: Array<{ id: string; path: string; text: string }>; query: string; + boostFallbackRanking?: boolean; }) { const db = createTrigramDb(); try { @@ -45,6 +46,7 @@ describe("searchKeyword trigram fallback", () => { sourceFilter: { sql: "", params: [] }, buildFtsQuery, bm25RankToScore, + boostFallbackRanking: params.boostFallbackRanking, }); } finally { db.close(); @@ -85,4 +87,64 @@ describe("searchKeyword trigram fallback", () => { expect(results.map((row) => row.id)).toEqual(["match"]); expect(results[0]?.textScore).toBeGreaterThan(0); }); + + it("applies fallback lexical boosts without exceeding bounded scores", async () => { + const results = await runSearch({ + rows: [ + { + id: "strong", + path: "memory/project-memory-notes.md", + text: "Project memory notes covering workspace context and retrieval behavior.", + }, + { + id: "weak", + path: "memory/notes.md", + text: "Project memory context.", + }, + ], + query: "project memory context", + boostFallbackRanking: true, + }); + expect(results.map((row) => row.id)).toEqual(["weak", "strong"]); + const rawResults = await runSearch({ + rows: [ + { + id: "strong", + path: "memory/project-memory-notes.md", + text: "Project memory notes covering workspace context and retrieval behavior.", + }, + { + id: "weak", + path: "memory/notes.md", + text: "Project memory context.", + }, + ], + query: "project memory context", + boostFallbackRanking: false, + }); + + const boostedById = new Map(results.map((row) => [row.id, row])); + const rawById = new Map(rawResults.map((row) => [row.id, row])); + expect(rawById.get("strong")?.textScore).toBeLessThan(rawById.get("weak")?.textScore ?? 0); + expect(boostedById.get("strong")?.score).toBeGreaterThan(boostedById.get("weak")?.score ?? 0); + expect(boostedById.get("strong")?.textScore).toBe(rawById.get("strong")?.textScore); + expect(boostedById.get("weak")?.textScore).toBe(rawById.get("weak")?.textScore); + expect(boostedById.get("strong")?.score).toBeLessThanOrEqual(1); + expect(boostedById.get("weak")?.score).toBeLessThanOrEqual(1); + }); + + it("does not overweight repeated query tokens in fallback scoring", async () => { + const unique = await runSearch({ + rows: [{ id: "1", path: "memory/project.md", text: "Project memory context." }], + query: "project memory context", + boostFallbackRanking: true, + }); + const repeated = await runSearch({ + rows: [{ id: "1", path: "memory/project.md", text: "Project memory context." }], + query: "project project project memory context", + boostFallbackRanking: true, + }); + + expect(repeated[0]?.score).toBe(unique[0]?.score); + }); }); diff --git a/extensions/memory-core/src/memory/manager-search.ts b/extensions/memory-core/src/memory/manager-search.ts index 039118c5361..650abfdcb99 100644 --- a/extensions/memory-core/src/memory/manager-search.ts +++ b/extensions/memory-core/src/memory/manager-search.ts @@ -22,6 +22,42 @@ export type SearchRowResult = { source: SearchSource; }; +function normalizeSearchTokens(raw: string): string[] { + return ( + raw + .match(FTS_QUERY_TOKEN_RE) + ?.map((token) => token.trim().toLowerCase()) + .filter(Boolean) ?? [] + ); +} + +function scoreFallbackKeywordResult(params: { + query: string; + path: string; + text: string; + ftsScore: number; +}): number { + const queryTokens = [...new Set(normalizeSearchTokens(params.query))]; + if (queryTokens.length === 0) { + return params.ftsScore; + } + + const textTokens = normalizeSearchTokens(params.text); + const textTokenSet = new Set(textTokens); + const pathLower = params.path.toLowerCase(); + const overlap = queryTokens.filter((token) => textTokenSet.has(token)).length; + const uniqueQueryOverlap = overlap / Math.max(new Set(queryTokens).size, 1); + const density = overlap / Math.max(textTokenSet.size, 1); + const pathBoost = queryTokens.reduce( + (score, token) => score + (pathLower.includes(token) ? 0.18 : 0), + 0, + ); + const textLengthBoost = Math.min(params.text.length / 160, 0.18); + + const lexicalBoost = uniqueQueryOverlap * 0.45 + density * 0.2 + pathBoost + textLengthBoost; + return Math.min(1, params.ftsScore + lexicalBoost); +} + function escapeLikePattern(term: string): string { return term.replaceAll("\\", "\\\\").replaceAll("%", "\\%").replaceAll("_", "\\_"); } @@ -198,6 +234,7 @@ export async function searchKeyword(params: { sourceFilter: { sql: string; params: SearchSource[] }; buildFtsQuery: (raw: string) => string | null; bm25RankToScore: (rank: number) => number; + boostFallbackRanking?: boolean; }): Promise> { if (params.limit <= 0) { return []; @@ -249,12 +286,20 @@ export async function searchKeyword(params: { return rows.map((row) => { const textScore = plan.matchQuery ? params.bm25RankToScore(row.rank) : 1; + const score = params.boostFallbackRanking + ? scoreFallbackKeywordResult({ + query: params.query, + path: row.path, + text: row.text, + ftsScore: textScore, + }) + : textScore; return { id: row.id, path: row.path, startLine: row.start_line, endLine: row.end_line, - score: textScore, + score, textScore, snippet: truncateUtf16Safe(row.text, params.snippetMaxChars), source: row.source, diff --git a/extensions/memory-core/src/memory/manager.ts b/extensions/memory-core/src/memory/manager.ts index 8a173400f15..78385ea7a35 100644 --- a/extensions/memory-core/src/memory/manager.ts +++ b/extensions/memory-core/src/memory/manager.ts @@ -345,7 +345,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem return []; } - const fullQueryResults = await this.searchKeyword(cleaned, candidates).catch(() => []); + const fullQueryResults = await this.searchKeyword(cleaned, candidates, { + boostFallbackRanking: true, + }).catch(() => []); const resultSets = fullQueryResults.length > 0 ? [fullQueryResults] @@ -358,7 +360,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem }); const searchTerms = keywords.length > 0 ? keywords : [cleaned]; return searchTerms.map((term) => - this.searchKeyword(term, candidates).catch(() => []), + this.searchKeyword(term, candidates, { boostFallbackRanking: true }).catch( + () => [], + ), ); })(), ); @@ -495,6 +499,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem private async searchKeyword( query: string, limit: number, + options?: { boostFallbackRanking?: boolean }, ): Promise> { if (!this.fts.enabled || !this.fts.available) { return []; @@ -513,6 +518,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem sourceFilter, buildFtsQuery: (raw) => this.buildFtsQuery(raw), bm25RankToScore, + boostFallbackRanking: options?.boostFallbackRanking, }); return results.map((entry) => entry as MemorySearchResult & { id: string; textScore: number }); }