improve memory fallback lexical ranking (#65395)

* improve memory fallback lexical ranking

* use neutral lexical fallback fixtures

* fix(memory-core): keep lexical boosts out of hybrid search

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
Anonymous Amit
2026-04-12 23:06:28 +05:30
committed by GitHub
parent 9259e593e6
commit 42590106ab
4 changed files with 118 additions and 3 deletions

View File

@@ -67,6 +67,8 @@ flowchart LR
If only one path is available (no embeddings or no FTS), the other runs alone.
When embeddings are unavailable, OpenClaw still applies lexical ranking to FTS results rather than relying solely on raw exact-match ordering. In this degraded mode, chunks with stronger query-term coverage and relevant file paths are boosted, which keeps recall useful even without `sqlite-vec` or an embedding provider.
## Improving search quality
Two optional features help when you have a large note history:

View File

@@ -25,6 +25,7 @@ describe("searchKeyword trigram fallback", () => {
async function runSearch(params: {
rows: Array<{ id: string; path: string; text: string }>;
query: string;
boostFallbackRanking?: boolean;
}) {
const db = createTrigramDb();
try {
@@ -45,6 +46,7 @@ describe("searchKeyword trigram fallback", () => {
sourceFilter: { sql: "", params: [] },
buildFtsQuery,
bm25RankToScore,
boostFallbackRanking: params.boostFallbackRanking,
});
} finally {
db.close();
@@ -85,4 +87,64 @@ describe("searchKeyword trigram fallback", () => {
expect(results.map((row) => row.id)).toEqual(["match"]);
expect(results[0]?.textScore).toBeGreaterThan(0);
});
it("applies fallback lexical boosts without exceeding bounded scores", async () => {
  // Build a fresh copy of the fixture for each search so the boosted and raw
  // runs index exactly the same rows without sharing object identity.
  const buildRows = () => [
    {
      id: "strong",
      path: "memory/project-memory-notes.md",
      text: "Project memory notes covering workspace context and retrieval behavior.",
    },
    {
      id: "weak",
      path: "memory/notes.md",
      text: "Project memory context.",
    },
  ];
  const query = "project memory context";

  // Boosted run: the returned order is asserted as-is.
  const results = await runSearch({ rows: buildRows(), query, boostFallbackRanking: true });
  expect(results.map((row) => row.id)).toEqual(["weak", "strong"]);

  // Raw run: same rows and query with the fallback boost turned off.
  const rawResults = await runSearch({ rows: buildRows(), query, boostFallbackRanking: false });

  const boostedById = new Map(results.map((row) => [row.id, row]));
  const rawById = new Map(rawResults.map((row) => [row.id, row]));
  const boostedStrong = boostedById.get("strong");
  const boostedWeak = boostedById.get("weak");
  const rawStrong = rawById.get("strong");
  const rawWeak = rawById.get("weak");

  // Without boosting, the short exact chunk outranks the longer one...
  expect(rawStrong?.textScore).toBeLessThan(rawWeak?.textScore ?? 0);
  // ...while the boosted score favors the chunk with the more relevant path.
  expect(boostedStrong?.score).toBeGreaterThan(boostedWeak?.score ?? 0);

  // Boosting must only touch `score`; `textScore` stays the raw FTS value.
  expect(boostedStrong?.textScore).toBe(rawStrong?.textScore);
  expect(boostedWeak?.textScore).toBe(rawWeak?.textScore);

  // Boosted scores remain bounded above by 1.
  expect(boostedStrong?.score).toBeLessThanOrEqual(1);
  expect(boostedWeak?.score).toBeLessThanOrEqual(1);
});
it("does not overweight repeated query tokens in fallback scoring", async () => {
  // Runs a boosted search over the same single-row fixture and returns the
  // top result's score (undefined when nothing matched).
  const topScoreFor = async (query: string) => {
    const [first] = await runSearch({
      rows: [{ id: "1", path: "memory/project.md", text: "Project memory context." }],
      query,
      boostFallbackRanking: true,
    });
    return first?.score;
  };

  const uniqueScore = await topScoreFor("project memory context");
  const repeatedScore = await topScoreFor("project project project memory context");

  // Repeating a query term must not change the score.
  expect(repeatedScore).toBe(uniqueScore);
});
});

View File

@@ -22,6 +22,42 @@ export type SearchRowResult = {
source: SearchSource;
};
/**
 * Extracts the substrings of `raw` matched by `FTS_QUERY_TOKEN_RE`, trims and
 * lowercases each one, and drops any that end up empty.
 *
 * @param raw - Text to tokenize (a query or a chunk body).
 * @returns The normalized tokens in match order; `[]` when nothing matches.
 */
function normalizeSearchTokens(raw: string): string[] {
  const matches = raw.match(FTS_QUERY_TOKEN_RE);
  if (!matches) {
    return [];
  }
  const tokens: string[] = [];
  for (const match of matches) {
    const normalized = match.trim().toLowerCase();
    if (normalized) {
      tokens.push(normalized);
    }
  }
  return tokens;
}
/**
 * Adds a lexical boost to an FTS score and caps the result at 1.
 *
 * The boost is the sum of four parts:
 * - the fraction of distinct query tokens found in the text (weight 0.45),
 * - matched tokens relative to the number of distinct text tokens (weight 0.2),
 * - 0.18 for every distinct query token that occurs as a substring of the
 *   lowercased path,
 * - a text-length term, `text.length / 160`, capped at 0.18.
 *
 * Query tokens are de-duplicated first, so repeating a term in the query does
 * not change the score.
 *
 * @param params.query - Raw user query.
 * @param params.path - Path of the chunk's file.
 * @param params.text - Chunk text.
 * @param params.ftsScore - Base score to boost.
 * @returns `ftsScore` unchanged when the query has no tokens; otherwise
 *   `min(1, ftsScore + boost)`.
 */
function scoreFallbackKeywordResult(params: {
  query: string;
  path: string;
  text: string;
  ftsScore: number;
}): number {
  const { query, path, text, ftsScore } = params;

  const distinctQueryTokens = Array.from(new Set(normalizeSearchTokens(query)));
  if (distinctQueryTokens.length === 0) {
    return ftsScore;
  }

  const distinctTextTokens = new Set(normalizeSearchTokens(text));
  const lowerPath = path.toLowerCase();

  // Single pass over the query tokens: count text hits and accumulate the
  // per-token path bonus.
  let matched = 0;
  let pathBoost = 0;
  for (const token of distinctQueryTokens) {
    if (distinctTextTokens.has(token)) {
      matched += 1;
    }
    if (lowerPath.includes(token)) {
      pathBoost += 0.18;
    }
  }

  // distinctQueryTokens is non-empty here, so its length is a safe divisor.
  const queryCoverage = matched / distinctQueryTokens.length;
  const density = matched / Math.max(distinctTextTokens.size, 1);
  const textLengthBoost = Math.min(text.length / 160, 0.18);

  const lexicalBoost = queryCoverage * 0.45 + density * 0.2 + pathBoost + textLengthBoost;
  return Math.min(1, ftsScore + lexicalBoost);
}
/**
 * Prefixes every backslash, percent sign, and underscore in `term` with a
 * backslash, leaving all other characters untouched.
 *
 * NOTE(review): presumably used to neutralize SQL `LIKE` wildcards with `\`
 * as the escape character — confirm against the query that consumes it.
 *
 * @param term - Raw search term.
 * @returns The term with `\`, `%`, and `_` backslash-escaped.
 */
function escapeLikePattern(term: string): string {
  // One pass over the string: each special character is matched exactly once,
  // so inserted backslashes are never escaped a second time.
  return term.replace(/[\\%_]/g, (special) => `\\${special}`);
}
@@ -198,6 +234,7 @@ export async function searchKeyword(params: {
sourceFilter: { sql: string; params: SearchSource[] };
buildFtsQuery: (raw: string) => string | null;
bm25RankToScore: (rank: number) => number;
boostFallbackRanking?: boolean;
}): Promise<Array<SearchRowResult & { textScore: number }>> {
if (params.limit <= 0) {
return [];
@@ -249,12 +286,20 @@ export async function searchKeyword(params: {
return rows.map((row) => {
const textScore = plan.matchQuery ? params.bm25RankToScore(row.rank) : 1;
const score = params.boostFallbackRanking
? scoreFallbackKeywordResult({
query: params.query,
path: row.path,
text: row.text,
ftsScore: textScore,
})
: textScore;
return {
id: row.id,
path: row.path,
startLine: row.start_line,
endLine: row.end_line,
score: textScore,
score,
textScore,
snippet: truncateUtf16Safe(row.text, params.snippetMaxChars),
source: row.source,

View File

@@ -345,7 +345,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
return [];
}
const fullQueryResults = await this.searchKeyword(cleaned, candidates).catch(() => []);
const fullQueryResults = await this.searchKeyword(cleaned, candidates, {
boostFallbackRanking: true,
}).catch(() => []);
const resultSets =
fullQueryResults.length > 0
? [fullQueryResults]
@@ -358,7 +360,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
});
const searchTerms = keywords.length > 0 ? keywords : [cleaned];
return searchTerms.map((term) =>
this.searchKeyword(term, candidates).catch(() => []),
this.searchKeyword(term, candidates, { boostFallbackRanking: true }).catch(
() => [],
),
);
})(),
);
@@ -495,6 +499,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
private async searchKeyword(
query: string,
limit: number,
options?: { boostFallbackRanking?: boolean },
): Promise<Array<MemorySearchResult & { id: string; textScore: number }>> {
if (!this.fts.enabled || !this.fts.available) {
return [];
@@ -513,6 +518,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
sourceFilter,
buildFtsQuery: (raw) => this.buildFtsQuery(raw),
bm25RankToScore,
boostFallbackRanking: options?.boostFallbackRanking,
});
return results.map((entry) => entry as MemorySearchResult & { id: string; textScore: number });
}