mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-15 10:51:27 +00:00
improve memory fallback lexical ranking (#65395)
* improve memory fallback lexical ranking * use neutral lexical fallback fixtures * fix(memory-core): keep lexical boosts out of hybrid search --------- Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
@@ -67,6 +67,8 @@ flowchart LR
|
||||
|
||||
If only one path is available (no embeddings or no FTS), that path runs alone.
|
||||
|
||||
When embeddings are unavailable, OpenClaw still applies lexical ranking to FTS results rather than relying solely on raw exact-match ordering. This degraded mode boosts chunks with stronger query-term coverage and relevant file paths, which keeps recall useful even without `sqlite-vec` or an embedding provider.
|
||||
|
||||
## Improving search quality
|
||||
|
||||
Two optional features help when you have a large note history:
|
||||
|
||||
@@ -25,6 +25,7 @@ describe("searchKeyword trigram fallback", () => {
|
||||
async function runSearch(params: {
|
||||
rows: Array<{ id: string; path: string; text: string }>;
|
||||
query: string;
|
||||
boostFallbackRanking?: boolean;
|
||||
}) {
|
||||
const db = createTrigramDb();
|
||||
try {
|
||||
@@ -45,6 +46,7 @@ describe("searchKeyword trigram fallback", () => {
|
||||
sourceFilter: { sql: "", params: [] },
|
||||
buildFtsQuery,
|
||||
bm25RankToScore,
|
||||
boostFallbackRanking: params.boostFallbackRanking,
|
||||
});
|
||||
} finally {
|
||||
db.close();
|
||||
@@ -85,4 +87,64 @@ describe("searchKeyword trigram fallback", () => {
|
||||
expect(results.map((row) => row.id)).toEqual(["match"]);
|
||||
expect(results[0]?.textScore).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("applies fallback lexical boosts without exceeding bounded scores", async () => {
  // Both searches use the same two-row fixture; a factory hands each call its
  // own array so neither run can observe the other's input.
  const buildRows = () => [
    {
      id: "strong",
      path: "memory/project-memory-notes.md",
      text: "Project memory notes covering workspace context and retrieval behavior.",
    },
    {
      id: "weak",
      path: "memory/notes.md",
      text: "Project memory context.",
    },
  ];
  const query = "project memory context";

  // Boosted run: row order is still expected to be "weak" first.
  const results = await runSearch({ rows: buildRows(), query, boostFallbackRanking: true });
  expect(results.map((row) => row.id)).toEqual(["weak", "strong"]);

  // Baseline run with boosting switched off, used for comparison below.
  const rawResults = await runSearch({ rows: buildRows(), query, boostFallbackRanking: false });

  const boostedById = new Map(results.map((row) => [row.id, row]));
  const rawById = new Map(rawResults.map((row) => [row.id, row]));
  const boostedStrong = boostedById.get("strong");
  const boostedWeak = boostedById.get("weak");
  const rawStrong = rawById.get("strong");
  const rawWeak = rawById.get("weak");

  // Without boosting, "strong" has the lower text score ...
  expect(rawStrong?.textScore).toBeLessThan(rawWeak?.textScore ?? 0);
  // ... but the boosted score ranks it above "weak".
  expect(boostedStrong?.score).toBeGreaterThan(boostedWeak?.score ?? 0);
  // Boosting must leave textScore untouched.
  expect(boostedStrong?.textScore).toBe(rawStrong?.textScore);
  expect(boostedWeak?.textScore).toBe(rawWeak?.textScore);
  // Boosted scores stay within the upper bound of 1.
  expect(boostedStrong?.score).toBeLessThanOrEqual(1);
  expect(boostedWeak?.score).toBeLessThanOrEqual(1);
});
|
||||
|
||||
it("does not overweight repeated query tokens in fallback scoring", async () => {
  // Runs a boosted search for `query` against a single fixed row.
  const searchSingleRow = (query: string) =>
    runSearch({
      rows: [{ id: "1", path: "memory/project.md", text: "Project memory context." }],
      query,
      boostFallbackRanking: true,
    });

  const unique = await searchSingleRow("project memory context");
  const repeated = await searchSingleRow("project project project memory context");

  // Repeating a query token must not change the boosted score.
  expect(repeated[0]?.score).toBe(unique[0]?.score);
});
|
||||
});
|
||||
|
||||
@@ -22,6 +22,42 @@ export type SearchRowResult = {
|
||||
source: SearchSource;
|
||||
};
|
||||
|
||||
/**
 * Splits raw text into normalized search tokens.
 *
 * Tokens are whatever `FTS_QUERY_TOKEN_RE` matches in `raw`. Each match is
 * trimmed and lowercased, and matches that end up empty are dropped.
 *
 * @param raw - Text to tokenize (a query string or chunk text).
 * @returns The normalized tokens in match order; an empty array when the
 *   pattern matches nothing.
 */
function normalizeSearchTokens(raw: string): string[] {
  const matches = raw.match(FTS_QUERY_TOKEN_RE);
  if (!matches) {
    return [];
  }

  const tokens: string[] = [];
  for (const match of matches) {
    const normalized = match.trim().toLowerCase();
    if (normalized) {
      tokens.push(normalized);
    }
  }
  return tokens;
}
|
||||
|
||||
/**
 * Adds a lexical boost to an FTS score and caps the result at 1.
 *
 * The boost is the sum of four terms:
 * - query coverage: share of distinct query tokens found in the text, x 0.45
 * - density: matched tokens over distinct text tokens, x 0.2
 * - path boost: 0.18 for each distinct query token contained in the
 *   lowercased path
 * - text length boost: `text.length / 160`, capped at 0.18
 *
 * Query tokens are de-duplicated first, so repeating a word in the query does
 * not change the score.
 *
 * @param params.query - Raw user query.
 * @param params.path - Path of the matched chunk's file.
 * @param params.text - Text of the matched chunk.
 * @param params.ftsScore - Base score the boost is added to.
 * @returns `ftsScore` unchanged when the query yields no tokens; otherwise
 *   `min(1, ftsScore + boost)`.
 */
function scoreFallbackKeywordResult(params: {
  query: string;
  path: string;
  text: string;
  ftsScore: number;
}): number {
  const { query, path, text, ftsScore } = params;

  const distinctQueryTokens = Array.from(new Set(normalizeSearchTokens(query)));
  if (distinctQueryTokens.length === 0) {
    return ftsScore;
  }

  const textVocabulary = new Set(normalizeSearchTokens(text));
  const lowerPath = path.toLowerCase();

  // One pass over the distinct query tokens collects both the text overlap
  // count and the accumulated path boost.
  let matchedInText = 0;
  let pathBoost = 0;
  for (const token of distinctQueryTokens) {
    if (textVocabulary.has(token)) {
      matchedInText += 1;
    }
    if (lowerPath.includes(token)) {
      pathBoost += 0.18;
    }
  }

  // The token list is non-empty here, so its length is a safe divisor.
  const queryCoverage = matchedInText / distinctQueryTokens.length;
  const density = matchedInText / Math.max(textVocabulary.size, 1);
  const textLengthBoost = Math.min(text.length / 160, 0.18);

  const lexicalBoost = queryCoverage * 0.45 + density * 0.2 + pathBoost + textLengthBoost;
  return Math.min(1, ftsScore + lexicalBoost);
}
|
||||
|
||||
/**
 * Prefixes every backslash, `%` and `_` in `term` with a backslash.
 *
 * NOTE(review): presumably used to embed user text in a SQL `LIKE` pattern
 * with backslash as the escape character — confirm at the call site.
 *
 * @param term - Text to escape.
 * @returns `term` with each `\`, `%` and `_` preceded by a backslash.
 */
function escapeLikePattern(term: string): string {
  // A single pass is enough: inserted backslashes are never rescanned, which
  // matches escaping backslashes first and the wildcards afterwards.
  return term.replace(/[\\%_]/g, (special) => `\\${special}`);
}
|
||||
@@ -198,6 +234,7 @@ export async function searchKeyword(params: {
|
||||
sourceFilter: { sql: string; params: SearchSource[] };
|
||||
buildFtsQuery: (raw: string) => string | null;
|
||||
bm25RankToScore: (rank: number) => number;
|
||||
boostFallbackRanking?: boolean;
|
||||
}): Promise<Array<SearchRowResult & { textScore: number }>> {
|
||||
if (params.limit <= 0) {
|
||||
return [];
|
||||
@@ -249,12 +286,20 @@ export async function searchKeyword(params: {
|
||||
|
||||
return rows.map((row) => {
|
||||
const textScore = plan.matchQuery ? params.bm25RankToScore(row.rank) : 1;
|
||||
const score = params.boostFallbackRanking
|
||||
? scoreFallbackKeywordResult({
|
||||
query: params.query,
|
||||
path: row.path,
|
||||
text: row.text,
|
||||
ftsScore: textScore,
|
||||
})
|
||||
: textScore;
|
||||
return {
|
||||
id: row.id,
|
||||
path: row.path,
|
||||
startLine: row.start_line,
|
||||
endLine: row.end_line,
|
||||
score: textScore,
|
||||
score,
|
||||
textScore,
|
||||
snippet: truncateUtf16Safe(row.text, params.snippetMaxChars),
|
||||
source: row.source,
|
||||
|
||||
@@ -345,7 +345,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
return [];
|
||||
}
|
||||
|
||||
const fullQueryResults = await this.searchKeyword(cleaned, candidates).catch(() => []);
|
||||
const fullQueryResults = await this.searchKeyword(cleaned, candidates, {
|
||||
boostFallbackRanking: true,
|
||||
}).catch(() => []);
|
||||
const resultSets =
|
||||
fullQueryResults.length > 0
|
||||
? [fullQueryResults]
|
||||
@@ -358,7 +360,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
});
|
||||
const searchTerms = keywords.length > 0 ? keywords : [cleaned];
|
||||
return searchTerms.map((term) =>
|
||||
this.searchKeyword(term, candidates).catch(() => []),
|
||||
this.searchKeyword(term, candidates, { boostFallbackRanking: true }).catch(
|
||||
() => [],
|
||||
),
|
||||
);
|
||||
})(),
|
||||
);
|
||||
@@ -495,6 +499,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
private async searchKeyword(
|
||||
query: string,
|
||||
limit: number,
|
||||
options?: { boostFallbackRanking?: boolean },
|
||||
): Promise<Array<MemorySearchResult & { id: string; textScore: number }>> {
|
||||
if (!this.fts.enabled || !this.fts.available) {
|
||||
return [];
|
||||
@@ -513,6 +518,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
sourceFilter,
|
||||
buildFtsQuery: (raw) => this.buildFtsQuery(raw),
|
||||
bm25RankToScore,
|
||||
boostFallbackRanking: options?.boostFallbackRanking,
|
||||
});
|
||||
return results.map((entry) => entry as MemorySearchResult & { id: string; textScore: number });
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user