improve memory fallback lexical ranking (#65395)

* improve memory fallback lexical ranking
* use neutral lexical fallback fixtures
* fix(memory-core): keep lexical boosts out of hybrid search

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
2026-04-15 10:51:27 +00:00 · 2026-04-12 23:06:28 +05:30
parent 9259e593e6
commit 42590106ab
4 changed files with 118 additions and 3 deletions
--- a/docs/concepts/memory-search.md
+++ b/docs/concepts/memory-search.md
@@ -67,6 +67,8 @@ flowchart LR

 If only one path is available (no embeddings or no FTS), the other runs alone.

+When embeddings are unavailable, OpenClaw still applies lexical ranking to FTS results instead of relying on raw exact-match ordering alone. This degraded mode boosts chunks with stronger query-term coverage and chunks whose file paths contain query terms, which keeps recall useful even without `sqlite-vec` or an embedding provider.
+
 ## Improving search quality

 Two optional features help when you have a large note history:
--- a/extensions/memory-core/src/memory/manager-search.test.ts
+++ b/extensions/memory-core/src/memory/manager-search.test.ts
@@ -25,6 +25,7 @@ describe("searchKeyword trigram fallback", () => {
  async function runSearch(params: {
    rows: Array<{ id: string; path: string; text: string }>;
    query: string;
+    boostFallbackRanking?: boolean;
  }) {
    const db = createTrigramDb();
    try {
@@ -45,6 +46,7 @@ describe("searchKeyword trigram fallback", () => {
        sourceFilter: { sql: "", params: [] },
        buildFtsQuery,
        bm25RankToScore,
+        boostFallbackRanking: params.boostFallbackRanking,
      });
    } finally {
      db.close();
@@ -85,4 +87,64 @@ describe("searchKeyword trigram fallback", () => {
    expect(results.map((row) => row.id)).toEqual(["match"]);
    expect(results[0]?.textScore).toBeGreaterThan(0);
  });
+
+  it("applies fallback lexical boosts without exceeding bounded scores", async () => {
+    // Both searches use identical chunks and the same query; only the
+    // boostFallbackRanking flag differs between them.
+    const makeRows = () => [
+      {
+        id: "strong",
+        path: "memory/project-memory-notes.md",
+        text: "Project memory notes covering workspace context and retrieval behavior.",
+      },
+      {
+        id: "weak",
+        path: "memory/notes.md",
+        text: "Project memory context.",
+      },
+    ];
+    const query = "project memory context";
+
+    const findById = <T extends { id: string }>(list: T[], id: string) =>
+      list.find((row) => row.id === id);
+
+    const boosted = await runSearch({ rows: makeRows(), query, boostFallbackRanking: true });
+    // Row order as returned by the boosted search.
+    expect(boosted.map((row) => row.id)).toEqual(["weak", "strong"]);
+
+    const raw = await runSearch({ rows: makeRows(), query, boostFallbackRanking: false });
+
+    const boostedStrong = findById(boosted, "strong");
+    const boostedWeak = findById(boosted, "weak");
+    const rawStrong = findById(raw, "strong");
+    const rawWeak = findById(raw, "weak");
+
+    // Unboosted text scores put "weak" ahead of "strong" ...
+    expect(rawStrong?.textScore).toBeLessThan(rawWeak?.textScore ?? 0);
+    // ... while the boosted score puts "strong" ahead of "weak".
+    expect(boostedStrong?.score).toBeGreaterThan(boostedWeak?.score ?? 0);
+
+    // Boosting changes `score` only; `textScore` matches the unboosted run.
+    expect(boostedStrong?.textScore).toBe(rawStrong?.textScore);
+    expect(boostedWeak?.textScore).toBe(rawWeak?.textScore);
+
+    // Boosted scores never exceed 1.
+    expect(boostedStrong?.score).toBeLessThanOrEqual(1);
+    expect(boostedWeak?.score).toBeLessThanOrEqual(1);
+  });
+
+  it("does not overweight repeated query tokens in fallback scoring", async () => {
+    // Same single-row corpus for every run; returns the first hit's boosted score.
+    const scoreFor = async (query: string) => {
+      const [first] = await runSearch({
+        rows: [{ id: "1", path: "memory/project.md", text: "Project memory context." }],
+        query,
+        boostFallbackRanking: true,
+      });
+      return first?.score;
+    };
+    const uniqueScore = await scoreFor("project memory context");
+    const repeatedScore = await scoreFor("project project project memory context");
+    expect(repeatedScore).toBe(uniqueScore);
+  });
 });
--- a/extensions/memory-core/src/memory/manager-search.ts
+++ b/extensions/memory-core/src/memory/manager-search.ts
@@ -22,6 +22,42 @@ export type SearchRowResult = {
  source: SearchSource;
 };

+/**
+ * Returns the pieces of `raw` matched by FTS_QUERY_TOKEN_RE, trimmed and
+ * lowercased, dropping any that end up empty; [] when nothing matches.
+ */
+function normalizeSearchTokens(raw: string): string[] {
+  const matched: string[] = raw.match(FTS_QUERY_TOKEN_RE) ?? [];
+  return matched.map((piece) => piece.trim().toLowerCase()).filter(Boolean);
+}
+
+/**
+ * Fallback score for one keyword hit: FTS score plus lexical boosts, capped at 1.
+ */
+function scoreFallbackKeywordResult(params: {
+  query: string;
+  path: string;
+  text: string;
+  ftsScore: number;
+}): number {
+  const wanted = [...new Set(normalizeSearchTokens(params.query))]; // repeats count once
+  if (wanted.length === 0) {
+    return params.ftsScore; // no usable query tokens: leave the FTS score untouched
+  }
+  const present = new Set(normalizeSearchTokens(params.text));
+  const lowerPath = params.path.toLowerCase();
+  let hits = 0; // distinct query tokens found among the text tokens
+  let pathBoost = 0; // 0.18 per distinct query token that is a substring of the path
+  for (const token of wanted) {
+    hits += present.has(token) ? 1 : 0;
+    pathBoost += lowerPath.includes(token) ? 0.18 : 0;
+  }
+  const lengthBoost = Math.min(params.text.length / 160, 0.18);
+  const boost =
+    (hits / wanted.length) * 0.45 + (hits / Math.max(present.size, 1)) * 0.2 + pathBoost + lengthBoost;
+  return Math.min(1, params.ftsScore + boost);
+}
+
 function escapeLikePattern(term: string): string {
   return term.replace(/[\\%_]/g, (ch) => `\\${ch}`); // prefix each \, % and _ with a backslash
 }
@@ -198,6 +234,7 @@ export async function searchKeyword(params: {
  sourceFilter: { sql: string; params: SearchSource[] };
  buildFtsQuery: (raw: string) => string | null;
  bm25RankToScore: (rank: number) => number;
+  boostFallbackRanking?: boolean;
 }): Promise<Array<SearchRowResult & { textScore: number }>> {
  if (params.limit <= 0) {
    return [];
@@ -249,12 +286,20 @@ export async function searchKeyword(params: {

  return rows.map((row) => {
    const textScore = plan.matchQuery ? params.bm25RankToScore(row.rank) : 1;
+    const score = params.boostFallbackRanking
+      ? scoreFallbackKeywordResult({
+          query: params.query,
+          path: row.path,
+          text: row.text,
+          ftsScore: textScore,
+        })
+      : textScore;
    return {
      id: row.id,
      path: row.path,
      startLine: row.start_line,
      endLine: row.end_line,
-      score: textScore,
+      score,
      textScore,
      snippet: truncateUtf16Safe(row.text, params.snippetMaxChars),
      source: row.source,
--- a/extensions/memory-core/src/memory/manager.ts
+++ b/extensions/memory-core/src/memory/manager.ts
@@ -345,7 +345,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
        return [];
      }

-      const fullQueryResults = await this.searchKeyword(cleaned, candidates).catch(() => []);
+      const fullQueryResults = await this.searchKeyword(cleaned, candidates, {
+        boostFallbackRanking: true,
+      }).catch(() => []);
      const resultSets =
        fullQueryResults.length > 0
          ? [fullQueryResults]
@@ -358,7 +360,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
                });
                const searchTerms = keywords.length > 0 ? keywords : [cleaned];
                return searchTerms.map((term) =>
-                  this.searchKeyword(term, candidates).catch(() => []),
+                  this.searchKeyword(term, candidates, { boostFallbackRanking: true }).catch(
+                    () => [],
+                  ),
                );
              })(),
            );
@@ -495,6 +499,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
  private async searchKeyword(
    query: string,
    limit: number,
+    options?: { boostFallbackRanking?: boolean },
  ): Promise<Array<MemorySearchResult & { id: string; textScore: number }>> {
    if (!this.fts.enabled || !this.fts.available) {
      return [];
@@ -513,6 +518,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
      sourceFilter,
      buildFtsQuery: (raw) => this.buildFtsQuery(raw),
      bm25RankToScore,
+      boostFallbackRanking: options?.boostFallbackRanking,
    });
    return results.map((entry) => entry as MemorySearchResult & { id: string; textScore: number });
  }