mirror of
https://github.com/moltbot/moltbot.git
synced 2026-03-08 06:54:24 +00:00
fix: local updates for PR #3600
Co-authored-by: kira-ariaki <kira-ariaki@users.noreply.github.com>
This commit is contained in:
committed by
Gustavo Madeira Santana
parent
0fd9d3abd1
commit
a44da67069
@@ -13,6 +13,7 @@ Status: beta.
|
||||
- Branding: update launchd labels, mobile bundle IDs, and logging subsystems to bot.molt (legacy com.clawdbot migrations). Thanks @thewilloftheshadow.
|
||||
- Tools: add per-sender group tool policies and fix precedence. (#1757) Thanks @adam91holt.
|
||||
- Agents: summarize dropped messages during compaction safeguard pruning. (#2509) Thanks @jogi47.
|
||||
- Memory Search: allow extra paths for memory indexing (ignores symlinks). (#3600) Thanks @kira-ariaki.
|
||||
- Skills: add multi-image input support to Nano Banana Pro skill. (#1958) Thanks @tyler6204.
|
||||
- Agents: honor tools.exec.safeBins in exec allowlist checks. (#2281)
|
||||
- Matrix: switch plugin SDK to @vector-im/matrix-bot-sdk.
|
||||
|
||||
@@ -39,3 +39,4 @@ Notes:
|
||||
- `memory status --deep` probes vector + embedding availability.
|
||||
- `memory status --deep --index` runs a reindex if the store is dirty.
|
||||
- `memory index --verbose` prints per-phase details (provider, model, sources, batch activity).
|
||||
- `memory status` includes any extra paths configured via `memorySearch.extraPaths`.
|
||||
|
||||
@@ -75,8 +75,9 @@ For the full compaction lifecycle, see
|
||||
|
||||
## Vector memory search
|
||||
|
||||
Moltbot can build a small vector index over `MEMORY.md` and `memory/*.md` so
|
||||
semantic queries can find related notes even when wording differs.
|
||||
Moltbot can build a small vector index over `MEMORY.md` and `memory/*.md` (plus
|
||||
any extra directories or files you opt in) so semantic queries can find related
|
||||
notes even when wording differs.
|
||||
|
||||
Defaults:
|
||||
- Enabled by default.
|
||||
@@ -96,6 +97,27 @@ embeddings for memory search. For Gemini, use `GEMINI_API_KEY` or
|
||||
`models.providers.google.apiKey`. When using a custom OpenAI-compatible endpoint,
|
||||
set `memorySearch.remote.apiKey` (and optional `memorySearch.remote.headers`).
|
||||
|
||||
### Additional memory paths
|
||||
|
||||
If you want to index Markdown files outside the default workspace layout, add
|
||||
explicit paths:
|
||||
|
||||
```json5
|
||||
agents: {
|
||||
defaults: {
|
||||
memorySearch: {
|
||||
extraPaths: ["../team-docs", "/srv/shared-notes/overview.md"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Notes:
|
||||
- Paths can be absolute or workspace-relative.
|
||||
- Directories are scanned recursively for `.md` files.
|
||||
- Only Markdown files are indexed.
|
||||
- Symlinks are ignored (files or directories).
|
||||
|
||||
### Gemini embeddings (native)
|
||||
|
||||
Set the provider to `gemini` to use the Gemini embeddings API directly:
|
||||
@@ -189,14 +211,14 @@ Local mode:
|
||||
### How the memory tools work
|
||||
|
||||
- `memory_search` semantically searches Markdown chunks (~400 token target, 80-token overlap) from `MEMORY.md` + `memory/**/*.md`. It returns snippet text (capped ~700 chars), file path, line range, score, provider/model, and whether we fell back from local → remote embeddings. No full file payload is returned.
|
||||
- `memory_get` reads a specific memory Markdown file (workspace-relative), optionally from a starting line and for N lines. Paths outside `MEMORY.md` / `memory/` are rejected.
|
||||
- `memory_get` reads a specific memory Markdown file (workspace-relative), optionally from a starting line and for N lines. Paths outside `MEMORY.md` / `memory/` are allowed only when they are listed in `memorySearch.extraPaths` or sit inside a directory listed there.
|
||||
- Both tools are enabled only when `memorySearch.enabled` resolves true for the agent.
|
||||
|
||||
### What gets indexed (and when)
|
||||
|
||||
- File type: Markdown only (`MEMORY.md`, `memory/**/*.md`).
|
||||
- File type: Markdown only (`MEMORY.md`, `memory/**/*.md`, plus any `.md` files under `memorySearch.extraPaths`).
|
||||
- Index storage: per-agent SQLite at `~/.clawdbot/memory/<agentId>.sqlite` (configurable via `agents.defaults.memorySearch.store.path`, supports `{agentId}` token).
|
||||
- Freshness: watcher on `MEMORY.md` + `memory/` marks the index dirty (debounce 1.5s). Sync is scheduled on session start, on search, or on an interval and runs asynchronously. Session transcripts use delta thresholds to trigger background sync.
|
||||
- Freshness: watcher on `MEMORY.md`, `memory/`, and `memorySearch.extraPaths` marks the index dirty (debounce 1.5s). Sync is scheduled on session start, on search, or on an interval and runs asynchronously. Session transcripts use delta thresholds to trigger background sync.
|
||||
- Reindex triggers: the index stores the embedding **provider/model + endpoint fingerprint + chunking params**. If any of those change, Moltbot automatically resets and reindexes the entire store.
|
||||
|
||||
### Hybrid search (BM25 + vector)
|
||||
|
||||
@@ -267,7 +267,8 @@ Save to `~/.clawdbot/moltbot.json` and you can DM the bot from that number.
|
||||
model: "gemini-embedding-001",
|
||||
remote: {
|
||||
apiKey: "${GEMINI_API_KEY}"
|
||||
}
|
||||
},
|
||||
extraPaths: ["../team-docs", "/srv/shared-notes"]
|
||||
},
|
||||
sandbox: {
|
||||
mode: "non-main",
|
||||
|
||||
@@ -82,6 +82,29 @@ describe("memory search config", () => {
|
||||
expect(resolved?.store.vector.extensionPath).toBe("/opt/sqlite-vec.dylib");
|
||||
});
|
||||
|
||||
// Extra paths from `agents.defaults` and from the matching agent entry are
// expected to be combined (defaults first), trimmed, and de-duplicated.
it("merges extra memory paths from defaults and overrides", () => {
  const defaultExtraPaths = ["/shared/notes", " docs "];
  const agentExtraPaths = ["/shared/notes", "../team-notes"];
  const cfg = {
    agents: {
      defaults: {
        memorySearch: { extraPaths: defaultExtraPaths },
      },
      list: [
        {
          id: "main",
          default: true,
          memorySearch: { extraPaths: agentExtraPaths },
        },
      ],
    },
  };

  const resolved = resolveMemorySearchConfig(cfg, "main");

  // "/shared/notes" appears in both lists but only once in the result;
  // " docs " loses its surrounding whitespace.
  expect(resolved?.extraPaths).toEqual(["/shared/notes", "docs", "../team-notes"]);
});
|
||||
|
||||
it("includes batch defaults for openai without remote overrides", () => {
|
||||
const cfg = {
|
||||
agents: {
|
||||
|
||||
@@ -9,7 +9,7 @@ import { resolveAgentConfig } from "./agent-scope.js";
|
||||
export type ResolvedMemorySearchConfig = {
|
||||
enabled: boolean;
|
||||
sources: Array<"memory" | "sessions">;
|
||||
paths: string[];
|
||||
extraPaths: string[];
|
||||
provider: "openai" | "local" | "gemini" | "auto";
|
||||
remote?: {
|
||||
baseUrl?: string;
|
||||
@@ -163,9 +163,10 @@ function mergeConfig(
|
||||
modelCacheDir: overrides?.local?.modelCacheDir ?? defaults?.local?.modelCacheDir,
|
||||
};
|
||||
const sources = normalizeSources(overrides?.sources ?? defaults?.sources, sessionMemory);
|
||||
// Merge paths from defaults and overrides (both arrays combined, deduped)
|
||||
const pathsSet = new Set<string>([...(defaults?.paths ?? []), ...(overrides?.paths ?? [])]);
|
||||
const paths = Array.from(pathsSet);
|
||||
const rawPaths = [...(defaults?.extraPaths ?? []), ...(overrides?.extraPaths ?? [])]
|
||||
.map((value) => value.trim())
|
||||
.filter(Boolean);
|
||||
const extraPaths = Array.from(new Set(rawPaths));
|
||||
const vector = {
|
||||
enabled: overrides?.store?.vector?.enabled ?? defaults?.store?.vector?.enabled ?? true,
|
||||
extensionPath:
|
||||
@@ -240,7 +241,7 @@ function mergeConfig(
|
||||
return {
|
||||
enabled,
|
||||
sources,
|
||||
paths,
|
||||
extraPaths,
|
||||
provider,
|
||||
remote,
|
||||
experimental: {
|
||||
|
||||
@@ -83,7 +83,7 @@ export function createMemoryGetTool(options: {
|
||||
label: "Memory Get",
|
||||
name: "memory_get",
|
||||
description:
|
||||
"Safe snippet read from MEMORY.md or memory/*.md with optional from/lines; use after memory_search to pull only the needed lines and keep context small.",
|
||||
"Safe snippet read from MEMORY.md, memory/*.md, or configured memorySearch.extraPaths with optional from/lines; use after memory_search to pull only the needed lines and keep context small.",
|
||||
parameters: MemoryGetSchema,
|
||||
execute: async (_toolCallId, params) => {
|
||||
const relPath = readStringParam(params, "path", { required: true });
|
||||
|
||||
@@ -12,7 +12,7 @@ import { setVerbose } from "../globals.js";
|
||||
import { withProgress, withProgressTotals } from "./progress.js";
|
||||
import { formatErrorMessage, withManager } from "./cli-utils.js";
|
||||
import { getMemorySearchManager, type MemorySearchManagerResult } from "../memory/index.js";
|
||||
import { listMemoryFiles } from "../memory/internal.js";
|
||||
import { listMemoryFiles, normalizeExtraMemoryPaths } from "../memory/internal.js";
|
||||
import { defaultRuntime } from "../runtime.js";
|
||||
import { formatDocsLink } from "../terminal/links.js";
|
||||
import { colorize, isRich, theme } from "../terminal/theme.js";
|
||||
@@ -74,6 +74,10 @@ function resolveAgentIds(cfg: ReturnType<typeof loadConfig>, agent?: string): st
|
||||
return [resolveDefaultAgentId(cfg)];
|
||||
}
|
||||
|
||||
/**
 * Prepare configured extra memory paths for display in CLI output.
 *
 * The entries are first resolved with `normalizeExtraMemoryPaths` and each
 * result is then passed through `shortenHomePath` (presumably abbreviating the
 * user's home directory; see that helper for the exact format).
 *
 * @param workspaceDir - Workspace root handed to `normalizeExtraMemoryPaths`.
 * @param extraPaths - Raw extra path entries from the memory search settings.
 * @returns One display string per normalized path, in the same order.
 */
function formatExtraPaths(workspaceDir: string, extraPaths: string[]): string[] {
  const resolvedPaths = normalizeExtraMemoryPaths(workspaceDir, extraPaths);
  return resolvedPaths.map((entry) => shortenHomePath(entry));
}
|
||||
|
||||
async function checkReadableFile(pathname: string): Promise<{ exists: boolean; issue?: string }> {
|
||||
try {
|
||||
await fs.access(pathname, fsSync.constants.R_OK);
|
||||
@@ -110,7 +114,10 @@ async function scanSessionFiles(agentId: string): Promise<SourceScan> {
|
||||
}
|
||||
}
|
||||
|
||||
async function scanMemoryFiles(workspaceDir: string): Promise<SourceScan> {
|
||||
async function scanMemoryFiles(
|
||||
workspaceDir: string,
|
||||
extraPaths: string[] = [],
|
||||
): Promise<SourceScan> {
|
||||
const issues: string[] = [];
|
||||
const memoryFile = path.join(workspaceDir, "MEMORY.md");
|
||||
const altMemoryFile = path.join(workspaceDir, "memory.md");
|
||||
@@ -121,6 +128,25 @@ async function scanMemoryFiles(workspaceDir: string): Promise<SourceScan> {
|
||||
if (primary.issue) issues.push(primary.issue);
|
||||
if (alt.issue) issues.push(alt.issue);
|
||||
|
||||
const resolvedExtraPaths = normalizeExtraMemoryPaths(workspaceDir, extraPaths);
|
||||
for (const extraPath of resolvedExtraPaths) {
|
||||
try {
|
||||
const stat = await fs.lstat(extraPath);
|
||||
if (stat.isSymbolicLink()) continue;
|
||||
const extraCheck = await checkReadableFile(extraPath);
|
||||
if (extraCheck.issue) issues.push(extraCheck.issue);
|
||||
} catch (err) {
|
||||
const code = (err as NodeJS.ErrnoException).code;
|
||||
if (code === "ENOENT") {
|
||||
issues.push(`additional memory path missing (${shortenHomePath(extraPath)})`);
|
||||
} else {
|
||||
issues.push(
|
||||
`additional memory path not accessible (${shortenHomePath(extraPath)}): ${code ?? "error"}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let dirReadable: boolean | null = null;
|
||||
try {
|
||||
await fs.access(memoryDir, fsSync.constants.R_OK);
|
||||
@@ -141,7 +167,7 @@ async function scanMemoryFiles(workspaceDir: string): Promise<SourceScan> {
|
||||
let listed: string[] = [];
|
||||
let listedOk = false;
|
||||
try {
|
||||
listed = await listMemoryFiles(workspaceDir);
|
||||
listed = await listMemoryFiles(workspaceDir, resolvedExtraPaths);
|
||||
listedOk = true;
|
||||
} catch (err) {
|
||||
const code = (err as NodeJS.ErrnoException).code;
|
||||
@@ -176,11 +202,13 @@ async function scanMemorySources(params: {
|
||||
workspaceDir: string;
|
||||
agentId: string;
|
||||
sources: MemorySourceName[];
|
||||
extraPaths?: string[];
|
||||
}): Promise<MemorySourceScan> {
|
||||
const scans: SourceScan[] = [];
|
||||
const extraPaths = params.extraPaths ?? [];
|
||||
for (const source of params.sources) {
|
||||
if (source === "memory") {
|
||||
scans.push(await scanMemoryFiles(params.workspaceDir));
|
||||
scans.push(await scanMemoryFiles(params.workspaceDir, extraPaths));
|
||||
}
|
||||
if (source === "sessions") {
|
||||
scans.push(await scanSessionFiles(params.agentId));
|
||||
@@ -268,6 +296,7 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) {
|
||||
workspaceDir: status.workspaceDir,
|
||||
agentId,
|
||||
sources,
|
||||
extraPaths: status.extraPaths,
|
||||
});
|
||||
allResults.push({ agentId, status, embeddingProbe, indexError, scan });
|
||||
},
|
||||
@@ -299,6 +328,7 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) {
|
||||
const line = indexError ? `Memory index failed: ${indexError}` : "Memory index complete.";
|
||||
defaultRuntime.log(line);
|
||||
}
|
||||
const extraPaths = formatExtraPaths(status.workspaceDir, status.extraPaths ?? []);
|
||||
const lines = [
|
||||
`${heading("Memory Search")} ${muted(`(${agentId})`)}`,
|
||||
`${label("Provider")} ${info(status.provider)} ${muted(
|
||||
@@ -306,6 +336,7 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) {
|
||||
)}`,
|
||||
`${label("Model")} ${info(status.model)}`,
|
||||
status.sources?.length ? `${label("Sources")} ${info(status.sources.join(", "))}` : null,
|
||||
extraPaths.length ? `${label("Extra paths")} ${info(extraPaths.join(", "))}` : null,
|
||||
`${label("Indexed")} ${success(indexedLabel)}`,
|
||||
`${label("Dirty")} ${status.dirty ? warn("yes") : muted("no")}`,
|
||||
`${label("Store")} ${info(shortenHomePath(status.dbPath))}`,
|
||||
@@ -469,6 +500,7 @@ export function registerMemoryCli(program: Command) {
|
||||
const sourceLabels = status.sources.map((source) =>
|
||||
formatSourceLabel(source, status.workspaceDir, agentId),
|
||||
);
|
||||
const extraPaths = formatExtraPaths(status.workspaceDir, status.extraPaths ?? []);
|
||||
const lines = [
|
||||
`${heading("Memory Index")} ${muted(`(${agentId})`)}`,
|
||||
`${label("Provider")} ${info(status.provider)} ${muted(
|
||||
@@ -478,6 +510,9 @@ export function registerMemoryCli(program: Command) {
|
||||
sourceLabels.length
|
||||
? `${label("Sources")} ${info(sourceLabels.join(", "))}`
|
||||
: null,
|
||||
extraPaths.length
|
||||
? `${label("Extra paths")} ${info(extraPaths.join(", "))}`
|
||||
: null,
|
||||
].filter(Boolean) as string[];
|
||||
if (status.fallback) {
|
||||
lines.push(`${label("Fallback")} ${warn(status.fallback.from)}`);
|
||||
|
||||
@@ -222,7 +222,7 @@ const FIELD_LABELS: Record<string, string> = {
|
||||
"agents.defaults.memorySearch": "Memory Search",
|
||||
"agents.defaults.memorySearch.enabled": "Enable Memory Search",
|
||||
"agents.defaults.memorySearch.sources": "Memory Search Sources",
|
||||
"agents.defaults.memorySearch.paths": "Additional Memory Paths",
|
||||
"agents.defaults.memorySearch.extraPaths": "Extra Memory Paths",
|
||||
"agents.defaults.memorySearch.experimental.sessionMemory":
|
||||
"Memory Search Session Index (Experimental)",
|
||||
"agents.defaults.memorySearch.provider": "Memory Search Provider",
|
||||
@@ -500,8 +500,8 @@ const FIELD_HELP: Record<string, string> = {
|
||||
"Vector search over MEMORY.md and memory/*.md (per-agent overrides supported).",
|
||||
"agents.defaults.memorySearch.sources":
|
||||
'Sources to index for memory search (default: ["memory"]; add "sessions" to include session transcripts).',
|
||||
"agents.defaults.memorySearch.paths":
|
||||
"Additional paths to include in memory search (directories or .md files; relative paths resolved from workspace).",
|
||||
"agents.defaults.memorySearch.extraPaths":
|
||||
"Extra paths to include in memory search (directories or .md files; relative paths resolved from workspace).",
|
||||
"agents.defaults.memorySearch.experimental.sessionMemory":
|
||||
"Enable experimental session transcript indexing for memory search (default: false).",
|
||||
"agents.defaults.memorySearch.provider": 'Embedding provider ("openai", "gemini", or "local").',
|
||||
|
||||
@@ -226,8 +226,8 @@ export type MemorySearchConfig = {
|
||||
enabled?: boolean;
|
||||
/** Sources to index and search (default: ["memory"]). */
|
||||
sources?: Array<"memory" | "sessions">;
|
||||
/** Additional paths to include in memory search (directories or .md files). */
|
||||
paths?: string[];
|
||||
/** Extra paths to include in memory search (directories or .md files). */
|
||||
extraPaths?: string[];
|
||||
/** Experimental memory search settings. */
|
||||
experimental?: {
|
||||
/** Enable session transcript indexing (experimental, default: false). */
|
||||
|
||||
@@ -304,7 +304,7 @@ export const MemorySearchSchema = z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
sources: z.array(z.union([z.literal("memory"), z.literal("sessions")])).optional(),
|
||||
paths: z.array(z.string()).optional(),
|
||||
extraPaths: z.array(z.string()).optional(),
|
||||
experimental: z
|
||||
.object({
|
||||
sessionMemory: z.boolean().optional(),
|
||||
|
||||
@@ -412,4 +412,52 @@ describe("memory index", () => {
|
||||
manager = result.manager;
|
||||
await expect(result.manager.readFile({ relPath: "NOTES.md" })).rejects.toThrow("path required");
|
||||
});
|
||||
|
||||
// A Markdown file inside a configured extra directory must be readable through
// the manager, while a symlink placed in that same directory must be rejected.
it("allows reading from additional memory paths and blocks symlinks", async () => {
  const extraDir = path.join(workspaceDir, "extra");
  const extraFile = path.join(extraDir, "extra.md");
  await fs.mkdir(extraDir, { recursive: true });
  await fs.writeFile(extraFile, "Extra content.");

  const cfg = {
    agents: {
      defaults: {
        workspace: workspaceDir,
        memorySearch: {
          provider: "openai",
          model: "mock-embed",
          store: { path: indexPath },
          sync: { watch: false, onSessionStart: false, onSearch: true },
          extraPaths: [extraDir],
        },
      },
      list: [{ id: "main", default: true }],
    },
  };

  const result = await getMemorySearchManager({ cfg, agentId: "main" });
  expect(result.manager).not.toBeNull();
  if (!result.manager) throw new Error("manager missing");
  manager = result.manager;

  // Regular file under the extra directory: allowed.
  await expect(result.manager.readFile({ relPath: "extra/extra.md" })).resolves.toEqual({
    path: "extra/extra.md",
    text: "Extra content.",
  });

  // Symlink under the extra directory: must be refused. Creating symlinks can
  // be forbidden on some platforms; in that case the remaining check is skipped.
  const linkPath = path.join(extraDir, "linked.md");
  try {
    await fs.symlink(extraFile, linkPath, "file");
  } catch (err) {
    const code = (err as NodeJS.ErrnoException).code;
    if (code !== "EPERM" && code !== "EACCES") {
      throw err;
    }
    return;
  }

  await expect(result.manager.readFile({ relPath: "extra/linked.md" })).rejects.toThrow(
    "path required",
  );
});
|
||||
});
|
||||
|
||||
@@ -4,7 +4,22 @@ import path from "node:path";
|
||||
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
|
||||
import { chunkMarkdown, listMemoryFiles } from "./internal.js";
|
||||
import { chunkMarkdown, listMemoryFiles, normalizeExtraMemoryPaths } from "./internal.js";
|
||||
|
||||
describe("normalizeExtraMemoryPaths", () => {
  // Covers whitespace trimming, workspace-relative resolution, dropping of
  // empty entries, and removal of duplicates (both relative and absolute).
  it("trims, resolves, and dedupes paths", () => {
    const workspaceDir = path.join(os.tmpdir(), "memory-test-workspace");
    const absPath = path.resolve(path.sep, "shared-notes");

    // " notes " and "./notes" resolve to the same location; absPath is repeated.
    const inputs = [" notes ", "./notes", absPath, absPath, ""];
    const expected = [path.resolve(workspaceDir, "notes"), absPath];

    expect(normalizeExtraMemoryPaths(workspaceDir, inputs)).toEqual(expected);
  });
});
|
||||
|
||||
describe("listMemoryFiles", () => {
|
||||
let tmpDir: string;
|
||||
@@ -18,10 +33,7 @@ describe("listMemoryFiles", () => {
|
||||
});
|
||||
|
||||
it("includes files from additional paths (directory)", async () => {
|
||||
// Create default memory file
|
||||
await fs.writeFile(path.join(tmpDir, "MEMORY.md"), "# Default memory");
|
||||
|
||||
// Create additional directory with files
|
||||
const extraDir = path.join(tmpDir, "extra-notes");
|
||||
await fs.mkdir(extraDir, { recursive: true });
|
||||
await fs.writeFile(path.join(extraDir, "note1.md"), "# Note 1");
|
||||
@@ -29,11 +41,11 @@ describe("listMemoryFiles", () => {
|
||||
await fs.writeFile(path.join(extraDir, "ignore.txt"), "Not a markdown file");
|
||||
|
||||
const files = await listMemoryFiles(tmpDir, [extraDir]);
|
||||
expect(files).toHaveLength(3); // MEMORY.md + 2 notes
|
||||
expect(files.some((f) => f.endsWith("MEMORY.md"))).toBe(true);
|
||||
expect(files.some((f) => f.endsWith("note1.md"))).toBe(true);
|
||||
expect(files.some((f) => f.endsWith("note2.md"))).toBe(true);
|
||||
expect(files.some((f) => f.endsWith("ignore.txt"))).toBe(false);
|
||||
expect(files).toHaveLength(3);
|
||||
expect(files.some((file) => file.endsWith("MEMORY.md"))).toBe(true);
|
||||
expect(files.some((file) => file.endsWith("note1.md"))).toBe(true);
|
||||
expect(files.some((file) => file.endsWith("note2.md"))).toBe(true);
|
||||
expect(files.some((file) => file.endsWith("ignore.txt"))).toBe(false);
|
||||
});
|
||||
|
||||
it("includes files from additional paths (single file)", async () => {
|
||||
@@ -43,7 +55,7 @@ describe("listMemoryFiles", () => {
|
||||
|
||||
const files = await listMemoryFiles(tmpDir, [singleFile]);
|
||||
expect(files).toHaveLength(2);
|
||||
expect(files.some((f) => f.endsWith("standalone.md"))).toBe(true);
|
||||
expect(files.some((file) => file.endsWith("standalone.md"))).toBe(true);
|
||||
});
|
||||
|
||||
it("handles relative paths in additional paths", async () => {
|
||||
@@ -52,10 +64,9 @@ describe("listMemoryFiles", () => {
|
||||
await fs.mkdir(extraDir, { recursive: true });
|
||||
await fs.writeFile(path.join(extraDir, "nested.md"), "# Nested");
|
||||
|
||||
// Use relative path
|
||||
const files = await listMemoryFiles(tmpDir, ["subdir"]);
|
||||
expect(files).toHaveLength(2);
|
||||
expect(files.some((f) => f.endsWith("nested.md"))).toBe(true);
|
||||
expect(files.some((file) => file.endsWith("nested.md"))).toBe(true);
|
||||
});
|
||||
|
||||
it("ignores non-existent additional paths", async () => {
|
||||
@@ -64,6 +75,42 @@ describe("listMemoryFiles", () => {
|
||||
const files = await listMemoryFiles(tmpDir, ["/does/not/exist"]);
|
||||
expect(files).toHaveLength(1);
|
||||
});
|
||||
|
||||
// Real files under an extra directory must be listed; a symlinked file inside
// that directory and a symlinked directory passed as an extra path must not be.
it("ignores symlinked files and directories", async () => {
  await fs.writeFile(path.join(tmpDir, "MEMORY.md"), "# Default memory");

  const extraDir = path.join(tmpDir, "extra");
  await fs.mkdir(extraDir, { recursive: true });
  await fs.writeFile(path.join(extraDir, "note.md"), "# Note");

  // Symlink targets live outside the extra directory.
  const targetFile = path.join(tmpDir, "target.md");
  await fs.writeFile(targetFile, "# Target");
  const targetDir = path.join(tmpDir, "target-dir");
  await fs.mkdir(targetDir, { recursive: true });
  await fs.writeFile(path.join(targetDir, "nested.md"), "# Nested");

  const linkFile = path.join(extraDir, "linked.md");
  const linkDir = path.join(tmpDir, "linked-dir");

  // Returns false when the platform refuses to create symlinks; any other
  // failure is a genuine error and is rethrown.
  const createSymlinks = async (): Promise<boolean> => {
    try {
      await fs.symlink(targetFile, linkFile, "file");
      await fs.symlink(targetDir, linkDir, "dir");
      return true;
    } catch (err) {
      const code = (err as NodeJS.ErrnoException).code;
      if (code === "EPERM" || code === "EACCES") return false;
      throw err;
    }
  };
  const symlinksOk = await createSymlinks();

  const files = await listMemoryFiles(tmpDir, [extraDir, linkDir]);
  const listsFileEndingWith = (suffix: string): boolean =>
    files.some((file) => file.endsWith(suffix));

  expect(listsFileEndingWith("note.md")).toBe(true);
  if (!symlinksOk) return;
  expect(listsFileEndingWith("linked.md")).toBe(false);
  expect(listsFileEndingWith("nested.md")).toBe(false);
});
|
||||
});
|
||||
|
||||
describe("chunkMarkdown", () => {
|
||||
|
||||
@@ -30,6 +30,17 @@ export function normalizeRelPath(value: string): string {
|
||||
return trimmed.replace(/\\/g, "/");
|
||||
}
|
||||
|
||||
/**
 * Turn configured extra memory paths into a list of unique absolute paths.
 *
 * Each entry is trimmed; entries that are empty after trimming are dropped.
 * Absolute entries are normalized with `path.resolve`, and relative entries
 * are resolved against `workspaceDir`. Duplicates (after resolution) are
 * removed, keeping the position of the first occurrence. The filesystem is
 * not consulted, so the returned paths may not exist.
 *
 * @param workspaceDir - Base directory for resolving relative entries.
 * @param extraPaths - Raw path entries; may be omitted or empty.
 * @returns Unique absolute paths in first-seen order (empty when no input).
 */
export function normalizeExtraMemoryPaths(workspaceDir: string, extraPaths?: string[]): string[] {
  const unique = new Set<string>();
  for (const raw of extraPaths ?? []) {
    const candidate = raw.trim();
    if (candidate.length === 0) continue;
    const absolute = path.isAbsolute(candidate)
      ? path.resolve(candidate)
      : path.resolve(workspaceDir, candidate);
    // A Set keeps insertion order, so the first occurrence wins.
    unique.add(absolute);
  }
  return [...unique];
}
|
||||
|
||||
export function isMemoryPath(relPath: string): boolean {
|
||||
const normalized = normalizeRelPath(relPath);
|
||||
if (!normalized) return false;
|
||||
@@ -37,19 +48,11 @@ export function isMemoryPath(relPath: string): boolean {
|
||||
return normalized.startsWith("memory/");
|
||||
}
|
||||
|
||||
async function exists(filePath: string): Promise<boolean> {
|
||||
try {
|
||||
await fs.access(filePath);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
async function walkDir(dir: string, files: string[]) {
|
||||
const entries = await fs.readdir(dir, { withFileTypes: true });
|
||||
for (const entry of entries) {
|
||||
const full = path.join(dir, entry.name);
|
||||
if (entry.isSymbolicLink()) continue;
|
||||
if (entry.isDirectory()) {
|
||||
await walkDir(full, files);
|
||||
continue;
|
||||
@@ -62,28 +65,45 @@ async function walkDir(dir: string, files: string[]) {
|
||||
|
||||
export async function listMemoryFiles(
|
||||
workspaceDir: string,
|
||||
additionalPaths?: string[],
|
||||
extraPaths?: string[],
|
||||
): Promise<string[]> {
|
||||
const result: string[] = [];
|
||||
const memoryFile = path.join(workspaceDir, "MEMORY.md");
|
||||
const altMemoryFile = path.join(workspaceDir, "memory.md");
|
||||
if (await exists(memoryFile)) result.push(memoryFile);
|
||||
if (await exists(altMemoryFile)) result.push(altMemoryFile);
|
||||
const memoryDir = path.join(workspaceDir, "memory");
|
||||
if (await exists(memoryDir)) {
|
||||
await walkDir(memoryDir, result);
|
||||
}
|
||||
// Include files from additional explicit paths
|
||||
if (additionalPaths && additionalPaths.length > 0) {
|
||||
for (const p of additionalPaths) {
|
||||
const resolved = path.isAbsolute(p) ? p : path.resolve(workspaceDir, p);
|
||||
if (!(await exists(resolved))) continue;
|
||||
const stat = await fs.stat(resolved);
|
||||
if (stat.isDirectory()) {
|
||||
await walkDir(resolved, result);
|
||||
} else if (stat.isFile() && resolved.endsWith(".md")) {
|
||||
result.push(resolved);
|
||||
}
|
||||
|
||||
const addMarkdownFile = async (absPath: string) => {
|
||||
try {
|
||||
const stat = await fs.lstat(absPath);
|
||||
if (stat.isSymbolicLink() || !stat.isFile()) return;
|
||||
if (!absPath.endsWith(".md")) return;
|
||||
result.push(absPath);
|
||||
} catch {}
|
||||
};
|
||||
|
||||
await addMarkdownFile(memoryFile);
|
||||
await addMarkdownFile(altMemoryFile);
|
||||
try {
|
||||
const dirStat = await fs.lstat(memoryDir);
|
||||
if (!dirStat.isSymbolicLink() && dirStat.isDirectory()) {
|
||||
await walkDir(memoryDir, result);
|
||||
}
|
||||
} catch {}
|
||||
|
||||
const normalizedExtraPaths = normalizeExtraMemoryPaths(workspaceDir, extraPaths);
|
||||
if (normalizedExtraPaths.length > 0) {
|
||||
for (const inputPath of normalizedExtraPaths) {
|
||||
try {
|
||||
const stat = await fs.lstat(inputPath);
|
||||
if (stat.isSymbolicLink()) continue;
|
||||
if (stat.isDirectory()) {
|
||||
await walkDir(inputPath, result);
|
||||
continue;
|
||||
}
|
||||
if (stat.isFile() && inputPath.endsWith(".md")) {
|
||||
result.push(inputPath);
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
if (result.length <= 1) return result;
|
||||
|
||||
@@ -13,6 +13,7 @@ export function computeMemoryManagerCacheKey(params: {
|
||||
JSON.stringify({
|
||||
enabled: settings.enabled,
|
||||
sources: [...settings.sources].sort((a, b) => a.localeCompare(b)),
|
||||
extraPaths: [...settings.extraPaths].sort((a, b) => a.localeCompare(b)),
|
||||
provider: settings.provider,
|
||||
model: settings.model,
|
||||
fallback: settings.fallback,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import fsSync from "node:fs";
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
@@ -35,9 +36,9 @@ import {
|
||||
hashText,
|
||||
isMemoryPath,
|
||||
listMemoryFiles,
|
||||
normalizeExtraMemoryPaths,
|
||||
type MemoryChunk,
|
||||
type MemoryFileEntry,
|
||||
normalizeRelPath,
|
||||
parseEmbedding,
|
||||
} from "./internal.js";
|
||||
import { bm25RankToScore, buildFtsQuery, mergeHybridResults } from "./hybrid.js";
|
||||
@@ -396,13 +397,52 @@ export class MemoryIndexManager {
|
||||
from?: number;
|
||||
lines?: number;
|
||||
}): Promise<{ text: string; path: string }> {
|
||||
const relPath = normalizeRelPath(params.relPath);
|
||||
if (!relPath || !isMemoryPath(relPath)) {
|
||||
const rawPath = params.relPath.trim();
|
||||
if (!rawPath) {
|
||||
throw new Error("path required");
|
||||
}
|
||||
const absPath = path.resolve(this.workspaceDir, relPath);
|
||||
if (!absPath.startsWith(this.workspaceDir)) {
|
||||
throw new Error("path escapes workspace");
|
||||
const absPath = path.isAbsolute(rawPath)
|
||||
? path.resolve(rawPath)
|
||||
: path.resolve(this.workspaceDir, rawPath);
|
||||
const relPath = path.relative(this.workspaceDir, absPath).replace(/\\/g, "/");
|
||||
const inWorkspace =
|
||||
relPath.length > 0 && !relPath.startsWith("..") && !path.isAbsolute(relPath);
|
||||
const allowedWorkspace = inWorkspace && isMemoryPath(relPath);
|
||||
let allowedAdditional = false;
|
||||
if (!allowedWorkspace && this.settings.extraPaths.length > 0) {
|
||||
const additionalPaths = normalizeExtraMemoryPaths(
|
||||
this.workspaceDir,
|
||||
this.settings.extraPaths,
|
||||
);
|
||||
for (const additionalPath of additionalPaths) {
|
||||
try {
|
||||
const stat = await fs.lstat(additionalPath);
|
||||
if (stat.isSymbolicLink()) continue;
|
||||
if (stat.isDirectory()) {
|
||||
if (absPath === additionalPath || absPath.startsWith(`${additionalPath}${path.sep}`)) {
|
||||
allowedAdditional = true;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (stat.isFile()) {
|
||||
if (absPath === additionalPath && absPath.endsWith(".md")) {
|
||||
allowedAdditional = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
}
|
||||
if (!allowedWorkspace && !allowedAdditional) {
|
||||
throw new Error("path required");
|
||||
}
|
||||
if (!absPath.endsWith(".md")) {
|
||||
throw new Error("path required");
|
||||
}
|
||||
const stat = await fs.lstat(absPath);
|
||||
if (stat.isSymbolicLink() || !stat.isFile()) {
|
||||
throw new Error("path required");
|
||||
}
|
||||
const content = await fs.readFile(absPath, "utf-8");
|
||||
if (!params.from && !params.lines) {
|
||||
@@ -425,6 +465,7 @@ export class MemoryIndexManager {
|
||||
model: string;
|
||||
requestedProvider: string;
|
||||
sources: MemorySource[];
|
||||
extraPaths: string[];
|
||||
sourceCounts: Array<{ source: MemorySource; files: number; chunks: number }>;
|
||||
cache?: { enabled: boolean; entries?: number; maxEntries?: number };
|
||||
fts?: { enabled: boolean; available: boolean; error?: string };
|
||||
@@ -498,6 +539,7 @@ export class MemoryIndexManager {
|
||||
model: this.provider.model,
|
||||
requestedProvider: this.requestedProvider,
|
||||
sources: Array.from(this.sources),
|
||||
extraPaths: this.settings.extraPaths,
|
||||
sourceCounts,
|
||||
cache: this.cache.enabled
|
||||
? {
|
||||
@@ -769,11 +811,23 @@ export class MemoryIndexManager {
|
||||
|
||||
private ensureWatcher() {
|
||||
if (!this.sources.has("memory") || !this.settings.sync.watch || this.watcher) return;
|
||||
const watchPaths = [
|
||||
const additionalPaths = normalizeExtraMemoryPaths(this.workspaceDir, this.settings.extraPaths)
|
||||
.map((entry) => {
|
||||
try {
|
||||
const stat = fsSync.lstatSync(entry);
|
||||
return stat.isSymbolicLink() ? null : entry;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
})
|
||||
.filter((entry): entry is string => Boolean(entry));
|
||||
const watchPaths = new Set<string>([
|
||||
path.join(this.workspaceDir, "MEMORY.md"),
|
||||
path.join(this.workspaceDir, "memory.md"),
|
||||
path.join(this.workspaceDir, "memory"),
|
||||
];
|
||||
this.watcher = chokidar.watch(watchPaths, {
|
||||
...additionalPaths,
|
||||
]);
|
||||
this.watcher = chokidar.watch(Array.from(watchPaths), {
|
||||
ignoreInitial: true,
|
||||
awaitWriteFinish: {
|
||||
stabilityThreshold: this.settings.sync.watchDebounceMs,
|
||||
@@ -975,7 +1029,7 @@ export class MemoryIndexManager {
|
||||
needsFullReindex: boolean;
|
||||
progress?: MemorySyncProgressState;
|
||||
}) {
|
||||
const files = await listMemoryFiles(this.workspaceDir, this.settings.paths);
|
||||
const files = await listMemoryFiles(this.workspaceDir, this.settings.extraPaths);
|
||||
const fileEntries = await Promise.all(
|
||||
files.map(async (file) => buildFileEntry(file, this.workspaceDir)),
|
||||
);
|
||||
|
||||
@@ -14,7 +14,7 @@ type ProgressState = {
|
||||
|
||||
export async function syncMemoryFiles(params: {
|
||||
workspaceDir: string;
|
||||
additionalPaths?: string[];
|
||||
extraPaths?: string[];
|
||||
db: DatabaseSync;
|
||||
needsFullReindex: boolean;
|
||||
progress?: ProgressState;
|
||||
@@ -28,7 +28,7 @@ export async function syncMemoryFiles(params: {
|
||||
ftsAvailable: boolean;
|
||||
model: string;
|
||||
}) {
|
||||
const files = await listMemoryFiles(params.workspaceDir, params.additionalPaths);
|
||||
const files = await listMemoryFiles(params.workspaceDir, params.extraPaths);
|
||||
const fileEntries = await Promise.all(
|
||||
files.map(async (file) => buildFileEntry(file, params.workspaceDir)),
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user