mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-21 05:32:53 +00:00
fix(web-fetch): finalize RFC2544 SSRF policy support
This commit is contained in:
committed by
Ayaan Zaidi
parent
ce32697250
commit
9ed448088b
@@ -1,4 +1,4 @@
|
||||
-0a75b57f5dbb0bb1488eacb47111ee22ff42dd3747bfe07bb69c9445d5e55c3e config-baseline.json
-ff15bb8b4231fc80174249ae89bcb61439d7adda5ee6be95e4d304680253a59f config-baseline.core.json
+2595f0a5abe45b5066e722843d339afdadb6ff8a108893e1a2183cd835f591cc config-baseline.json
+3c8455d44a63d495ad295d2c9d76fed7a190b80344dabaa0e78ba433bf2d253b config-baseline.core.json
 7f42b22b46c487d64aaac46001ba9d9096cf7bf0b1c263a54d39946303ff5018 config-baseline.channel.json
 483d4f3c1d516719870ad6f2aba6779b9950f85471ee77b9994a077a7574a892 config-baseline.plugin.json
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { LookupFn } from "../../infra/net/ssrf.js";
|
||||
import * as ssrf from "../../infra/net/ssrf.js";
|
||||
import { type FetchMock, withFetchPreconnect } from "../../test-utils/fetch-mock.js";
|
||||
import { makeFetchHeaders } from "./web-fetch.test-harness.js";
|
||||
import "./web-fetch.test-mocks.js";
|
||||
|
||||
const lookupMock = vi.fn();
|
||||
const resolvePinnedHostname = ssrf.resolvePinnedHostname;
|
||||
|
||||
function redirectResponse(location: string): Response {
|
||||
return {
|
||||
@@ -32,7 +33,11 @@ function setMockFetch(
|
||||
return fetchSpy;
|
||||
}
|
||||
|
||||
-async function createWebFetchToolForTest(params?: { firecrawlApiKey?: string }) {
+async function createWebFetchToolForTest(params?: {
+firecrawlApiKey?: string;
+ssrfPolicy?: { allowRfc2544BenchmarkRange?: boolean };
+cacheTtlMinutes?: number;
+}) {
|
||||
const { createWebFetchTool } = await import("./web-tools.js");
|
||||
return createWebFetchTool({
|
||||
config: {
|
||||
@@ -52,13 +57,14 @@ async function createWebFetchToolForTest(params?: { firecrawlApiKey?: string })
|
||||
tools: {
|
||||
web: {
|
||||
fetch: {
|
||||
-cacheTtlMinutes: 0,
+cacheTtlMinutes: params?.cacheTtlMinutes ?? 0,
+ssrfPolicy: params?.ssrfPolicy,
|
||||
...(params?.firecrawlApiKey ? { provider: "firecrawl" } : {}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
-lookupFn: lookupMock as unknown as LookupFn,
+lookupFn: lookupMock,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -74,13 +80,15 @@ describe("web_fetch SSRF protection", () => {
|
||||
const priorFetch = global.fetch;
|
||||
|
||||
beforeEach(() => {
|
||||
vi.stubEnv("FIRECRAWL_API_KEY", "");
|
||||
vi.spyOn(ssrf, "resolvePinnedHostname").mockImplementation((hostname) =>
|
||||
resolvePinnedHostname(hostname, lookupMock),
|
||||
);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
global.fetch = priorFetch;
|
||||
lookupMock.mockClear();
|
||||
vi.unstubAllEnvs();
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it("blocks localhost hostnames before fetch/firecrawl", async () => {
|
||||
@@ -147,4 +155,31 @@ describe("web_fetch SSRF protection", () => {
|
||||
extractor: "raw",
|
||||
});
|
||||
});
|
||||
|
||||
it("allows RFC2544 benchmark-range URLs only when web_fetch ssrfPolicy opts in", async () => {
|
||||
const url = "http://198.18.0.153/file";
|
||||
lookupMock.mockResolvedValue([{ address: "198.18.0.153", family: 4 }]);
|
||||
|
||||
const deniedTool = await createWebFetchToolForTest({ cacheTtlMinutes: 1 });
|
||||
await expectBlockedUrl(deniedTool, url, /private|internal|blocked/i);
|
||||
|
||||
const fetchSpy = setMockFetch().mockResolvedValue(textResponse("benchmark ok"));
|
||||
const allowedTool = await createWebFetchToolForTest({
|
||||
ssrfPolicy: { allowRfc2544BenchmarkRange: true },
|
||||
cacheTtlMinutes: 1,
|
||||
});
|
||||
|
||||
const allowed = await allowedTool?.execute?.("call", { url });
|
||||
expect(allowed?.details).toMatchObject({
|
||||
status: 200,
|
||||
extractor: "raw",
|
||||
});
|
||||
expect(fetchSpy).toHaveBeenCalledTimes(1);
|
||||
|
||||
// A stricter tool instance must still block the same URL instead of reusing
|
||||
// the permissive-policy cache entry.
|
||||
const stricterTool = await createWebFetchToolForTest({ cacheTtlMinutes: 1 });
|
||||
await expectBlockedUrl(stricterTool, url, /private|internal|blocked/i);
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
@@ -247,6 +247,9 @@ type WebFetchRuntimeParams = {
|
||||
cacheTtlMs: number;
|
||||
userAgent: string;
|
||||
readabilityEnabled: boolean;
|
||||
ssrfPolicy?: {
|
||||
allowRfc2544BenchmarkRange?: boolean;
|
||||
};
|
||||
lookupFn?: LookupFn;
|
||||
resolveProviderFallback: () => ReturnType<typeof resolveWebFetchDefinition>;
|
||||
};
|
||||
@@ -360,8 +363,14 @@ async function maybeFetchProviderWebFetchPayload(
|
||||
}
|
||||
|
||||
async function runWebFetch(params: WebFetchRuntimeParams): Promise<Record<string, unknown>> {
|
||||
const policy =
|
||||
params.ssrfPolicy?.allowRfc2544BenchmarkRange === true
|
||||
? { allowRfc2544BenchmarkRange: true }
|
||||
: undefined;
|
||||
// Include the effective SSRF policy in the cache key to prevent cross-policy cache bypass
|
||||
const ssrfPolicySuffix = policy ? `:${JSON.stringify(policy)}` : "";
|
||||
const cacheKey = normalizeCacheKey(
|
||||
-`fetch:${params.url}:${params.extractMode}:${params.maxChars}`,
+`fetch:${params.url}:${params.extractMode}:${params.maxChars}${ssrfPolicySuffix}`,
|
||||
);
|
||||
const cached = readCache(FETCH_CACHE, cacheKey);
|
||||
if (cached) {
|
||||
@@ -382,12 +391,14 @@ async function runWebFetch(params: WebFetchRuntimeParams): Promise<Record<string
|
||||
let res: Response;
|
||||
let release: (() => Promise<void>) | null = null;
|
||||
let finalUrl = params.url;
|
||||
|
||||
try {
|
||||
const result = await fetchWithWebToolsNetworkGuard({
|
||||
url: params.url,
|
||||
maxRedirects: params.maxRedirects,
|
||||
timeoutSeconds: params.timeoutSeconds,
|
||||
lookupFn: params.lookupFn,
|
||||
policy,
|
||||
init: {
|
||||
headers: {
|
||||
Accept: "text/markdown, text/html;q=0.9, */*;q=0.1",
|
||||
@@ -573,6 +584,7 @@ export function createWebFetchTool(options?: {
|
||||
return null;
|
||||
}
|
||||
const readabilityEnabled = resolveFetchReadabilityEnabled(fetch);
|
||||
const ssrfPolicy = fetch?.ssrfPolicy;
|
||||
const userAgent =
|
||||
(fetch && "userAgent" in fetch && typeof fetch.userAgent === "string" && fetch.userAgent) ||
|
||||
DEFAULT_FETCH_USER_AGENT;
|
||||
@@ -617,6 +629,7 @@ export function createWebFetchTool(options?: {
|
||||
cacheTtlMs: resolveCacheTtlMs(fetch?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
|
||||
userAgent,
|
||||
readabilityEnabled,
|
||||
ssrfPolicy,
|
||||
lookupFn: options?.lookupFn,
|
||||
resolveProviderFallback,
|
||||
});
|
||||
|
||||
@@ -7353,6 +7353,21 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
description:
|
||||
"Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).",
|
||||
},
|
||||
ssrfPolicy: {
|
||||
type: "object",
|
||||
properties: {
|
||||
allowRfc2544BenchmarkRange: {
|
||||
type: "boolean",
|
||||
title: "Web Fetch Allow RFC 2544 Benchmark Range",
|
||||
description:
|
||||
"Allow RFC 2544 benchmark-range IPs (198.18.0.0/15) for fake-IP proxy compatibility such as Clash or Surge.",
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
title: "Web Fetch SSRF Policy",
|
||||
description:
|
||||
"Scoped SSRF policy overrides for web_fetch. Keep this narrow and opt in only for known local-network proxy environments.",
|
||||
},
|
||||
firecrawl: {
|
||||
type: "object",
|
||||
properties: {
|
||||
@@ -23885,6 +23900,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
help: "Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).",
|
||||
tags: ["tools"],
|
||||
},
|
||||
"tools.web.fetch.ssrfPolicy": {
|
||||
label: "Web Fetch SSRF Policy",
|
||||
help: "Scoped SSRF policy overrides for web_fetch. Keep this narrow and opt in only for known local-network proxy environments.",
|
||||
tags: ["access", "tools"],
|
||||
},
|
||||
"tools.web.fetch.ssrfPolicy.allowRfc2544BenchmarkRange": {
|
||||
label: "Web Fetch Allow RFC 2544 Benchmark Range",
|
||||
help: "Allow RFC 2544 benchmark-range IPs (198.18.0.0/15) for fake-IP proxy compatibility such as Clash or Surge.",
|
||||
tags: ["access", "tools"],
|
||||
},
|
||||
"gateway.controlUi.basePath": {
|
||||
label: "Control UI Base Path",
|
||||
help: "Optional URL prefix where the Control UI is served (e.g. /openclaw).",
|
||||
|
||||
@@ -720,6 +720,10 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"tools.web.fetch.userAgent": "Override User-Agent header for web_fetch requests.",
|
||||
"tools.web.fetch.readability":
|
||||
"Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).",
|
||||
"tools.web.fetch.ssrfPolicy":
|
||||
"Scoped SSRF policy overrides for web_fetch. Keep this narrow and opt in only for known local-network proxy environments.",
|
||||
"tools.web.fetch.ssrfPolicy.allowRfc2544BenchmarkRange":
|
||||
"Allow RFC 2544 benchmark-range IPs (198.18.0.0/15) for fake-IP proxy compatibility such as Clash or Surge.",
|
||||
models:
|
||||
"Model catalog root for provider definitions, merge/replace behavior, and optional Bedrock discovery integration. Keep provider definitions explicit and validated before relying on production failover paths.",
|
||||
"models.mode":
|
||||
|
||||
@@ -254,6 +254,9 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"tools.web.fetch.maxRedirects": "Web Fetch Max Redirects",
|
||||
"tools.web.fetch.userAgent": "Web Fetch User-Agent",
|
||||
"tools.web.fetch.readability": "Web Fetch Readability Extraction",
|
||||
"tools.web.fetch.ssrfPolicy": "Web Fetch SSRF Policy",
|
||||
"tools.web.fetch.ssrfPolicy.allowRfc2544BenchmarkRange":
|
||||
"Web Fetch Allow RFC 2544 Benchmark Range",
|
||||
"gateway.controlUi.basePath": "Control UI Base Path",
|
||||
"gateway.controlUi.root": "Control UI Assets Root",
|
||||
"gateway.controlUi.allowedOrigins": "Control UI Allowed Origins",
|
||||
|
||||
@@ -289,6 +289,22 @@ describe("config schema", () => {
|
||||
expect(parsed?.web?.fetch?.maxResponseBytes).toBe(2_000_000);
|
||||
});
|
||||
|
||||
it("accepts web fetch ssrfPolicy in the runtime zod schema", () => {
|
||||
const parsed = ToolsSchema.parse({
|
||||
web: {
|
||||
fetch: {
|
||||
ssrfPolicy: {
|
||||
allowRfc2544BenchmarkRange: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(parsed?.web?.fetch?.ssrfPolicy).toEqual({
|
||||
allowRfc2544BenchmarkRange: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("rejects unknown keys inside web fetch firecrawl config", () => {
|
||||
expect(() =>
|
||||
ToolsSchema.parse({
|
||||
|
||||
@@ -550,6 +550,11 @@ export type ToolsConfig = {
|
||||
userAgent?: string;
|
||||
/** Use Readability to extract main content (default: true). */
|
||||
readability?: boolean;
|
||||
/** SSRF policy configuration for web_fetch. */
|
||||
ssrfPolicy?: {
|
||||
/** Allow RFC 2544 benchmark range IPs (198.18.0.0/15) for fake-IP proxy compatibility (e.g., Clash TUN mode, Surge). */
|
||||
allowRfc2544BenchmarkRange?: boolean;
|
||||
};
|
||||
};
|
||||
};
|
||||
media?: MediaToolsConfig;
|
||||
|
||||
@@ -330,6 +330,12 @@ export const ToolsWebFetchSchema = z
|
||||
maxRedirects: z.number().int().nonnegative().optional(),
|
||||
userAgent: z.string().optional(),
|
||||
readability: z.boolean().optional(),
|
||||
ssrfPolicy: z
|
||||
.object({
|
||||
allowRfc2544BenchmarkRange: z.boolean().optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
// Keep the legacy Firecrawl fetch shape loadable so existing installs can
|
||||
// start and then migrate cleanly through doctor.
|
||||
firecrawl: z
|
||||
|
||||
Reference in New Issue
Block a user