fix(web-fetch): finalize RFC2544 SSRF policy support

This commit is contained in:
xing-xing-coder
2026-04-09 13:04:11 +08:00
committed by Ayaan Zaidi
parent ce32697250
commit 9ed448088b
9 changed files with 116 additions and 9 deletions

View File

@@ -1,4 +1,4 @@
0a75b57f5dbb0bb1488eacb47111ee22ff42dd3747bfe07bb69c9445d5e55c3e config-baseline.json
ff15bb8b4231fc80174249ae89bcb61439d7adda5ee6be95e4d304680253a59f config-baseline.core.json
2595f0a5abe45b5066e722843d339afdadb6ff8a108893e1a2183cd835f591cc config-baseline.json
3c8455d44a63d495ad295d2c9d76fed7a190b80344dabaa0e78ba433bf2d253b config-baseline.core.json
7f42b22b46c487d64aaac46001ba9d9096cf7bf0b1c263a54d39946303ff5018 config-baseline.channel.json
483d4f3c1d516719870ad6f2aba6779b9950f85471ee77b9994a077a7574a892 config-baseline.plugin.json

View File

@@ -1,10 +1,11 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { LookupFn } from "../../infra/net/ssrf.js";
import * as ssrf from "../../infra/net/ssrf.js";
import { type FetchMock, withFetchPreconnect } from "../../test-utils/fetch-mock.js";
import { makeFetchHeaders } from "./web-fetch.test-harness.js";
import "./web-fetch.test-mocks.js";
// Shared DNS lookup mock: passed to the tool under test as `lookupFn` and
// forced into every resolvePinnedHostname call by the spy in beforeEach.
const lookupMock = vi.fn();
// Reference to the real implementation, captured at module load (before any
// spy is installed) so the spy in beforeEach can delegate to it.
const resolvePinnedHostname = ssrf.resolvePinnedHostname;
function redirectResponse(location: string): Response {
return {
@@ -32,7 +33,11 @@ function setMockFetch(
return fetchSpy;
}
async function createWebFetchToolForTest(params?: { firecrawlApiKey?: string }) {
async function createWebFetchToolForTest(params?: {
firecrawlApiKey?: string;
ssrfPolicy?: { allowRfc2544BenchmarkRange?: boolean };
cacheTtlMinutes?: number;
}) {
const { createWebFetchTool } = await import("./web-tools.js");
return createWebFetchTool({
config: {
@@ -52,13 +57,14 @@ async function createWebFetchToolForTest(params?: { firecrawlApiKey?: string })
tools: {
web: {
fetch: {
cacheTtlMinutes: 0,
cacheTtlMinutes: params?.cacheTtlMinutes ?? 0,
ssrfPolicy: params?.ssrfPolicy,
...(params?.firecrawlApiKey ? { provider: "firecrawl" } : {}),
},
},
},
},
lookupFn: lookupMock as unknown as LookupFn,
lookupFn: lookupMock,
});
}
@@ -74,13 +80,15 @@ describe("web_fetch SSRF protection", () => {
const priorFetch = global.fetch;
beforeEach(() => {
// Blank out the Firecrawl key so a value from the ambient environment cannot
// leak into these tests.
vi.stubEnv("FIRECRAWL_API_KEY", "");
// Route every ssrf.resolvePinnedHostname call through the module-level
// reference to the original function, always substituting lookupMock as the
// lookup function regardless of what the caller supplied.
vi.spyOn(ssrf, "resolvePinnedHostname").mockImplementation((hostname) =>
resolvePinnedHostname(hostname, lookupMock),
);
});
afterEach(() => {
// Put back the fetch implementation saved in priorFetch before the tests ran.
global.fetch = priorFetch;
// Forget the lookup calls recorded during the test.
lookupMock.mockClear();
// Undo the FIRECRAWL_API_KEY stub from beforeEach.
vi.unstubAllEnvs();
// Remove the resolvePinnedHostname spy installed in beforeEach (and any other
// spies created during the test).
vi.restoreAllMocks();
});
it("blocks localhost hostnames before fetch/firecrawl", async () => {
@@ -147,4 +155,31 @@ describe("web_fetch SSRF protection", () => {
extractor: "raw",
});
});
// Verifies that the RFC 2544 benchmark range is blocked by default, allowed
// only when ssrfPolicy.allowRfc2544BenchmarkRange is set, and that a cached
// response obtained under the permissive policy is not served to a tool
// instance without that policy. The three phases must run in this order.
it("allows RFC2544 benchmark-range URLs only when web_fetch ssrfPolicy opts in", async () => {
// 198.18.0.153 lies inside 198.18.0.0/15; the lookup mock resolves to the same address.
const url = "http://198.18.0.153/file";
lookupMock.mockResolvedValue([{ address: "198.18.0.153", family: 4 }]);
// Phase 1: no ssrfPolicy -> the URL must be rejected.
// cacheTtlMinutes is set to 1 (the helper defaults it to 0) so that the
// cache-related check in phase 3 is meaningful — presumably 0 disables the
// fetch cache; confirm against resolveCacheTtlMs.
const deniedTool = await createWebFetchToolForTest({ cacheTtlMinutes: 1 });
await expectBlockedUrl(deniedTool, url, /private|internal|blocked/i);
// Phase 2: opt in to the benchmark range; the mocked fetch should be reached
// exactly once and its body returned through the "raw" extractor.
const fetchSpy = setMockFetch().mockResolvedValue(textResponse("benchmark ok"));
const allowedTool = await createWebFetchToolForTest({
ssrfPolicy: { allowRfc2544BenchmarkRange: true },
cacheTtlMinutes: 1,
});
const allowed = await allowedTool?.execute?.("call", { url });
expect(allowed?.details).toMatchObject({
status: 200,
extractor: "raw",
});
expect(fetchSpy).toHaveBeenCalledTimes(1);
// Phase 3:
// A stricter tool instance must still block the same URL instead of reusing
// the permissive-policy cache entry.
const stricterTool = await createWebFetchToolForTest({ cacheTtlMinutes: 1 });
await expectBlockedUrl(stricterTool, url, /private|internal|blocked/i);
});
});

View File

@@ -247,6 +247,9 @@ type WebFetchRuntimeParams = {
cacheTtlMs: number;
userAgent: string;
readabilityEnabled: boolean;
ssrfPolicy?: {
allowRfc2544BenchmarkRange?: boolean;
};
lookupFn?: LookupFn;
resolveProviderFallback: () => ReturnType<typeof resolveWebFetchDefinition>;
};
@@ -360,8 +363,14 @@ async function maybeFetchProviderWebFetchPayload(
}
async function runWebFetch(params: WebFetchRuntimeParams): Promise<Record<string, unknown>> {
const policy =
params.ssrfPolicy?.allowRfc2544BenchmarkRange === true
? { allowRfc2544BenchmarkRange: true }
: undefined;
// Include the effective SSRF policy in the cache key to prevent cross-policy cache bypass
const ssrfPolicySuffix = policy ? `:${JSON.stringify(policy)}` : "";
const cacheKey = normalizeCacheKey(
`fetch:${params.url}:${params.extractMode}:${params.maxChars}`,
`fetch:${params.url}:${params.extractMode}:${params.maxChars}${ssrfPolicySuffix}`,
);
const cached = readCache(FETCH_CACHE, cacheKey);
if (cached) {
@@ -382,12 +391,14 @@ async function runWebFetch(params: WebFetchRuntimeParams): Promise<Record<string
let res: Response;
let release: (() => Promise<void>) | null = null;
let finalUrl = params.url;
try {
const result = await fetchWithWebToolsNetworkGuard({
url: params.url,
maxRedirects: params.maxRedirects,
timeoutSeconds: params.timeoutSeconds,
lookupFn: params.lookupFn,
policy,
init: {
headers: {
Accept: "text/markdown, text/html;q=0.9, */*;q=0.1",
@@ -573,6 +584,7 @@ export function createWebFetchTool(options?: {
return null;
}
const readabilityEnabled = resolveFetchReadabilityEnabled(fetch);
const ssrfPolicy = fetch?.ssrfPolicy;
const userAgent =
(fetch && "userAgent" in fetch && typeof fetch.userAgent === "string" && fetch.userAgent) ||
DEFAULT_FETCH_USER_AGENT;
@@ -617,6 +629,7 @@ export function createWebFetchTool(options?: {
cacheTtlMs: resolveCacheTtlMs(fetch?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
userAgent,
readabilityEnabled,
ssrfPolicy,
lookupFn: options?.lookupFn,
resolveProviderFallback,
});

View File

@@ -7353,6 +7353,21 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
description:
"Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).",
},
ssrfPolicy: {
type: "object",
properties: {
allowRfc2544BenchmarkRange: {
type: "boolean",
title: "Web Fetch Allow RFC 2544 Benchmark Range",
description:
"Allow RFC 2544 benchmark-range IPs (198.18.0.0/15) for fake-IP proxy compatibility such as Clash or Surge.",
},
},
additionalProperties: false,
title: "Web Fetch SSRF Policy",
description:
"Scoped SSRF policy overrides for web_fetch. Keep this narrow and opt in only for known local-network proxy environments.",
},
firecrawl: {
type: "object",
properties: {
@@ -23885,6 +23900,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
help: "Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).",
tags: ["tools"],
},
"tools.web.fetch.ssrfPolicy": {
label: "Web Fetch SSRF Policy",
help: "Scoped SSRF policy overrides for web_fetch. Keep this narrow and opt in only for known local-network proxy environments.",
tags: ["access", "tools"],
},
"tools.web.fetch.ssrfPolicy.allowRfc2544BenchmarkRange": {
label: "Web Fetch Allow RFC 2544 Benchmark Range",
help: "Allow RFC 2544 benchmark-range IPs (198.18.0.0/15) for fake-IP proxy compatibility such as Clash or Surge.",
tags: ["access", "tools"],
},
"gateway.controlUi.basePath": {
label: "Control UI Base Path",
help: "Optional URL prefix where the Control UI is served (e.g. /openclaw).",

View File

@@ -720,6 +720,10 @@ export const FIELD_HELP: Record<string, string> = {
"tools.web.fetch.userAgent": "Override User-Agent header for web_fetch requests.",
"tools.web.fetch.readability":
"Use Readability to extract main content from HTML (fallbacks to basic HTML cleanup).",
"tools.web.fetch.ssrfPolicy":
"Scoped SSRF policy overrides for web_fetch. Keep this narrow and opt in only for known local-network proxy environments.",
"tools.web.fetch.ssrfPolicy.allowRfc2544BenchmarkRange":
"Allow RFC 2544 benchmark-range IPs (198.18.0.0/15) for fake-IP proxy compatibility such as Clash or Surge.",
models:
"Model catalog root for provider definitions, merge/replace behavior, and optional Bedrock discovery integration. Keep provider definitions explicit and validated before relying on production failover paths.",
"models.mode":

View File

@@ -254,6 +254,9 @@ export const FIELD_LABELS: Record<string, string> = {
"tools.web.fetch.maxRedirects": "Web Fetch Max Redirects",
"tools.web.fetch.userAgent": "Web Fetch User-Agent",
"tools.web.fetch.readability": "Web Fetch Readability Extraction",
"tools.web.fetch.ssrfPolicy": "Web Fetch SSRF Policy",
"tools.web.fetch.ssrfPolicy.allowRfc2544BenchmarkRange":
"Web Fetch Allow RFC 2544 Benchmark Range",
"gateway.controlUi.basePath": "Control UI Base Path",
"gateway.controlUi.root": "Control UI Assets Root",
"gateway.controlUi.allowedOrigins": "Control UI Allowed Origins",

View File

@@ -289,6 +289,22 @@ describe("config schema", () => {
expect(parsed?.web?.fetch?.maxResponseBytes).toBe(2_000_000);
});
it("accepts web fetch ssrfPolicy in the runtime zod schema", () => {
  // The opt-in flag nested under web.fetch.ssrfPolicy must pass validation and
  // come back from the parser unchanged.
  const ssrfPolicy = { allowRfc2544BenchmarkRange: true };
  const toolsInput = { web: { fetch: { ssrfPolicy } } };

  const parsed = ToolsSchema.parse(toolsInput);
  const parsedPolicy = parsed?.web?.fetch?.ssrfPolicy;

  expect(parsedPolicy).toEqual({ allowRfc2544BenchmarkRange: true });
});
it("rejects unknown keys inside web fetch firecrawl config", () => {
expect(() =>
ToolsSchema.parse({

View File

@@ -550,6 +550,11 @@ export type ToolsConfig = {
userAgent?: string;
/** Use Readability to extract main content (default: true). */
readability?: boolean;
/** SSRF policy configuration for web_fetch. */
ssrfPolicy?: {
/** Allow RFC 2544 benchmark range IPs (198.18.0.0/15) for fake-IP proxy compatibility (e.g., Clash TUN mode, Surge). */
allowRfc2544BenchmarkRange?: boolean;
};
};
};
media?: MediaToolsConfig;

View File

@@ -330,6 +330,12 @@ export const ToolsWebFetchSchema = z
maxRedirects: z.number().int().nonnegative().optional(),
userAgent: z.string().optional(),
readability: z.boolean().optional(),
ssrfPolicy: z
.object({
allowRfc2544BenchmarkRange: z.boolean().optional(),
})
.strict()
.optional(),
// Keep the legacy Firecrawl fetch shape loadable so existing installs can
// start and then migrate cleanly through doctor.
firecrawl: z