fix: unify web tool proxy path (#27430) (thanks @kevinWangSheng)

This commit is contained in:
Peter Steinberger
2026-02-26 12:32:30 +01:00
parent d8e2030d47
commit 46003e85bf
6 changed files with 298 additions and 191 deletions

View File

@@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai
- Telegram/Inline buttons: allow callback-query button handling in groups (including `/models` follow-up buttons) when group policy authorizes the sender, by removing the redundant callback allowlist gate that blocked open-policy groups. (#27343) Thanks @GodsBoy.
- Telegram/Streaming preview: when finalizing without an existing preview message, prime pending preview text with final answer before stop-flush so users do not briefly see stale 1-2 word fragments (for example `no` before `no problem`). (#27449) Thanks @emanuelst for the original fix direction in #19673.
- Daemon/macOS launchd: forward proxy env vars into supervised service environments, keep LaunchAgent `KeepAlive=true` semantics, and harden restart sequencing to `print -> bootout -> wait old pid exit -> bootstrap -> kickstart`. (#27276) thanks @frankekn.
- Web tools/Proxy: route `web_search` provider HTTP calls (Brave, Perplexity, xAI, Gemini, Kimi), redirect resolution, and `web_fetch` through a shared proxy-aware SSRF guard path so gateway installs behind `HTTP_PROXY`/`HTTPS_PROXY`/`ALL_PROXY` no longer fail with transport `fetch failed` errors. (#27430) thanks @kevinWangSheng.
- Android/Node invoke: remove native gateway WebSocket `Origin` header to avoid false origin rejections, unify invoke command registry/policy/error parsing paths, and keep command availability checks centralized to reduce dispatcher/advertisement drift. (#27257) Thanks @obviyus.
- CI/Windows: shard the Windows `checks-windows` test lane into two matrix jobs and honor explicit shard index overrides in `scripts/test-parallel.mjs` to reduce CI critical-path wall time. (#27234) Thanks @joshavant.
- Agents/Models config: preserve agent-level provider `apiKey` and `baseUrl` during merge-mode `models.json` updates when agent values are present. (#27293) thanks @Sid-Qin.

View File

@@ -527,6 +527,7 @@ async function runWebFetch(params: WebFetchRuntimeParams): Promise<Record<string
url: params.url,
maxRedirects: params.maxRedirects,
timeoutMs: params.timeoutSeconds * 1000,
proxy: "env",
init: {
headers: {
Accept: "text/markdown, text/html;q=0.9, */*;q=0.1",

View File

@@ -16,7 +16,6 @@ import {
readResponseText,
resolveCacheTtlMs,
resolveTimeoutSeconds,
withTimeout,
writeCache,
} from "./web-shared.js";
@@ -600,6 +599,21 @@ function resolveGeminiModel(gemini?: GeminiConfig): string {
return fromConfig || DEFAULT_GEMINI_MODEL;
}
/**
 * Sends a web-search provider request through `fetchWithSsrFGuard` using the
 * trusted-network SSRF policy and environment-based proxy selection.
 *
 * @param params.url - Endpoint to request.
 * @param params.timeoutSeconds - Timeout in seconds; converted to milliseconds for the guard.
 * @param params.init - Request options forwarded unchanged to the guard.
 * @returns The response plus the guard's `release` callback. The search runners
 *   in this file await `release()` in a `finally` block once they are done with
 *   the response.
 */
async function fetchTrustedWebSearchEndpoint(params: {
  url: string;
  timeoutSeconds: number;
  init: RequestInit;
}): Promise<{ response: Response; release: () => Promise<void> }> {
  const { url, init, timeoutSeconds } = params;
  const guarded = await fetchWithSsrFGuard({
    url,
    init,
    timeoutMs: timeoutSeconds * 1000,
    policy: TRUSTED_NETWORK_SSRF_POLICY,
    proxy: "env",
  });
  // Return just `response` and `release`, as the declared return type specifies.
  return { response: guarded.response, release: guarded.release };
}
async function runGeminiSearch(params: {
query: string;
apiKey: string;
@@ -608,75 +622,81 @@ async function runGeminiSearch(params: {
}): Promise<{ content: string; citations: Array<{ url: string; title?: string }> }> {
const endpoint = `${GEMINI_API_BASE}/models/${params.model}:generateContent`;
const res = await fetch(endpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
"x-goog-api-key": params.apiKey,
},
body: JSON.stringify({
contents: [
{
parts: [{ text: params.query }],
},
],
tools: [{ google_search: {} }],
}),
signal: withTimeout(undefined, params.timeoutSeconds * 1000),
});
if (!res.ok) {
const detailResult = await readResponseText(res, { maxBytes: 64_000 });
// Strip API key from any error detail to prevent accidental key leakage in logs
const safeDetail = (detailResult.text || res.statusText).replace(/key=[^&\s]+/gi, "key=***");
throw new Error(`Gemini API error (${res.status}): ${safeDetail}`);
}
let data: GeminiGroundingResponse;
try {
data = (await res.json()) as GeminiGroundingResponse;
} catch (err) {
const safeError = String(err).replace(/key=[^&\s]+/gi, "key=***");
throw new Error(`Gemini API returned invalid JSON: ${safeError}`, { cause: err });
}
if (data.error) {
const rawMsg = data.error.message || data.error.status || "unknown";
const safeMsg = rawMsg.replace(/key=[^&\s]+/gi, "key=***");
throw new Error(`Gemini API error (${data.error.code}): ${safeMsg}`);
}
const candidate = data.candidates?.[0];
const content =
candidate?.content?.parts
?.map((p) => p.text)
.filter(Boolean)
.join("\n") ?? "No response";
const groundingChunks = candidate?.groundingMetadata?.groundingChunks ?? [];
const rawCitations = groundingChunks
.filter((chunk) => chunk.web?.uri)
.map((chunk) => ({
url: chunk.web!.uri!,
title: chunk.web?.title || undefined,
}));
// Resolve Google grounding redirect URLs to direct URLs with concurrency cap.
// Gemini typically returns 3-8 citations; cap at 10 concurrent to be safe.
const MAX_CONCURRENT_REDIRECTS = 10;
const citations: Array<{ url: string; title?: string }> = [];
for (let i = 0; i < rawCitations.length; i += MAX_CONCURRENT_REDIRECTS) {
const batch = rawCitations.slice(i, i + MAX_CONCURRENT_REDIRECTS);
const resolved = await Promise.all(
batch.map(async (citation) => {
const resolvedUrl = await resolveRedirectUrl(citation.url);
return { ...citation, url: resolvedUrl };
const { response: res, release } = await fetchTrustedWebSearchEndpoint({
url: endpoint,
timeoutSeconds: params.timeoutSeconds,
init: {
method: "POST",
headers: {
"Content-Type": "application/json",
"x-goog-api-key": params.apiKey,
},
body: JSON.stringify({
contents: [
{
parts: [{ text: params.query }],
},
],
tools: [{ google_search: {} }],
}),
);
citations.push(...resolved);
}
},
});
try {
if (!res.ok) {
const detailResult = await readResponseText(res, { maxBytes: 64_000 });
// Strip API key from any error detail to prevent accidental key leakage in logs
const safeDetail = (detailResult.text || res.statusText).replace(/key=[^&\s]+/gi, "key=***");
throw new Error(`Gemini API error (${res.status}): ${safeDetail}`);
}
return { content, citations };
let data: GeminiGroundingResponse;
try {
data = (await res.json()) as GeminiGroundingResponse;
} catch (err) {
const safeError = String(err).replace(/key=[^&\s]+/gi, "key=***");
throw new Error(`Gemini API returned invalid JSON: ${safeError}`, { cause: err });
}
if (data.error) {
const rawMsg = data.error.message || data.error.status || "unknown";
const safeMsg = rawMsg.replace(/key=[^&\s]+/gi, "key=***");
throw new Error(`Gemini API error (${data.error.code}): ${safeMsg}`);
}
const candidate = data.candidates?.[0];
const content =
candidate?.content?.parts
?.map((p) => p.text)
.filter(Boolean)
.join("\n") ?? "No response";
const groundingChunks = candidate?.groundingMetadata?.groundingChunks ?? [];
const rawCitations = groundingChunks
.filter((chunk) => chunk.web?.uri)
.map((chunk) => ({
url: chunk.web!.uri!,
title: chunk.web?.title || undefined,
}));
// Resolve Google grounding redirect URLs to direct URLs with concurrency cap.
// Gemini typically returns 3-8 citations; cap at 10 concurrent to be safe.
const MAX_CONCURRENT_REDIRECTS = 10;
const citations: Array<{ url: string; title?: string }> = [];
for (let i = 0; i < rawCitations.length; i += MAX_CONCURRENT_REDIRECTS) {
const batch = rawCitations.slice(i, i + MAX_CONCURRENT_REDIRECTS);
const resolved = await Promise.all(
batch.map(async (citation) => {
const resolvedUrl = await resolveRedirectUrl(citation.url);
return { ...citation, url: resolvedUrl };
}),
);
citations.push(...resolved);
}
return { content, citations };
} finally {
await release();
}
}
const REDIRECT_TIMEOUT_MS = 5000;
@@ -692,6 +712,7 @@ async function resolveRedirectUrl(url: string): Promise<string> {
init: { method: "HEAD" },
timeoutMs: REDIRECT_TIMEOUT_MS,
policy: TRUSTED_NETWORK_SSRF_POLICY,
proxy: "env",
});
try {
return finalUrl || url;
@@ -871,27 +892,33 @@ async function runPerplexitySearch(params: {
body.search_recency_filter = recencyFilter;
}
const res = await fetch(endpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${params.apiKey}`,
"HTTP-Referer": "https://openclaw.ai",
"X-Title": "OpenClaw Web Search",
const { response: res, release } = await fetchTrustedWebSearchEndpoint({
url: endpoint,
timeoutSeconds: params.timeoutSeconds,
init: {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${params.apiKey}`,
"HTTP-Referer": "https://openclaw.ai",
"X-Title": "OpenClaw Web Search",
},
body: JSON.stringify(body),
},
body: JSON.stringify(body),
signal: withTimeout(undefined, params.timeoutSeconds * 1000),
});
try {
if (!res.ok) {
return await throwWebSearchApiError(res, "Perplexity");
}
if (!res.ok) {
return throwWebSearchApiError(res, "Perplexity");
const data = (await res.json()) as PerplexitySearchResponse;
const content = data.choices?.[0]?.message?.content ?? "No response";
const citations = data.citations ?? [];
return { content, citations };
} finally {
await release();
}
const data = (await res.json()) as PerplexitySearchResponse;
const content = data.choices?.[0]?.message?.content ?? "No response";
const citations = data.citations ?? [];
return { content, citations };
}
async function runGrokSearch(params: {
@@ -921,28 +948,34 @@ async function runGrokSearch(params: {
// citations are returned automatically when available — we just parse
// them from the response without requesting them explicitly (#12910).
const res = await fetch(XAI_API_ENDPOINT, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${params.apiKey}`,
const { response: res, release } = await fetchTrustedWebSearchEndpoint({
url: XAI_API_ENDPOINT,
timeoutSeconds: params.timeoutSeconds,
init: {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${params.apiKey}`,
},
body: JSON.stringify(body),
},
body: JSON.stringify(body),
signal: withTimeout(undefined, params.timeoutSeconds * 1000),
});
try {
if (!res.ok) {
return await throwWebSearchApiError(res, "xAI");
}
if (!res.ok) {
return throwWebSearchApiError(res, "xAI");
const data = (await res.json()) as GrokSearchResponse;
const { text: extractedText, annotationCitations } = extractGrokContent(data);
const content = extractedText ?? "No response";
// Prefer top-level citations; fall back to annotation-derived ones
const citations = (data.citations ?? []).length > 0 ? data.citations! : annotationCitations;
const inlineCitations = data.inline_citations;
return { content, citations, inlineCitations };
} finally {
await release();
}
const data = (await res.json()) as GrokSearchResponse;
const { text: extractedText, annotationCitations } = extractGrokContent(data);
const content = extractedText ?? "No response";
// Prefer top-level citations; fall back to annotation-derived ones
const citations = (data.citations ?? []).length > 0 ? data.citations! : annotationCitations;
const inlineCitations = data.inline_citations;
return { content, citations, inlineCitations };
}
function extractKimiMessageText(message: KimiMessage | undefined): string | undefined {
@@ -1014,65 +1047,71 @@ async function runKimiSearch(params: {
const MAX_ROUNDS = 3;
for (let round = 0; round < MAX_ROUNDS; round += 1) {
const res = await fetch(endpoint, {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${params.apiKey}`,
const { response: res, release } = await fetchTrustedWebSearchEndpoint({
url: endpoint,
timeoutSeconds: params.timeoutSeconds,
init: {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${params.apiKey}`,
},
body: JSON.stringify({
model: params.model,
messages,
tools: [KIMI_WEB_SEARCH_TOOL],
}),
},
body: JSON.stringify({
model: params.model,
messages,
tools: [KIMI_WEB_SEARCH_TOOL],
}),
signal: withTimeout(undefined, params.timeoutSeconds * 1000),
});
if (!res.ok) {
return throwWebSearchApiError(res, "Kimi");
}
const data = (await res.json()) as KimiSearchResponse;
for (const citation of extractKimiCitations(data)) {
collectedCitations.add(citation);
}
const choice = data.choices?.[0];
const message = choice?.message;
const text = extractKimiMessageText(message);
const toolCalls = message?.tool_calls ?? [];
if (choice?.finish_reason !== "tool_calls" || toolCalls.length === 0) {
return { content: text ?? "No response", citations: [...collectedCitations] };
}
messages.push({
role: "assistant",
content: message?.content ?? "",
...(message?.reasoning_content
? {
reasoning_content: message.reasoning_content,
}
: {}),
tool_calls: toolCalls,
});
const toolContent = buildKimiToolResultContent(data);
let pushedToolResult = false;
for (const toolCall of toolCalls) {
const toolCallId = toolCall.id?.trim();
if (!toolCallId) {
continue;
try {
if (!res.ok) {
return await throwWebSearchApiError(res, "Kimi");
}
pushedToolResult = true;
messages.push({
role: "tool",
tool_call_id: toolCallId,
content: toolContent,
});
}
if (!pushedToolResult) {
return { content: text ?? "No response", citations: [...collectedCitations] };
const data = (await res.json()) as KimiSearchResponse;
for (const citation of extractKimiCitations(data)) {
collectedCitations.add(citation);
}
const choice = data.choices?.[0];
const message = choice?.message;
const text = extractKimiMessageText(message);
const toolCalls = message?.tool_calls ?? [];
if (choice?.finish_reason !== "tool_calls" || toolCalls.length === 0) {
return { content: text ?? "No response", citations: [...collectedCitations] };
}
messages.push({
role: "assistant",
content: message?.content ?? "",
...(message?.reasoning_content
? {
reasoning_content: message.reasoning_content,
}
: {}),
tool_calls: toolCalls,
});
const toolContent = buildKimiToolResultContent(data);
let pushedToolResult = false;
for (const toolCall of toolCalls) {
const toolCallId = toolCall.id?.trim();
if (!toolCallId) {
continue;
}
pushedToolResult = true;
messages.push({
role: "tool",
tool_call_id: toolCallId,
content: toolContent,
});
}
if (!pushedToolResult) {
return { content: text ?? "No response", citations: [...collectedCitations] };
}
} finally {
await release();
}
}
@@ -1248,42 +1287,50 @@ async function runWebSearch(params: {
url.searchParams.set("freshness", params.freshness);
}
// Resolve proxy from environment variables
const proxyUrl = resolveProxyUrl();
const dispatcher = proxyUrl ? new ProxyAgent(proxyUrl) : undefined;
const res = await undiciFetch(url.toString(), {
method: "GET",
headers: {
Accept: "application/json",
"X-Subscription-Token": params.apiKey,
const { response: res, release } = await fetchTrustedWebSearchEndpoint({
url: url.toString(),
timeoutSeconds: params.timeoutSeconds,
init: {
method: "GET",
headers: {
Accept: "application/json",
"X-Subscription-Token": params.apiKey,
},
},
signal: withTimeout(undefined, params.timeoutSeconds * 1000),
...(dispatcher ? { dispatcher } : {}),
});
let mapped: Array<{
title: string;
url: string;
description: string;
published?: string;
siteName?: string;
}> = [];
try {
if (!res.ok) {
const detailResult = await readResponseText(res, { maxBytes: 64_000 });
const detail = detailResult.text;
throw new Error(`Brave Search API error (${res.status}): ${detail || res.statusText}`);
}
if (!res.ok) {
const detailResult = await readResponseText(res, { maxBytes: 64_000 });
const detail = detailResult.text;
throw new Error(`Brave Search API error (${res.status}): ${detail || res.statusText}`);
const data = (await res.json()) as BraveSearchResponse;
const results = Array.isArray(data.web?.results) ? (data.web?.results ?? []) : [];
mapped = results.map((entry) => {
const description = entry.description ?? "";
const title = entry.title ?? "";
const url = entry.url ?? "";
const rawSiteName = resolveSiteName(url);
return {
title: title ? wrapWebContent(title, "web_search") : "",
url, // Keep raw for tool chaining
description: description ? wrapWebContent(description, "web_search") : "",
published: entry.age || undefined,
siteName: rawSiteName || undefined,
};
});
} finally {
await release();
}
const data = (await res.json()) as BraveSearchResponse;
const results = Array.isArray(data.web?.results) ? (data.web?.results ?? []) : [];
const mapped = results.map((entry) => {
const description = entry.description ?? "";
const title = entry.title ?? "";
const url = entry.url ?? "";
const rawSiteName = resolveSiteName(url);
return {
title: title ? wrapWebContent(title, "web_search") : "",
url, // Keep raw for tool chaining
description: description ? wrapWebContent(description, "web_search") : "",
published: entry.age || undefined,
siteName: rawSiteName || undefined,
};
});
const payload = {
query: params.query,
provider: params.provider,

View File

@@ -1,3 +1,4 @@
import { EnvHttpProxyAgent } from "undici";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { withFetchPreconnect } from "../../test-utils/fetch-mock.js";
import { createWebFetchTool, createWebSearchTool } from "./web-tools.js";
@@ -143,6 +144,19 @@ describe("web_search country and language parameters", () => {
expect(mockFetch).not.toHaveBeenCalled();
expect(result?.details).toMatchObject({ error: "invalid_freshness" });
});
// Checks that web_search passes an EnvHttpProxyAgent dispatcher to fetch once HTTP_PROXY is set.
it("uses proxy-aware dispatcher when HTTP_PROXY is configured", async () => {
  type InitWithDispatcher = RequestInit & { dispatcher?: unknown };

  vi.stubEnv("HTTP_PROXY", "http://127.0.0.1:7890");
  const fetchSpy = installMockFetch({ web: { results: [] } });

  const searchTool = createWebSearchTool({ config: undefined, sandboxed: true });
  await searchTool?.execute?.("call-1", { query: "proxy-test" });

  // The second argument of the first recorded fetch call is the request init.
  const firstCall = fetchSpy.mock.calls[0];
  const init = firstCall?.[1] as InitWithDispatcher | undefined;
  expect(init?.dispatcher).toBeInstanceOf(EnvHttpProxyAgent);
});
});
describe("web_search perplexity baseUrl defaults", () => {

View File

@@ -1,3 +1,4 @@
import { EnvHttpProxyAgent } from "undici";
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import * as ssrf from "../../infra/net/ssrf.js";
import { withFetchPreconnect } from "../../test-utils/fetch-mock.js";
@@ -146,6 +147,7 @@ describe("web_fetch extraction fallbacks", () => {
// Per-test teardown: put `priorFetch` back on `global.fetch`, then clear any
// env vars stubbed via `vi.stubEnv` and restore all vitest mocks.
afterEach(() => {
global.fetch = priorFetch;
vi.unstubAllEnvs();
vi.restoreAllMocks();
});
@@ -256,6 +258,27 @@ describe("web_fetch extraction fallbacks", () => {
expect(details?.warning).toContain("Response body truncated");
});
// Checks that web_fetch passes an EnvHttpProxyAgent dispatcher to fetch once HTTP_PROXY is set.
it("uses proxy-aware dispatcher when HTTP_PROXY is configured", async () => {
  type InitWithDispatcher = RequestInit & { dispatcher?: unknown };

  vi.stubEnv("HTTP_PROXY", "http://127.0.0.1:7890");
  const fetchSpy = installMockFetch((input: RequestInfo | URL) => {
    // Minimal plain-text response stub that echoes the requested URL.
    const stubbed = {
      ok: true,
      status: 200,
      headers: makeHeaders({ "content-type": "text/plain" }),
      text: async () => "proxy body",
      url: requestUrl(input),
    } as Response;
    return Promise.resolve(stubbed);
  });

  const fetchTool = createFetchTool({ firecrawl: { enabled: false } });
  await fetchTool?.execute?.("call", { url: "https://example.com/proxy" });

  // The second argument of the first recorded fetch call is the request init.
  const firstCall = fetchSpy.mock.calls[0];
  const init = firstCall?.[1] as InitWithDispatcher | undefined;
  expect(init?.dispatcher).toBeInstanceOf(EnvHttpProxyAgent);
});
// NOTE: Test for wrapping url/finalUrl/warning fields requires DNS mocking.
// The sanitization of these fields is verified by external-content.test.ts tests.

View File

@@ -1,4 +1,4 @@
import type { Dispatcher } from "undici";
import { EnvHttpProxyAgent, type Dispatcher } from "undici";
import { logWarn } from "../../logger.js";
import { bindAbortRelay } from "../../utils/fetch-timeout.js";
import {
@@ -22,6 +22,7 @@ export type GuardedFetchOptions = {
policy?: SsrFPolicy;
lookupFn?: LookupFn;
pinDns?: boolean;
proxy?: "env";
auditContext?: string;
};
@@ -32,6 +33,14 @@ export type GuardedFetchResult = {
};
const DEFAULT_MAX_REDIRECTS = 3;
// Environment variable names (upper- and lower-case spellings) that
// hasEnvProxyConfigured() inspects to decide whether a proxy is configured.
// NOTE(review): undici documents EnvHttpProxyAgent as reading only
// http_proxy/https_proxy/no_proxy. Confirm that an ALL_PROXY-only environment
// is really proxied, rather than merely bypassing the pinned-DNS dispatcher.
const ENV_PROXY_KEYS = [
"HTTP_PROXY",
"HTTPS_PROXY",
"ALL_PROXY",
"http_proxy",
"https_proxy",
"all_proxy",
] as const;
const CROSS_ORIGIN_REDIRECT_SENSITIVE_HEADERS = [
"authorization",
"proxy-authorization",
@@ -39,6 +48,16 @@ const CROSS_ORIGIN_REDIRECT_SENSITIVE_HEADERS = [
"cookie2",
];
/**
 * Reports whether any environment variable named in `ENV_PROXY_KEYS` is set
 * to a string that is non-empty after trimming whitespace.
 */
function hasEnvProxyConfigured(): boolean {
  return ENV_PROXY_KEYS.some((name) => {
    const raw = process.env[name];
    return typeof raw === "string" && raw.trim().length > 0;
  });
}
/** Returns true when `status` is one of the HTTP redirect codes 301, 302, 303, 307 or 308. */
function isRedirectStatus(status: number): boolean {
  switch (status) {
    case 301:
    case 302:
    case 303:
    case 307:
    case 308:
      return true;
    default:
      return false;
  }
}
@@ -138,7 +157,9 @@ export async function fetchWithSsrFGuard(params: GuardedFetchOptions): Promise<G
lookupFn: params.lookupFn,
policy: params.policy,
});
if (params.pinDns !== false) {
if (params.proxy === "env" && hasEnvProxyConfigured()) {
dispatcher = new EnvHttpProxyAgent();
} else if (params.pinDns !== false) {
dispatcher = createPinnedDispatcher(pinned);
}