diff --git a/CHANGELOG.md b/CHANGELOG.md index 13056320aee..c6a4cb9897f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ Docs: https://docs.openclaw.ai - Telegram/Inline buttons: allow callback-query button handling in groups (including `/models` follow-up buttons) when group policy authorizes the sender, by removing the redundant callback allowlist gate that blocked open-policy groups. (#27343) Thanks @GodsBoy. - Telegram/Streaming preview: when finalizing without an existing preview message, prime pending preview text with final answer before stop-flush so users do not briefly see stale 1-2 word fragments (for example `no` before `no problem`). (#27449) Thanks @emanuelst for the original fix direction in #19673. - Daemon/macOS launchd: forward proxy env vars into supervised service environments, keep LaunchAgent `KeepAlive=true` semantics, and harden restart sequencing to `print -> bootout -> wait old pid exit -> bootstrap -> kickstart`. (#27276) thanks @frankekn. +- Web tools/Proxy: route `web_search` provider HTTP calls (Brave, Perplexity, xAI, Gemini, Kimi), redirect resolution, and `web_fetch` through a shared proxy-aware SSRF guard path so gateway installs behind `HTTP_PROXY`/`HTTPS_PROXY`/`ALL_PROXY` no longer fail with transport `fetch failed` errors. (#27430) thanks @kevinWangSheng. - Android/Node invoke: remove native gateway WebSocket `Origin` header to avoid false origin rejections, unify invoke command registry/policy/error parsing paths, and keep command availability checks centralized to reduce dispatcher/advertisement drift. (#27257) Thanks @obviyus. - CI/Windows: shard the Windows `checks-windows` test lane into two matrix jobs and honor explicit shard index overrides in `scripts/test-parallel.mjs` to reduce CI critical-path wall time. (#27234) Thanks @joshavant. - Agents/Models config: preserve agent-level provider `apiKey` and `baseUrl` during merge-mode `models.json` updates when agent values are present. (#27293) thanks @Sid-Qin. diff --git a/src/agents/tools/web-fetch.ts b/src/agents/tools/web-fetch.ts index 06f4ac1d973..b2141f2100d 100644 --- a/src/agents/tools/web-fetch.ts +++ b/src/agents/tools/web-fetch.ts @@ -527,6 +527,7 @@ async function runWebFetch(params: WebFetchRuntimeParams): Promise Promise }> { + const { response, release } = await fetchWithSsrFGuard({ + url: params.url, + init: params.init, + timeoutMs: params.timeoutSeconds * 1000, + policy: TRUSTED_NETWORK_SSRF_POLICY, + proxy: "env", + }); + return { response, release }; +} + async function runGeminiSearch(params: { query: string; apiKey: string; @@ -608,75 +622,81 @@ async function runGeminiSearch(params: { }): Promise<{ content: string; citations: Array<{ url: string; title?: string }> }> { const endpoint = `${GEMINI_API_BASE}/models/${params.model}:generateContent`; - const res = await fetch(endpoint, { - method: "POST", - headers: { - "Content-Type": "application/json", - "x-goog-api-key": params.apiKey, - }, - body: JSON.stringify({ - contents: [ - { - parts: [{ text: params.query }], - }, - ], - tools: [{ google_search: {} }], - }), - signal: withTimeout(undefined, params.timeoutSeconds * 1000), - }); - - if (!res.ok) { - const detailResult = await readResponseText(res, { maxBytes: 64_000 }); - // Strip API key from any error detail to prevent accidental key leakage in logs - const safeDetail = (detailResult.text || res.statusText).replace(/key=[^&\s]+/gi, "key=***"); - throw new Error(`Gemini API error (${res.status}): ${safeDetail}`); - } - - let data: GeminiGroundingResponse; - try { - data = (await res.json()) as GeminiGroundingResponse; - } catch (err) { - const safeError = String(err).replace(/key=[^&\s]+/gi, "key=***"); - throw new Error(`Gemini API returned invalid JSON: ${safeError}`, { cause: err }); - } - - if (data.error) { - const rawMsg = data.error.message || data.error.status || "unknown"; - const safeMsg = rawMsg.replace(/key=[^&\s]+/gi, "key=***"); - throw new Error(`Gemini API error (${data.error.code}): ${safeMsg}`); - } - - const candidate = data.candidates?.[0]; - const content = - candidate?.content?.parts - ?.map((p) => p.text) - .filter(Boolean) - .join("\n") ?? "No response"; - - const groundingChunks = candidate?.groundingMetadata?.groundingChunks ?? []; - const rawCitations = groundingChunks - .filter((chunk) => chunk.web?.uri) - .map((chunk) => ({ - url: chunk.web!.uri!, - title: chunk.web?.title || undefined, - })); - - // Resolve Google grounding redirect URLs to direct URLs with concurrency cap. - // Gemini typically returns 3-8 citations; cap at 10 concurrent to be safe. - const MAX_CONCURRENT_REDIRECTS = 10; - const citations: Array<{ url: string; title?: string }> = []; - for (let i = 0; i < rawCitations.length; i += MAX_CONCURRENT_REDIRECTS) { - const batch = rawCitations.slice(i, i + MAX_CONCURRENT_REDIRECTS); - const resolved = await Promise.all( - batch.map(async (citation) => { - const resolvedUrl = await resolveRedirectUrl(citation.url); - return { ...citation, url: resolvedUrl }; + const { response: res, release } = await fetchTrustedWebSearchEndpoint({ + url: endpoint, + timeoutSeconds: params.timeoutSeconds, + init: { + method: "POST", + headers: { + "Content-Type": "application/json", + "x-goog-api-key": params.apiKey, + }, + body: JSON.stringify({ + contents: [ + { + parts: [{ text: params.query }], + }, + ], + tools: [{ google_search: {} }], }), - ); - citations.push(...resolved); - } + }, + }); + try { + if (!res.ok) { + const detailResult = await readResponseText(res, { maxBytes: 64_000 }); + // Strip API key from any error detail to prevent accidental key leakage in logs + const safeDetail = (detailResult.text || res.statusText).replace(/key=[^&\s]+/gi, "key=***"); + throw new Error(`Gemini API error (${res.status}): ${safeDetail}`); + } - return { content, citations }; + let data: GeminiGroundingResponse; + try { + data = (await res.json()) as GeminiGroundingResponse; + } catch (err) { + const safeError = String(err).replace(/key=[^&\s]+/gi, "key=***"); + throw new Error(`Gemini API returned invalid JSON: ${safeError}`, { cause: err }); + } + + if (data.error) { + const rawMsg = data.error.message || data.error.status || "unknown"; + const safeMsg = rawMsg.replace(/key=[^&\s]+/gi, "key=***"); + throw new Error(`Gemini API error (${data.error.code}): ${safeMsg}`); + } + + const candidate = data.candidates?.[0]; + const content = + candidate?.content?.parts + ?.map((p) => p.text) + .filter(Boolean) + .join("\n") ?? "No response"; + + const groundingChunks = candidate?.groundingMetadata?.groundingChunks ?? []; + const rawCitations = groundingChunks + .filter((chunk) => chunk.web?.uri) + .map((chunk) => ({ + url: chunk.web!.uri!, + title: chunk.web?.title || undefined, + })); + + // Resolve Google grounding redirect URLs to direct URLs with concurrency cap. + // Gemini typically returns 3-8 citations; cap at 10 concurrent to be safe. + const MAX_CONCURRENT_REDIRECTS = 10; + const citations: Array<{ url: string; title?: string }> = []; + for (let i = 0; i < rawCitations.length; i += MAX_CONCURRENT_REDIRECTS) { + const batch = rawCitations.slice(i, i + MAX_CONCURRENT_REDIRECTS); + const resolved = await Promise.all( + batch.map(async (citation) => { + const resolvedUrl = await resolveRedirectUrl(citation.url); + return { ...citation, url: resolvedUrl }; + }), + ); + citations.push(...resolved); + } + + return { content, citations }; + } finally { + await release(); + } } const REDIRECT_TIMEOUT_MS = 5000; @@ -692,6 +712,7 @@ async function resolveRedirectUrl(url: string): Promise { init: { method: "HEAD" }, timeoutMs: REDIRECT_TIMEOUT_MS, policy: TRUSTED_NETWORK_SSRF_POLICY, + proxy: "env", }); try { return finalUrl || url; @@ -871,27 +892,33 @@ async function runPerplexitySearch(params: { body.search_recency_filter = recencyFilter; } - const res = await fetch(endpoint, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${params.apiKey}`, - "HTTP-Referer": "https://openclaw.ai", - "X-Title": "OpenClaw Web Search", + const { response: res, release } = await fetchTrustedWebSearchEndpoint({ + url: endpoint, + timeoutSeconds: params.timeoutSeconds, + init: { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${params.apiKey}`, + "HTTP-Referer": "https://openclaw.ai", + "X-Title": "OpenClaw Web Search", + }, + body: JSON.stringify(body), }, - body: JSON.stringify(body), - signal: withTimeout(undefined, params.timeoutSeconds * 1000), }); + try { + if (!res.ok) { + return await throwWebSearchApiError(res, "Perplexity"); + } - if (!res.ok) { - return throwWebSearchApiError(res, "Perplexity"); + const data = (await res.json()) as PerplexitySearchResponse; + const content = data.choices?.[0]?.message?.content ?? "No response"; + const citations = data.citations ?? []; + + return { content, citations }; + } finally { + await release(); } - - const data = (await res.json()) as PerplexitySearchResponse; - const content = data.choices?.[0]?.message?.content ?? "No response"; - const citations = data.citations ?? []; - - return { content, citations }; } async function runGrokSearch(params: { @@ -921,28 +948,34 @@ async function runGrokSearch(params: { // citations are returned automatically when available — we just parse // them from the response without requesting them explicitly (#12910). - const res = await fetch(XAI_API_ENDPOINT, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${params.apiKey}`, + const { response: res, release } = await fetchTrustedWebSearchEndpoint({ + url: XAI_API_ENDPOINT, + timeoutSeconds: params.timeoutSeconds, + init: { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${params.apiKey}`, + }, + body: JSON.stringify(body), }, - body: JSON.stringify(body), - signal: withTimeout(undefined, params.timeoutSeconds * 1000), }); + try { + if (!res.ok) { + return await throwWebSearchApiError(res, "xAI"); + } - if (!res.ok) { - return throwWebSearchApiError(res, "xAI"); + const data = (await res.json()) as GrokSearchResponse; + const { text: extractedText, annotationCitations } = extractGrokContent(data); + const content = extractedText ?? "No response"; + // Prefer top-level citations; fall back to annotation-derived ones + const citations = (data.citations ?? []).length > 0 ? data.citations! : annotationCitations; + const inlineCitations = data.inline_citations; + + return { content, citations, inlineCitations }; + } finally { + await release(); } - - const data = (await res.json()) as GrokSearchResponse; - const { text: extractedText, annotationCitations } = extractGrokContent(data); - const content = extractedText ?? "No response"; - // Prefer top-level citations; fall back to annotation-derived ones - const citations = (data.citations ?? []).length > 0 ? data.citations! : annotationCitations; - const inlineCitations = data.inline_citations; - - return { content, citations, inlineCitations }; } function extractKimiMessageText(message: KimiMessage | undefined): string | undefined { @@ -1014,65 +1047,71 @@ async function runKimiSearch(params: { const MAX_ROUNDS = 3; for (let round = 0; round < MAX_ROUNDS; round += 1) { - const res = await fetch(endpoint, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bearer ${params.apiKey}`, + const { response: res, release } = await fetchTrustedWebSearchEndpoint({ + url: endpoint, + timeoutSeconds: params.timeoutSeconds, + init: { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${params.apiKey}`, + }, + body: JSON.stringify({ + model: params.model, + messages, + tools: [KIMI_WEB_SEARCH_TOOL], + }), }, - body: JSON.stringify({ - model: params.model, - messages, - tools: [KIMI_WEB_SEARCH_TOOL], - }), - signal: withTimeout(undefined, params.timeoutSeconds * 1000), }); - - if (!res.ok) { - return throwWebSearchApiError(res, "Kimi"); - } - - const data = (await res.json()) as KimiSearchResponse; - for (const citation of extractKimiCitations(data)) { - collectedCitations.add(citation); - } - const choice = data.choices?.[0]; - const message = choice?.message; - const text = extractKimiMessageText(message); - const toolCalls = message?.tool_calls ?? []; - - if (choice?.finish_reason !== "tool_calls" || toolCalls.length === 0) { - return { content: text ?? "No response", citations: [...collectedCitations] }; - } - - messages.push({ - role: "assistant", - content: message?.content ?? "", - ...(message?.reasoning_content - ? { - reasoning_content: message.reasoning_content, - } - : {}), - tool_calls: toolCalls, - }); - - const toolContent = buildKimiToolResultContent(data); - let pushedToolResult = false; - for (const toolCall of toolCalls) { - const toolCallId = toolCall.id?.trim(); - if (!toolCallId) { - continue; + try { + if (!res.ok) { + return await throwWebSearchApiError(res, "Kimi"); } - pushedToolResult = true; - messages.push({ - role: "tool", - tool_call_id: toolCallId, - content: toolContent, - }); - } - if (!pushedToolResult) { - return { content: text ?? "No response", citations: [...collectedCitations] }; + const data = (await res.json()) as KimiSearchResponse; + for (const citation of extractKimiCitations(data)) { + collectedCitations.add(citation); + } + const choice = data.choices?.[0]; + const message = choice?.message; + const text = extractKimiMessageText(message); + const toolCalls = message?.tool_calls ?? []; + + if (choice?.finish_reason !== "tool_calls" || toolCalls.length === 0) { + return { content: text ?? "No response", citations: [...collectedCitations] }; + } + + messages.push({ + role: "assistant", + content: message?.content ?? "", + ...(message?.reasoning_content + ? { + reasoning_content: message.reasoning_content, + } + : {}), + tool_calls: toolCalls, + }); + + const toolContent = buildKimiToolResultContent(data); + let pushedToolResult = false; + for (const toolCall of toolCalls) { + const toolCallId = toolCall.id?.trim(); + if (!toolCallId) { + continue; + } + pushedToolResult = true; + messages.push({ + role: "tool", + tool_call_id: toolCallId, + content: toolContent, + }); + } + + if (!pushedToolResult) { + return { content: text ?? "No response", citations: [...collectedCitations] }; + } + } finally { + await release(); } } @@ -1248,42 +1287,50 @@ async function runWebSearch(params: { url.searchParams.set("freshness", params.freshness); } - // Resolve proxy from environment variables - const proxyUrl = resolveProxyUrl(); - const dispatcher = proxyUrl ? new ProxyAgent(proxyUrl) : undefined; - - const res = await undiciFetch(url.toString(), { - method: "GET", - headers: { - Accept: "application/json", - "X-Subscription-Token": params.apiKey, + const { response: res, release } = await fetchTrustedWebSearchEndpoint({ + url: url.toString(), + timeoutSeconds: params.timeoutSeconds, + init: { + method: "GET", + headers: { + Accept: "application/json", + "X-Subscription-Token": params.apiKey, + }, }, - signal: withTimeout(undefined, params.timeoutSeconds * 1000), - ...(dispatcher ? { dispatcher } : {}), }); + let mapped: Array<{ + title: string; + url: string; + description: string; + published?: string; + siteName?: string; + }> = []; + try { + if (!res.ok) { + const detailResult = await readResponseText(res, { maxBytes: 64_000 }); + const detail = detailResult.text; + throw new Error(`Brave Search API error (${res.status}): ${detail || res.statusText}`); + } - if (!res.ok) { - const detailResult = await readResponseText(res, { maxBytes: 64_000 }); - const detail = detailResult.text; - throw new Error(`Brave Search API error (${res.status}): ${detail || res.statusText}`); + const data = (await res.json()) as BraveSearchResponse; + const results = Array.isArray(data.web?.results) ? (data.web?.results ?? []) : []; + mapped = results.map((entry) => { + const description = entry.description ?? ""; + const title = entry.title ?? ""; + const url = entry.url ?? ""; + const rawSiteName = resolveSiteName(url); + return { + title: title ? wrapWebContent(title, "web_search") : "", + url, // Keep raw for tool chaining + description: description ? wrapWebContent(description, "web_search") : "", + published: entry.age || undefined, + siteName: rawSiteName || undefined, + }; + }); + } finally { + await release(); } - const data = (await res.json()) as BraveSearchResponse; - const results = Array.isArray(data.web?.results) ? (data.web?.results ?? []) : []; - const mapped = results.map((entry) => { - const description = entry.description ?? ""; - const title = entry.title ?? ""; - const url = entry.url ?? ""; - const rawSiteName = resolveSiteName(url); - return { - title: title ? wrapWebContent(title, "web_search") : "", - url, // Keep raw for tool chaining - description: description ? wrapWebContent(description, "web_search") : "", - published: entry.age || undefined, - siteName: rawSiteName || undefined, - }; - }); - const payload = { query: params.query, provider: params.provider, diff --git a/src/agents/tools/web-tools.enabled-defaults.test.ts b/src/agents/tools/web-tools.enabled-defaults.test.ts index b129581f5a0..5c7d078f739 100644 --- a/src/agents/tools/web-tools.enabled-defaults.test.ts +++ b/src/agents/tools/web-tools.enabled-defaults.test.ts @@ -1,3 +1,4 @@ +import { EnvHttpProxyAgent } from "undici"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { withFetchPreconnect } from "../../test-utils/fetch-mock.js"; import { createWebFetchTool, createWebSearchTool } from "./web-tools.js"; @@ -143,6 +144,19 @@ describe("web_search country and language parameters", () => { expect(mockFetch).not.toHaveBeenCalled(); expect(result?.details).toMatchObject({ error: "invalid_freshness" }); }); + + it("uses proxy-aware dispatcher when HTTP_PROXY is configured", async () => { + vi.stubEnv("HTTP_PROXY", "http://127.0.0.1:7890"); + const mockFetch = installMockFetch({ web: { results: [] } }); + const tool = createWebSearchTool({ config: undefined, sandboxed: true }); + + await tool?.execute?.("call-1", { query: "proxy-test" }); + + const requestInit = mockFetch.mock.calls[0]?.[1] as + | (RequestInit & { dispatcher?: unknown }) + | undefined; + expect(requestInit?.dispatcher).toBeInstanceOf(EnvHttpProxyAgent); + }); }); describe("web_search perplexity baseUrl defaults", () => { diff --git a/src/agents/tools/web-tools.fetch.test.ts b/src/agents/tools/web-tools.fetch.test.ts index 0c69e1e1767..53836b92067 100644 --- a/src/agents/tools/web-tools.fetch.test.ts +++ b/src/agents/tools/web-tools.fetch.test.ts @@ -1,3 +1,4 @@ +import { EnvHttpProxyAgent } from "undici"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import * as ssrf from "../../infra/net/ssrf.js"; import { withFetchPreconnect } from "../../test-utils/fetch-mock.js"; @@ -146,6 +147,7 @@ describe("web_fetch extraction fallbacks", () => { afterEach(() => { global.fetch = priorFetch; + vi.unstubAllEnvs(); vi.restoreAllMocks(); }); @@ -256,6 +258,27 @@ describe("web_fetch extraction fallbacks", () => { expect(details?.warning).toContain("Response body truncated"); }); + it("uses proxy-aware dispatcher when HTTP_PROXY is configured", async () => { + vi.stubEnv("HTTP_PROXY", "http://127.0.0.1:7890"); + const mockFetch = installMockFetch((input: RequestInfo | URL) => + Promise.resolve({ + ok: true, + status: 200, + headers: makeHeaders({ "content-type": "text/plain" }), + text: async () => "proxy body", + url: requestUrl(input), + } as Response), + ); + const tool = createFetchTool({ firecrawl: { enabled: false } }); + + await tool?.execute?.("call", { url: "https://example.com/proxy" }); + + const requestInit = mockFetch.mock.calls[0]?.[1] as + | (RequestInit & { dispatcher?: unknown }) + | undefined; + expect(requestInit?.dispatcher).toBeInstanceOf(EnvHttpProxyAgent); + }); + // NOTE: Test for wrapping url/finalUrl/warning fields requires DNS mocking. // The sanitization of these fields is verified by external-content.test.ts tests. diff --git a/src/infra/net/fetch-guard.ts b/src/infra/net/fetch-guard.ts index c3e2b7864b1..77260f474f5 100644 --- a/src/infra/net/fetch-guard.ts +++ b/src/infra/net/fetch-guard.ts @@ -1,4 +1,4 @@ -import type { Dispatcher } from "undici"; +import { EnvHttpProxyAgent, type Dispatcher } from "undici"; import { logWarn } from "../../logger.js"; import { bindAbortRelay } from "../../utils/fetch-timeout.js"; import { @@ -22,6 +22,7 @@ export type GuardedFetchOptions = { policy?: SsrFPolicy; lookupFn?: LookupFn; pinDns?: boolean; + proxy?: "env"; auditContext?: string; }; @@ -32,6 +33,14 @@ export type GuardedFetchResult = { }; const DEFAULT_MAX_REDIRECTS = 3; +const ENV_PROXY_KEYS = [ + "HTTP_PROXY", + "HTTPS_PROXY", + "ALL_PROXY", + "http_proxy", + "https_proxy", + "all_proxy", +] as const; const CROSS_ORIGIN_REDIRECT_SENSITIVE_HEADERS = [ "authorization", "proxy-authorization", @@ -39,6 +48,16 @@ const CROSS_ORIGIN_REDIRECT_SENSITIVE_HEADERS = [ "cookie2", ]; +function hasEnvProxyConfigured(): boolean { + for (const key of ENV_PROXY_KEYS) { + const value = process.env[key]; + if (typeof value === "string" && value.trim()) { + return true; + } + } + return false; +} + function isRedirectStatus(status: number): boolean { return status === 301 || status === 302 || status === 303 || status === 307 || status === 308; } @@ -138,7 +157,9 @@ export async function fetchWithSsrFGuard(params: GuardedFetchOptions): Promise