diff --git a/src/media-understanding/runner.proxy.test.ts b/src/media-understanding/runner.proxy.test.ts index 78959fc946a..b96f099d3cc 100644 --- a/src/media-understanding/runner.proxy.test.ts +++ b/src/media-understanding/runner.proxy.test.ts @@ -1,26 +1,57 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; import { buildProviderRegistry, runCapability } from "./runner.js"; -import { withAudioFixture, withMediaFixture } from "./runner.test-utils.js"; +import { withAudioFixture, withVideoFixture } from "./runner.test-utils.js"; import type { AudioTranscriptionRequest, VideoDescriptionRequest } from "./types.js"; -async function withVideoFixture( - filePrefix: string, - run: (params: { - ctx: { MediaPath: string; MediaType: string }; - media: ReturnType; - cache: ReturnType; - }) => Promise, -) { - await withMediaFixture( - { - filePrefix, - extension: "mp4", - mediaType: "video/mp4", - fileContents: Buffer.from("video"), - }, - run, - ); +async function runAudioCapabilityWithFetchCapture(params: { + fixturePrefix: string; + outputText: string; +}): Promise { + let seenFetchFn: typeof fetch | undefined; + await withAudioFixture(params.fixturePrefix, async ({ ctx, media, cache }) => { + const providerRegistry = buildProviderRegistry({ + openai: { + id: "openai", + capabilities: ["audio"], + transcribeAudio: async (req: AudioTranscriptionRequest) => { + seenFetchFn = req.fetchFn; + return { text: params.outputText, model: req.model }; + }, + }, + }); + + const cfg = { + models: { + providers: { + openai: { + apiKey: "test-key", + models: [], + }, + }, + }, + tools: { + media: { + audio: { + enabled: true, + models: [{ provider: "openai", model: "whisper-1" }], + }, + }, + }, + } as unknown as OpenClawConfig; + + const result = await runCapability({ + capability: "audio", + cfg, + ctx, + attachments: cache, + media, + providerRegistry, + }); + + expect(result.outputs[0]?.text).toBe(params.outputText); + }); + return seenFetchFn; } describe("runCapability proxy fetch passthrough", () => { @@ -29,53 +60,12 @@ describe("runCapability proxy fetch passthrough", () => { it("passes fetchFn to audio provider when HTTPS_PROXY is set", async () => { vi.stubEnv("HTTPS_PROXY", "http://proxy.test:8080"); - - await withAudioFixture("openclaw-audio-proxy", async ({ ctx, media, cache }) => { - let seenFetchFn: typeof fetch | undefined; - - const providerRegistry = buildProviderRegistry({ - openai: { - id: "openai", - capabilities: ["audio"], - transcribeAudio: async (req: AudioTranscriptionRequest) => { - seenFetchFn = req.fetchFn; - return { text: "transcribed", model: req.model }; - }, - }, - }); - - const cfg = { - models: { - providers: { - openai: { - apiKey: "test-key", - models: [], - }, - }, - }, - tools: { - media: { - audio: { - enabled: true, - models: [{ provider: "openai", model: "whisper-1" }], - }, - }, - }, - } as unknown as OpenClawConfig; - - const result = await runCapability({ - capability: "audio", - cfg, - ctx, - attachments: cache, - media, - providerRegistry, - }); - - expect(result.outputs[0]?.text).toBe("transcribed"); - expect(seenFetchFn).toBeDefined(); - expect(seenFetchFn).not.toBe(globalThis.fetch); + const seenFetchFn = await runAudioCapabilityWithFetchCapture({ + fixturePrefix: "openclaw-audio-proxy", + outputText: "transcribed", }); + expect(seenFetchFn).toBeDefined(); + expect(seenFetchFn).not.toBe(globalThis.fetch); }); it("passes fetchFn to video provider when HTTPS_PROXY is set", async () => { @@ -134,50 +124,10 @@ describe("runCapability proxy fetch passthrough", () => { vi.stubEnv("https_proxy", ""); vi.stubEnv("http_proxy", ""); - await withAudioFixture("openclaw-audio-no-proxy", async ({ ctx, media, cache }) => { - let seenFetchFn: typeof fetch | undefined; - - const providerRegistry = buildProviderRegistry({ - openai: { - id: "openai", - capabilities: ["audio"], - transcribeAudio: async (req: AudioTranscriptionRequest) => { - seenFetchFn = req.fetchFn; - return { text: "ok", model: req.model }; - }, - }, - }); - - const cfg = { - models: { - providers: { - openai: { - apiKey: "test-key", - models: [], - }, - }, - }, - tools: { - media: { - audio: { - enabled: true, - models: [{ provider: "openai", model: "whisper-1" }], - }, - }, - }, - } as unknown as OpenClawConfig; - - const result = await runCapability({ - capability: "audio", - cfg, - ctx, - attachments: cache, - media, - providerRegistry, - }); - - expect(result.outputs[0]?.text).toBe("ok"); - expect(seenFetchFn).toBeUndefined(); + const seenFetchFn = await runAudioCapabilityWithFetchCapture({ + fixturePrefix: "openclaw-audio-no-proxy", + outputText: "ok", }); + expect(seenFetchFn).toBeUndefined(); }); }); diff --git a/src/media-understanding/runner.test-utils.ts b/src/media-understanding/runner.test-utils.ts index f4776c02e17..c83d3178255 100644 --- a/src/media-understanding/runner.test-utils.ts +++ b/src/media-understanding/runner.test-utils.ts @@ -54,3 +54,18 @@ export async function withAudioFixture( run, ); } + +export async function withVideoFixture( + filePrefix: string, + run: (params: MediaFixtureParams) => Promise, +) { + await withMediaFixture( + { + filePrefix, + extension: "mp4", + mediaType: "video/mp4", + fileContents: Buffer.from("video"), + }, + run, + ); +} diff --git a/src/media-understanding/runner.video.test.ts b/src/media-understanding/runner.video.test.ts index 3e9f3266db8..6991cf1a4ac 100644 --- a/src/media-understanding/runner.video.test.ts +++ b/src/media-understanding/runner.video.test.ts @@ -2,26 +2,7 @@ import { describe, expect, it } from "vitest"; import type { OpenClawConfig } from "../config/config.js"; import { withEnvAsync } from "../test-utils/env.js"; import { runCapability } from "./runner.js"; -import { withMediaFixture } from "./runner.test-utils.js"; - -async function withVideoFixture( - filePrefix: string, - run: (params: { - ctx: { MediaPath: string; MediaType: string }; - media: ReturnType; - cache: ReturnType; - }) => Promise, -) { - await withMediaFixture( - { - filePrefix, - extension: "mp4", - mediaType: "video/mp4", - fileContents: Buffer.from("video"), - }, - run, - ); -} +import { withVideoFixture } from "./runner.test-utils.js"; describe("runCapability video provider wiring", () => { it("merges video baseUrl and headers with entry precedence", async () => {