fix: surface normalized video durations

This commit is contained in:
Peter Steinberger
2026-04-05 23:57:18 +01:00
parent 09fe144e52
commit b5ade7b629
10 changed files with 283 additions and 23 deletions

View File

@@ -58,24 +58,24 @@ Use `action: "list"` to inspect available providers and models at runtime:
## Tool parameters
| Parameter | Type | Description |
| ----------------- | -------- | ------------------------------------------------------------------------------------- |
| `prompt` | string | Video generation prompt (required for `action: "generate"`) |
| `action` | string | `"generate"` (default) or `"list"` to inspect providers |
| `model` | string | Provider/model override, e.g. `qwen/wan2.6-t2v` |
| `image` | string | Single reference image path or URL |
| `images` | string[] | Multiple reference images (up to 5) |
| `video` | string | Single reference video path or URL |
| `videos` | string[] | Multiple reference videos (up to 4) |
| `size` | string | Size hint when the provider supports it |
| `aspectRatio` | string | Aspect ratio: `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9` |
| `resolution` | string | Resolution hint: `480P`, `720P`, or `1080P` |
| `durationSeconds` | number | Target duration in seconds |
| `audio` | boolean | Enable generated audio when the provider supports it |
| `watermark` | boolean | Toggle provider watermarking when supported |
| `filename` | string | Output filename hint |
| Parameter | Type | Description |
| ----------------- | -------- | -------------------------------------------------------------------------------------- |
| `prompt` | string | Video generation prompt (required for `action: "generate"`) |
| `action` | string | `"generate"` (default) or `"list"` to inspect providers |
| `model` | string | Provider/model override, e.g. `qwen/wan2.6-t2v` |
| `image` | string | Single reference image path or URL |
| `images` | string[] | Multiple reference images (up to 5) |
| `video` | string | Single reference video path or URL |
| `videos` | string[] | Multiple reference videos (up to 4) |
| `size` | string | Size hint when the provider supports it |
| `aspectRatio` | string | Aspect ratio: `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9` |
| `resolution` | string | Resolution hint: `480P`, `720P`, or `1080P` |
| `durationSeconds` | number | Target duration in seconds. OpenClaw may round to the nearest provider-supported value |
| `audio` | boolean | Enable generated audio when the provider supports it |
| `watermark` | boolean | Toggle provider watermarking when supported |
| `filename` | string | Output filename hint |
Not all providers support all parameters. The tool validates provider capability limits before it submits the request.
Not all providers support all parameters. The tool validates provider capability limits before it submits the request. When a provider or model only supports a discrete set of video lengths, OpenClaw rounds `durationSeconds` to the nearest supported value and reports the normalized duration in the tool result.
## Configuration

View File

@@ -162,6 +162,7 @@ export function buildGoogleVideoGenerationProvider(): VideoGenerationProvider {
maxInputImages: 1,
maxInputVideos: 1,
maxDurationSeconds: GOOGLE_VIDEO_MAX_DURATION_SECONDS,
supportedDurationSeconds: GOOGLE_VIDEO_ALLOWED_DURATION_SECONDS,
supportsAspectRatio: true,
supportsResolution: true,
supportsSize: true,

View File

@@ -232,6 +232,7 @@ export function buildMinimaxVideoGenerationProvider(): VideoGenerationProvider {
maxInputImages: 1,
maxInputVideos: 0,
maxDurationSeconds: 10,
supportedDurationSecondsByModel: MINIMAX_MODEL_ALLOWED_DURATIONS,
supportsResolution: true,
supportsWatermark: false,
},

View File

@@ -194,6 +194,7 @@ export function buildOpenAIVideoGenerationProvider(): VideoGenerationProvider {
maxInputImages: 1,
maxInputVideos: 1,
maxDurationSeconds: 12,
supportedDurationSeconds: OPENAI_VIDEO_SECONDS,
supportsSize: true,
},
async generateVideo(req) {

View File

@@ -11,6 +11,7 @@ function asConfig(value: unknown): OpenClawConfig {
describe("createVideoGenerateTool", () => {
beforeEach(() => {
vi.restoreAllMocks();
vi.spyOn(videoGenerationRuntime, "listRuntimeVideoGenerationProviders").mockReturnValue([]);
});
afterEach(() => {
@@ -88,4 +89,90 @@ describe("createVideoGenerateTool", () => {
metadata: { taskId: "task-1" },
});
});
// Verifies that duration-normalization metadata returned by the runtime is
// surfaced both in the human-readable tool output and in structured details.
it("shows duration normalization details from runtime metadata", async () => {
  // Runtime reports a 5s request that was snapped to the supported 6s value.
  vi.spyOn(videoGenerationRuntime, "generateVideo").mockResolvedValue({
    provider: "google",
    model: "veo-3.1-fast-generate-preview",
    attempts: [],
    videos: [
      {
        buffer: Buffer.from("video-bytes"),
        mimeType: "video/mp4",
        fileName: "lobster.mp4",
      },
    ],
    metadata: {
      requestedDurationSeconds: 5,
      normalizedDurationSeconds: 6,
      supportedDurationSeconds: [4, 6, 8],
    },
  });
  // Persistence is stubbed; only the returned path matters for this test.
  vi.spyOn(mediaStore, "saveMediaBuffer").mockResolvedValueOnce({
    path: "/tmp/generated-lobster.mp4",
    id: "generated-lobster.mp4",
    size: 11,
    contentType: "video/mp4",
  });
  const tool = createVideoGenerateTool({
    config: asConfig({
      agents: {
        defaults: {
          videoGenerationModel: { primary: "google/veo-3.1-fast-generate-preview" },
        },
      },
    }),
  });
  if (!tool) {
    throw new Error("expected video_generate tool");
  }
  const result = await tool.execute("call-1", {
    prompt: "friendly lobster surfing",
    durationSeconds: 5,
  });
  const text = (result.content?.[0] as { text: string } | undefined)?.text ?? "";
  // The normalization note must be visible in the text the agent reads back.
  expect(text).toContain("Duration normalized: requested 5s; used 6s.");
  // Structured details must carry the normalized/requested pair and the grid.
  expect(result.details).toMatchObject({
    durationSeconds: 6,
    requestedDurationSeconds: 5,
    supportedDurationSeconds: [4, 6, 8],
  });
});
// Verifies that `action: "list"` advertises a provider's discrete duration
// grid in the rendered capability summary.
it("lists supported provider durations when advertised", async () => {
  // Register a single fake provider whose capabilities include a duration grid.
  vi.spyOn(videoGenerationRuntime, "listRuntimeVideoGenerationProviders").mockReturnValue([
    {
      id: "google",
      defaultModel: "veo-3.1-fast-generate-preview",
      models: ["veo-3.1-fast-generate-preview"],
      capabilities: {
        maxDurationSeconds: 8,
        supportedDurationSeconds: [4, 6, 8],
      },
      // list action must never trigger generation.
      generateVideo: vi.fn(async () => {
        throw new Error("not used");
      }),
    },
  ]);
  const tool = createVideoGenerateTool({
    config: asConfig({
      agents: {
        defaults: {
          videoGenerationModel: { primary: "google/veo-3.1-fast-generate-preview" },
        },
      },
    }),
  });
  if (!tool) {
    throw new Error("expected video_generate tool");
  }
  const result = await tool.execute("call-1", { action: "list" });
  const text = (result.content?.[0] as { text: string } | undefined)?.text ?? "";
  // Durations are joined with "/" in the capability summary line.
  expect(text).toContain("supportedDurationSeconds=4/6/8");
});
});

View File

@@ -6,6 +6,7 @@ import { loadWebMedia } from "../../media/web-media.js";
import { readSnakeCaseParamRaw } from "../../param-key.js";
import { getProviderEnvVars } from "../../secrets/provider-env-vars.js";
import { resolveUserPath } from "../../utils.js";
import { resolveVideoGenerationSupportedDurations } from "../../video-generation/duration-support.js";
import { parseVideoGenerationModelRef } from "../../video-generation/model-ref.js";
import {
generateVideo,
@@ -114,7 +115,8 @@ const VideoGenerateToolSchema = Type.Object({
),
durationSeconds: Type.Optional(
Type.Number({
description: "Optional target duration in seconds.",
description:
"Optional target duration in seconds. OpenClaw may round this to the nearest provider-supported duration.",
minimum: 1,
}),
),
@@ -329,6 +331,7 @@ function resolveSelectedVideoGenerationProvider(params: {
function validateVideoGenerationCapabilities(params: {
provider: VideoGenerationProvider | undefined;
model?: string;
inputImageCount: number;
inputVideoCount: number;
size?: string;
@@ -371,6 +374,10 @@ function validateVideoGenerationCapabilities(params: {
if (
typeof params.durationSeconds === "number" &&
Number.isFinite(params.durationSeconds) &&
!resolveVideoGenerationSupportedDurations({
provider,
model: params.model,
}) &&
typeof caps.maxDurationSeconds === "number" &&
params.durationSeconds > caps.maxDurationSeconds
) {
@@ -535,7 +542,7 @@ export function createVideoGenerateTool(options?: {
name: "video_generate",
displaySummary: "Generate videos",
description:
"Generate videos using configured providers. Generated videos are saved under OpenClaw-managed media storage and delivered automatically as attachments.",
"Generate videos using configured providers. Generated videos are saved under OpenClaw-managed media storage and delivered automatically as attachments. Duration requests may be rounded to the nearest provider-supported value.",
parameters: VideoGenerateToolSchema,
execute: async (_toolCallId, rawArgs) => {
const args = rawArgs as Record<string, unknown>;
@@ -564,6 +571,17 @@ export function createVideoGenerateTool(options?: {
provider.capabilities.maxDurationSeconds
? `maxDurationSeconds=${provider.capabilities.maxDurationSeconds}`
: null,
provider.capabilities.supportedDurationSeconds?.length
? `supportedDurationSeconds=${provider.capabilities.supportedDurationSeconds.join("/")}`
: null,
provider.capabilities.supportedDurationSecondsByModel &&
Object.keys(provider.capabilities.supportedDurationSecondsByModel).length > 0
? `supportedDurationSecondsByModel=${Object.entries(
provider.capabilities.supportedDurationSecondsByModel,
)
.map(([modelId, durations]) => `${modelId}:${durations.join("/")}`)
.join("; ")}`
: null,
provider.capabilities.supportsResolution ? "resolution" : null,
provider.capabilities.supportsAspectRatio ? "aspectRatio" : null,
provider.capabilities.supportsSize ? "size" : null,
@@ -639,6 +657,8 @@ export function createVideoGenerateTool(options?: {
});
validateVideoGenerationCapabilities({
provider: selectedProvider,
model:
parseVideoGenerationModelRef(model)?.model ?? model ?? selectedProvider?.defaultModel,
inputImageCount: loadedReferenceImages.length,
inputVideoCount: loadedReferenceVideos.length,
size,
@@ -674,10 +694,30 @@ export function createVideoGenerateTool(options?: {
),
),
);
const requestedDurationSeconds =
typeof result.metadata?.requestedDurationSeconds === "number" &&
Number.isFinite(result.metadata.requestedDurationSeconds)
? result.metadata.requestedDurationSeconds
: durationSeconds;
const normalizedDurationSeconds =
typeof result.metadata?.normalizedDurationSeconds === "number" &&
Number.isFinite(result.metadata.normalizedDurationSeconds)
? result.metadata.normalizedDurationSeconds
: requestedDurationSeconds;
const supportedDurationSeconds = Array.isArray(result.metadata?.supportedDurationSeconds)
? result.metadata.supportedDurationSeconds.filter(
(entry): entry is number => typeof entry === "number" && Number.isFinite(entry),
)
: undefined;
const lines = [
`Generated ${savedVideos.length} video${savedVideos.length === 1 ? "" : "s"} with ${result.provider}/${result.model}.`,
typeof requestedDurationSeconds === "number" &&
typeof normalizedDurationSeconds === "number" &&
requestedDurationSeconds !== normalizedDurationSeconds
? `Duration normalized: requested ${requestedDurationSeconds}s; used ${normalizedDurationSeconds}s.`
: null,
...savedVideos.map((video) => `MEDIA:${video.path}`),
];
].filter((entry): entry is string => Boolean(entry));
return {
content: [{ type: "text", text: lines.join("\n") }],
@@ -722,7 +762,17 @@ export function createVideoGenerateTool(options?: {
...(size ? { size } : {}),
...(aspectRatio ? { aspectRatio } : {}),
...(resolution ? { resolution } : {}),
...(typeof durationSeconds === "number" ? { durationSeconds } : {}),
...(typeof normalizedDurationSeconds === "number"
? { durationSeconds: normalizedDurationSeconds }
: {}),
...(typeof requestedDurationSeconds === "number" &&
typeof normalizedDurationSeconds === "number" &&
requestedDurationSeconds !== normalizedDurationSeconds
? { requestedDurationSeconds }
: {}),
...(supportedDurationSeconds && supportedDurationSeconds.length > 0
? { supportedDurationSeconds }
: {}),
...(typeof audio === "boolean" ? { audio } : {}),
...(typeof watermark === "boolean" ? { watermark } : {}),
...(filename ? { filename } : {}),

View File

@@ -0,0 +1,54 @@
import type { VideoGenerationProvider } from "./types.js";
/**
 * Normalize a provider-advertised duration list to sorted, unique, positive
 * whole seconds.
 *
 * @param values Raw duration values as advertised by a provider (may contain
 *   fractions, duplicates, or non-finite entries).
 * @returns Ascending list of distinct positive integers, or `undefined` when
 *   no usable value remains.
 */
function normalizeSupportedDurationValues(
  values: readonly number[] | undefined,
): number[] | undefined {
  if (!Array.isArray(values) || values.length === 0) {
    return undefined;
  }
  // Round BEFORE deduplicating: distinct inputs (e.g. 3.6 and 4.4) can
  // collapse to the same whole second, and deduping first would leak
  // duplicates into the result.
  const rounded = values
    .filter((value) => Number.isFinite(value) && value > 0)
    .map((value) => Math.round(value))
    // Math.round(0.4) === 0, so positive fractions can still round to zero.
    .filter((value) => value > 0);
  const normalized = [...new Set(rounded)].sort((left, right) => left - right);
  return normalized.length > 0 ? normalized : undefined;
}
/**
 * Look up the discrete set of video durations a provider supports, preferring
 * a model-specific list over the provider-wide default.
 *
 * @param params.provider Provider whose capabilities are consulted.
 * @param params.model Optional model id used to select a per-model override.
 * @returns Normalized ascending duration list, or `undefined` when the
 *   provider advertises no usable discrete durations.
 */
export function resolveVideoGenerationSupportedDurations(params: {
  provider?: VideoGenerationProvider;
  model?: string;
}): number[] | undefined {
  const capabilities = params.provider?.capabilities;
  if (!capabilities) {
    return undefined;
  }
  const trimmedModel = params.model?.trim();
  let perModel: readonly number[] | undefined;
  if (trimmedModel && capabilities.supportedDurationSecondsByModel) {
    perModel = capabilities.supportedDurationSecondsByModel[trimmedModel];
  }
  return normalizeSupportedDurationValues(perModel ?? capabilities.supportedDurationSeconds);
}
/**
 * Snap a requested duration onto the provider's supported duration grid.
 *
 * Without a finite request there is nothing to normalize; without an
 * advertised grid the request is simply rounded to a positive whole second.
 *
 * @param params.provider Provider whose supported durations are consulted.
 * @param params.model Optional model id for per-model duration overrides.
 * @param params.durationSeconds Caller-requested duration.
 * @returns Nearest supported duration (ties prefer the longer value), the
 *   rounded request when no grid exists, or `undefined` for no/invalid input.
 */
export function normalizeVideoGenerationDuration(params: {
  provider?: VideoGenerationProvider;
  model?: string;
  durationSeconds?: number;
}): number | undefined {
  const requested = params.durationSeconds;
  if (typeof requested !== "number" || !Number.isFinite(requested)) {
    return undefined;
  }
  const target = Math.max(1, Math.round(requested));
  const supported = resolveVideoGenerationSupportedDurations(params);
  if (!supported || supported.length === 0) {
    return target;
  }
  // Scan for the supported value closest to the target; on an exact tie the
  // longer duration wins so users never silently lose requested length.
  let best: number | undefined;
  for (const candidate of supported) {
    if (best === undefined) {
      best = candidate;
      continue;
    }
    const candidateDistance = Math.abs(candidate - target);
    const bestDistance = Math.abs(best - target);
    if (
      candidateDistance < bestDistance ||
      (candidateDistance === bestDistance && candidate > best)
    ) {
      best = candidate;
    }
  }
  return best ?? target;
}

View File

@@ -150,6 +150,43 @@ describe("video-generation runtime", () => {
expect(mocks.listVideoGenerationProviders).toHaveBeenCalledWith({} as OpenClawConfig);
});
// Verifies that the runtime rounds a requested duration to the nearest
// provider-supported value before calling the provider, and records both the
// requested and normalized values in the result metadata.
it("normalizes requested durations to supported provider values", async () => {
  // Captures the duration the provider actually receives.
  let seenDurationSeconds: number | undefined;
  mocks.resolveAgentModelPrimaryValue.mockReturnValue("video-plugin/vid-v1");
  mocks.getVideoGenerationProvider.mockReturnValue({
    id: "video-plugin",
    capabilities: {
      supportedDurationSeconds: [4, 6, 8],
    },
    generateVideo: async (req) => {
      seenDurationSeconds = req.durationSeconds;
      return {
        videos: [{ buffer: Buffer.from("mp4-bytes"), mimeType: "video/mp4" }],
        model: "vid-v1",
      };
    },
  });
  const result = await generateVideo({
    cfg: {
      agents: {
        defaults: {
          videoGenerationModel: { primary: "video-plugin/vid-v1" },
        },
      },
    } as OpenClawConfig,
    prompt: "animate a cat",
    durationSeconds: 5,
  });
  // 5 is equidistant from 4 and 6; ties resolve to the longer duration.
  expect(seenDurationSeconds).toBe(6);
  expect(result.metadata).toMatchObject({
    requestedDurationSeconds: 5,
    normalizedDurationSeconds: 6,
    supportedDurationSeconds: [4, 6, 8],
  });
});
it("builds a generic config hint without hardcoded provider ids", async () => {
mocks.listVideoGenerationProviders.mockReturnValue([
{

View File

@@ -8,6 +8,10 @@ import {
} from "../config/model-input.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import { getProviderEnvVars } from "../secrets/provider-env-vars.js";
import {
normalizeVideoGenerationDuration,
resolveVideoGenerationSupportedDurations,
} from "./duration-support.js";
import { parseVideoGenerationModelRef } from "./model-ref.js";
import { getVideoGenerationProvider, listVideoGenerationProviders } from "./provider-registry.js";
import type {
@@ -147,6 +151,19 @@ export async function generateVideo(
}
try {
const requestedDurationSeconds =
typeof params.durationSeconds === "number" && Number.isFinite(params.durationSeconds)
? Math.max(1, Math.round(params.durationSeconds))
: undefined;
const normalizedDurationSeconds = normalizeVideoGenerationDuration({
provider,
model: candidate.model,
durationSeconds: requestedDurationSeconds,
});
const supportedDurationSeconds = resolveVideoGenerationSupportedDurations({
provider,
model: candidate.model,
});
const result: VideoGenerationResult = await provider.generateVideo({
provider: candidate.provider,
model: candidate.model,
@@ -157,7 +174,7 @@ export async function generateVideo(
size: params.size,
aspectRatio: params.aspectRatio,
resolution: params.resolution,
durationSeconds: params.durationSeconds,
durationSeconds: normalizedDurationSeconds,
audio: params.audio,
watermark: params.watermark,
inputImages: params.inputImages,
@@ -171,7 +188,17 @@ export async function generateVideo(
provider: candidate.provider,
model: result.model ?? candidate.model,
attempts,
metadata: result.metadata,
metadata:
typeof requestedDurationSeconds === "number" &&
typeof normalizedDurationSeconds === "number" &&
requestedDurationSeconds !== normalizedDurationSeconds
? {
...result.metadata,
requestedDurationSeconds,
normalizedDurationSeconds,
...(supportedDurationSeconds ? { supportedDurationSeconds } : {}),
}
: result.metadata,
};
} catch (err) {
lastError = err;

View File

@@ -52,6 +52,8 @@ export type VideoGenerationProviderCapabilities = {
maxInputImages?: number;
maxInputVideos?: number;
maxDurationSeconds?: number;
supportedDurationSeconds?: readonly number[];
supportedDurationSecondsByModel?: Readonly<Record<string, readonly number[]>>;
supportsSize?: boolean;
supportsAspectRatio?: boolean;
supportsResolution?: boolean;