mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-04 20:45:40 +00:00
feat(video): add xai and alibaba providers
This commit is contained in:
@@ -16,6 +16,7 @@ import { isModernXaiModel, resolveXaiForwardCompatModel } from "./provider-model
|
||||
import { resolveFallbackXaiAuth } from "./src/tool-auth-shared.js";
|
||||
import { resolveEffectiveXSearchConfig } from "./src/x-search-config.js";
|
||||
import { wrapXaiProviderStream } from "./stream.js";
|
||||
import { buildXaiVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
import { createXaiWebSearchProvider } from "./web-search.js";
|
||||
|
||||
const PROVIDER_ID = "xai";
|
||||
@@ -230,6 +231,7 @@ export default defineSingleProviderPluginEntry({
|
||||
},
|
||||
// Plugin registration hook: registers the xAI web search provider, the xAI
// video generation provider, and the "code_execution" and "x_search" tools
// on the plugin API object passed in as `api`.
register(api) {
|
||||
api.registerWebSearchProvider(createXaiWebSearchProvider());
|
||||
// Video generation provider built by ./video-generation-provider.js.
api.registerVideoGenerationProvider(buildXaiVideoGenerationProvider());
|
||||
// Tools are registered as factories that receive the tool context (`ctx`).
api.registerTool((ctx) => createLazyCodeExecutionTool(ctx), { name: "code_execution" });
|
||||
api.registerTool((ctx) => createLazyXSearchTool(ctx), { name: "x_search" });
|
||||
},
|
||||
|
||||
@@ -77,6 +77,7 @@
|
||||
},
|
||||
"contracts": {
|
||||
"webSearchProviders": ["grok"],
|
||||
"videoGenerationProviders": ["xai"],
|
||||
"tools": ["code_execution", "x_search"]
|
||||
},
|
||||
"configSchema": {
|
||||
|
||||
10
extensions/xai/plugin-registration.contract.test.ts
Normal file
10
extensions/xai/plugin-registration.contract.test.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import { describePluginRegistrationContract } from "../../test/helpers/plugins/plugin-registration-contract.js";
|
||||
|
||||
// Registration contract for the xAI plugin: lists the provider, web search
// provider, video generation provider, and tool identifiers passed to the
// shared describePluginRegistrationContract helper.
describePluginRegistrationContract({
|
||||
pluginId: "xai",
|
||||
providerIds: ["xai"],
|
||||
webSearchProviderIds: ["grok"],
|
||||
videoGenerationProviderIds: ["xai"],
|
||||
toolNames: ["code_execution", "x_search"],
|
||||
// NOTE(review): presumably asks the helper to also check that the video
// provider exposes generateVideo — confirm against the helper.
requireGenerateVideo: true,
|
||||
});
|
||||
146
extensions/xai/video-generation-provider.test.ts
Normal file
146
extensions/xai/video-generation-provider.test.ts
Normal file
@@ -0,0 +1,146 @@
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { buildXaiVideoGenerationProvider } from "./video-generation-provider.js";
|
||||
|
||||
// Mock handles are created through vi.hoisted so that the vi.mock factories
// below can reference them.
const mocks = vi.hoisted(() => {
  const resolveApiKeyForProviderMock = vi.fn(async () => ({ apiKey: "xai-key" }));
  const postJsonRequestMock = vi.fn();
  const fetchWithTimeoutMock = vi.fn();
  const assertOkOrThrowHttpErrorMock = vi.fn(async () => {});
  const resolveProviderHttpRequestConfigMock = vi.fn((params) => {
    return {
      baseUrl: params.baseUrl ?? params.defaultBaseUrl,
      allowPrivateNetwork: false,
      headers: new Headers(params.defaultHeaders),
      dispatcherPolicy: undefined,
    };
  });
  return {
    resolveApiKeyForProviderMock,
    postJsonRequestMock,
    fetchWithTimeoutMock,
    assertOkOrThrowHttpErrorMock,
    resolveProviderHttpRequestConfigMock,
  };
});

vi.mock("openclaw/plugin-sdk/provider-auth-runtime", () => {
  return { resolveApiKeyForProvider: mocks.resolveApiKeyForProviderMock };
});

vi.mock("openclaw/plugin-sdk/provider-http", () => {
  return {
    assertOkOrThrowHttpError: mocks.assertOkOrThrowHttpErrorMock,
    fetchWithTimeout: mocks.fetchWithTimeoutMock,
    postJsonRequest: mocks.postJsonRequestMock,
    resolveProviderHttpRequestConfig: mocks.resolveProviderHttpRequestConfigMock,
  };
});

/** Stubs the create call so it answers with the given request id. */
function stubSubmitResponse(requestId: string): void {
  mocks.postJsonRequestMock.mockResolvedValue({
    response: {
      json: async () => ({ request_id: requestId }),
    },
    release: vi.fn(async () => {}),
  });
}

/**
 * Queues two fetchWithTimeout responses: a "done" status payload pointing at
 * `videoUrl`, followed by the binary download carrying `bytes`.
 */
function stubCompletedRun(requestId: string, videoUrl: string, bytes: string): void {
  const statusResponse = {
    json: async () => ({
      request_id: requestId,
      status: "done",
      video: { url: videoUrl },
    }),
  };
  const downloadResponse = {
    headers: new Headers({ "content-type": "video/mp4" }),
    arrayBuffer: async () => Buffer.from(bytes),
  };
  mocks.fetchWithTimeoutMock
    .mockResolvedValueOnce(statusResponse)
    .mockResolvedValueOnce(downloadResponse);
}

describe("xai video generation provider", () => {
  afterEach(() => {
    // Stubs with per-test return values are fully reset; the rest only have
    // their call history cleared so their default implementations survive.
    mocks.postJsonRequestMock.mockReset();
    mocks.fetchWithTimeoutMock.mockReset();
    mocks.resolveApiKeyForProviderMock.mockClear();
    mocks.assertOkOrThrowHttpErrorMock.mockClear();
    mocks.resolveProviderHttpRequestConfigMock.mockClear();
  });

  it("creates, polls, and downloads a generated video", async () => {
    stubSubmitResponse("req_123");
    stubCompletedRun("req_123", "https://cdn.x.ai/video.mp4", "mp4-bytes");

    const provider = buildXaiVideoGenerationProvider();
    const result = await provider.generateVideo({
      provider: "xai",
      model: "grok-imagine-video",
      prompt: "A tiny robot crab crossing a moonlit tide pool",
      cfg: {},
      durationSeconds: 6,
      aspectRatio: "16:9",
      resolution: "720P",
    });

    const expectedBody = expect.objectContaining({
      model: "grok-imagine-video",
      prompt: "A tiny robot crab crossing a moonlit tide pool",
      duration: 6,
      aspect_ratio: "16:9",
      resolution: "720p",
    });
    expect(mocks.postJsonRequestMock).toHaveBeenCalledWith(
      expect.objectContaining({
        url: "https://api.x.ai/v1/videos/generations",
        body: expectedBody,
      }),
    );
    expect(mocks.fetchWithTimeoutMock).toHaveBeenNthCalledWith(
      1,
      "https://api.x.ai/v1/videos/req_123",
      expect.objectContaining({ method: "GET" }),
      120000,
      fetch,
    );
    expect(result.videos[0]?.mimeType).toBe("video/mp4");
    expect(result.metadata).toEqual(
      expect.objectContaining({
        requestId: "req_123",
        mode: "generate",
      }),
    );
  });

  it("routes video inputs to the extension endpoint when duration is set", async () => {
    stubSubmitResponse("req_extend");
    stubCompletedRun("req_extend", "https://cdn.x.ai/extended.mp4", "extended-bytes");

    const provider = buildXaiVideoGenerationProvider();
    await provider.generateVideo({
      provider: "xai",
      model: "grok-imagine-video",
      prompt: "Continue the shot into a neon alleyway",
      cfg: {},
      durationSeconds: 8,
      inputVideos: [{ url: "https://example.com/input.mp4" }],
    });

    const expectedBody = expect.objectContaining({
      video: { url: "https://example.com/input.mp4" },
      duration: 8,
    });
    expect(mocks.postJsonRequestMock).toHaveBeenCalledWith(
      expect.objectContaining({
        url: "https://api.x.ai/v1/videos/extensions",
        body: expectedBody,
      }),
    );
  });
});
|
||||
338
extensions/xai/video-generation-provider.ts
Normal file
338
extensions/xai/video-generation-provider.ts
Normal file
@@ -0,0 +1,338 @@
|
||||
import { isProviderApiKeyConfigured } from "openclaw/plugin-sdk/provider-auth";
|
||||
import { resolveApiKeyForProvider } from "openclaw/plugin-sdk/provider-auth-runtime";
|
||||
import {
|
||||
assertOkOrThrowHttpError,
|
||||
fetchWithTimeout,
|
||||
postJsonRequest,
|
||||
resolveProviderHttpRequestConfig,
|
||||
} from "openclaw/plugin-sdk/provider-http";
|
||||
import type {
|
||||
GeneratedVideoAsset,
|
||||
VideoGenerationProvider,
|
||||
VideoGenerationRequest,
|
||||
VideoGenerationSourceAsset,
|
||||
} from "openclaw/plugin-sdk/video-generation";
|
||||
|
||||
// Base URL used when the xAI provider config does not supply a baseUrl.
const DEFAULT_XAI_VIDEO_BASE_URL = "https://api.x.ai/v1";
|
||||
// Model used when the request does not name one; also the only advertised model.
const DEFAULT_XAI_VIDEO_MODEL = "grok-imagine-video";
|
||||
// Per-request timeout for status polls and the download when req.timeoutMs is unset.
const DEFAULT_TIMEOUT_MS = 120_000;
|
||||
// Delay between consecutive status polls.
const POLL_INTERVAL_MS = 5_000;
|
||||
// Upper bound on status polls before the job is reported as not finished in time.
const MAX_POLL_ATTEMPTS = 120;
|
||||
// Aspect ratios forwarded to the API; any other value is dropped from the request body.
const XAI_VIDEO_ASPECT_RATIOS = new Set(["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3"]);
|
||||
|
||||
/** Error object that may accompany an xAI video API response. */
type XaiVideoApiError = {
  code?: string;
  message?: string;
};

/** Job states distinguished while polling a video request. */
type XaiVideoStatus = "queued" | "processing" | "done" | "failed" | "expired";

/** Payload read from the create (generate / edit / extend) call. */
type XaiVideoCreateResponse = {
  request_id?: string;
  error?: XaiVideoApiError | null;
};

/** Payload read from the status endpoint for a submitted request. */
type XaiVideoStatusResponse = {
  request_id?: string;
  status?: XaiVideoStatus;
  video?: { url?: string } | null;
  error?: XaiVideoApiError | null;
};
|
||||
|
||||
/**
 * Picks the base URL for xAI video requests.
 *
 * @param req - Video generation request whose `cfg` may carry an xAI provider baseUrl.
 * @returns The trimmed configured baseUrl, or the default when it is absent or blank.
 */
function resolveXaiVideoBaseUrl(req: VideoGenerationRequest): string {
  const configured = req.cfg?.models?.providers?.xai?.baseUrl?.trim();
  if (configured) {
    return configured;
  }
  return DEFAULT_XAI_VIDEO_BASE_URL;
}
|
||||
|
||||
/**
 * Encodes binary data as a base64 `data:` URL.
 *
 * @param buffer - Raw bytes to embed.
 * @param mimeType - MIME type placed in the URL header.
 * @returns A string of the form `data:<mimeType>;base64,<payload>`.
 */
function toDataUrl(buffer: Buffer, mimeType: string): string {
  const payload = buffer.toString("base64");
  return `data:${mimeType};base64,${payload}`;
}
|
||||
|
||||
/**
 * Converts an optional reference image into the URL string sent to xAI.
 *
 * @param input - Source asset, or undefined when no image was supplied.
 * @returns undefined without an input; the trimmed remote URL when one is set;
 *   otherwise a base64 data URL built from the buffer (MIME defaults to image/png).
 * @throws Error when the asset has neither a usable URL nor a buffer.
 */
function resolveImageUrl(input: VideoGenerationSourceAsset | undefined): string | undefined {
  if (!input) {
    return undefined;
  }
  const remoteUrl = input.url?.trim();
  if (remoteUrl) {
    return remoteUrl;
  }
  const { buffer, mimeType } = input;
  if (!buffer) {
    throw new Error("xAI image-to-video input is missing image data.");
  }
  const resolvedMimeType = mimeType?.trim() || "image/png";
  return toDataUrl(buffer, resolvedMimeType);
}
|
||||
|
||||
/**
 * Extracts the remote URL of an optional input video.
 *
 * @param input - Source asset, or undefined when no video was supplied.
 * @returns undefined without an input, otherwise the trimmed URL.
 * @throws Error when the asset carries only a buffer (inline video bytes are
 *   rejected here) or carries no video data at all.
 */
function resolveInputVideoUrl(input: VideoGenerationSourceAsset | undefined): string | undefined {
  if (!input) {
    return undefined;
  }
  const remoteUrl = input.url?.trim();
  if (remoteUrl) {
    return remoteUrl;
  }
  // No usable URL: pick the message by whether raw bytes were supplied.
  const reason = input.buffer
    ? "xAI video editing currently requires a remote mp4 URL input."
    : "xAI video editing input is missing video data.";
  throw new Error(reason);
}
|
||||
|
||||
/**
 * Normalizes a requested duration into whole seconds within [min, max].
 *
 * @param params.durationSeconds - Requested duration; ignored unless a finite number.
 * @param params.min - Lower bound, defaults to 1.
 * @param params.max - Upper bound, defaults to 15.
 * @returns The rounded, clamped duration, or undefined when none was requested.
 */
function resolveDurationSeconds(params: {
  durationSeconds?: number;
  min?: number;
  max?: number;
}): number | undefined {
  const requested = params.durationSeconds;
  if (typeof requested !== "number" || !Number.isFinite(requested)) {
    return undefined;
  }
  const lowerBound = params.min ?? 1;
  const upperBound = params.max ?? 15;
  // Cap at the upper bound first, then lift to the lower bound.
  const capped = Math.min(upperBound, Math.round(requested));
  return Math.max(lowerBound, capped);
}
|
||||
|
||||
/**
 * Validates a requested aspect ratio against the supported set.
 *
 * @param value - Requested ratio such as "16:9"; may be undefined.
 * @returns The trimmed ratio when it is in XAI_VIDEO_ASPECT_RATIOS, else undefined.
 */
function resolveAspectRatio(value: string | undefined): string | undefined {
  const candidate = value?.trim();
  if (candidate && XAI_VIDEO_ASPECT_RATIOS.has(candidate)) {
    return candidate;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Maps a requested resolution label onto the values sent to xAI.
 *
 * Matching ignores surrounding whitespace and letter case, so "720p",
 * "720P" and " 720P " are treated alike (consistent with how aspect ratios
 * are trimmed before validation).
 *
 * @param value - Requested resolution label; may be undefined.
 * @returns "480p" for 480P; "720p" for 720P and for 1080P (1080P is
 *   downgraded to 720p, the highest value this function returns);
 *   undefined for anything else.
 */
function resolveResolution(value: string | undefined): "480p" | "720p" | undefined {
  switch (value?.trim().toUpperCase()) {
    case "480P":
      return "480p";
    case "720P":
    case "1080P":
      return "720p";
    default:
      return undefined;
  }
}
|
||||
|
||||
/**
 * Decides which xAI video operation a request maps to.
 *
 * @param req - Video generation request.
 * @returns "generate" when no input video is present; with an input video,
 *   "extend" when a usable duration was requested and "edit" otherwise.
 */
function resolveXaiVideoMode(req: VideoGenerationRequest): "generate" | "edit" | "extend" {
  if (!req.inputVideos?.length) {
    return "generate";
  }
  const extensionDuration = resolveDurationSeconds({
    durationSeconds: req.durationSeconds,
    min: 2,
    max: 10,
  });
  return extensionDuration === undefined ? "edit" : "extend";
}
|
||||
|
||||
/**
 * Builds the JSON body for the create call matching the request's mode.
 *
 * - generate: model, prompt, plus optional image, duration (1-15 s),
 *   aspect_ratio and resolution.
 * - edit: model, prompt and the input video URL.
 * - extend: same as edit plus a duration clamped to 2-10 s.
 *
 * @param req - Video generation request.
 * @returns The request body as a plain object.
 * @throws Error when more than one image or video is supplied, when images
 *   and videos are combined, or when an input asset cannot be resolved.
 */
function buildCreateBody(req: VideoGenerationRequest): Record<string, unknown> {
  const imageCount = req.inputImages?.length ?? 0;
  const videoCount = req.inputVideos?.length ?? 0;
  if (imageCount > 1) {
    throw new Error("xAI video generation supports at most one reference image.");
  }
  if (videoCount > 1) {
    throw new Error("xAI video generation supports at most one input video.");
  }
  if (imageCount > 0 && videoCount > 0) {
    throw new Error("xAI video generation does not support image and video inputs together.");
  }

  const mode = resolveXaiVideoMode(req);
  const body: Record<string, unknown> = {
    model: req.model?.trim() || DEFAULT_XAI_VIDEO_MODEL,
    prompt: req.prompt,
  };

  if (mode !== "generate") {
    // Edit and extend both operate on the single input video.
    body.video = { url: resolveInputVideoUrl(req.inputVideos?.[0]) };
    if (mode === "extend") {
      const extensionDuration = resolveDurationSeconds({
        durationSeconds: req.durationSeconds,
        min: 2,
        max: 10,
      });
      if (extensionDuration !== undefined) {
        body.duration = extensionDuration;
      }
    }
    return body;
  }

  const referenceImageUrl = resolveImageUrl(req.inputImages?.[0]);
  if (referenceImageUrl) {
    body.image = { url: referenceImageUrl };
  }
  const clipDuration = resolveDurationSeconds({
    durationSeconds: req.durationSeconds,
    min: 1,
    max: 15,
  });
  if (clipDuration !== undefined) {
    body.duration = clipDuration;
  }
  const ratio = resolveAspectRatio(req.aspectRatio);
  if (ratio) {
    body.aspect_ratio = ratio;
  }
  const outputResolution = resolveResolution(req.resolution);
  if (outputResolution) {
    body.resolution = outputResolution;
  }
  return body;
}
|
||||
|
||||
/**
 * Selects the API path for the create call based on the request's mode.
 *
 * @param req - Video generation request.
 * @returns "/videos/edits", "/videos/extensions" or "/videos/generations".
 */
function resolveCreateEndpoint(req: VideoGenerationRequest): string {
  const endpointByMode: Record<"generate" | "edit" | "extend", string> = {
    generate: "/videos/generations",
    edit: "/videos/edits",
    extend: "/videos/extensions",
  };
  // Fall back to the generation endpoint, mirroring a switch default.
  return endpointByMode[resolveXaiVideoMode(req)] ?? "/videos/generations";
}
|
||||
|
||||
/**
 * Polls the xAI status endpoint until the video job reaches a terminal state.
 *
 * @param params.requestId - Id returned by the create call.
 * @param params.headers - Headers sent with every status request.
 * @param params.timeoutMs - Per-request timeout; defaults to DEFAULT_TIMEOUT_MS.
 * @param params.baseUrl - API base URL the status path is appended to.
 * @param params.fetchFn - Fetch implementation handed to fetchWithTimeout.
 * @returns The status payload once its status is "done".
 * @throws Error when the job reports "failed" or "expired", when a status
 *   request fails the HTTP check, or when MAX_POLL_ATTEMPTS polls pass
 *   without a terminal status.
 */
async function pollXaiVideo(params: {
  requestId: string;
  headers: Headers;
  timeoutMs?: number;
  baseUrl: string;
  fetchFn: typeof fetch;
}): Promise<XaiVideoStatusResponse> {
  // Loop-invariant request inputs.
  const statusUrl = `${params.baseUrl}/videos/${params.requestId}`;
  const requestTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;

  for (let attempt = 0; attempt < MAX_POLL_ATTEMPTS; attempt += 1) {
    const response = await fetchWithTimeout(
      statusUrl,
      {
        method: "GET",
        headers: params.headers,
      },
      requestTimeoutMs,
      params.fetchFn,
    );
    await assertOkOrThrowHttpError(response, "xAI video status request failed");
    const payload = (await response.json()) as XaiVideoStatusResponse;
    switch (payload.status) {
      case "done":
        return payload;
      case "failed":
      case "expired":
        throw new Error(payload.error?.message?.trim() || `xAI video generation ${payload.status}`);
      case "queued":
      case "processing":
      default:
        // Unknown or missing statuses are treated as still pending.
        break;
    }
    // Only wait when another poll will follow; sleeping after the final
    // attempt would just delay the timeout error below.
    if (attempt < MAX_POLL_ATTEMPTS - 1) {
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
    }
  }
  throw new Error(`xAI video generation task ${params.requestId} did not finish in time`);
}
|
||||
|
||||
/**
 * Downloads a finished video and wraps it as a generated asset.
 *
 * @param params.url - Location of the rendered video.
 * @param params.timeoutMs - Download timeout; defaults to DEFAULT_TIMEOUT_MS.
 * @param params.fetchFn - Fetch implementation handed to fetchWithTimeout.
 * @returns The video bytes, the MIME type taken from the content-type header
 *   (falling back to video/mp4), and a file name whose extension is "webm"
 *   when the MIME type mentions webm and "mp4" otherwise.
 * @throws Error when the download fails the HTTP check.
 */
async function downloadXaiVideo(params: {
  url: string;
  timeoutMs?: number;
  fetchFn: typeof fetch;
}): Promise<GeneratedVideoAsset> {
  const downloadTimeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const response = await fetchWithTimeout(
    params.url,
    { method: "GET" },
    downloadTimeoutMs,
    params.fetchFn,
  );
  await assertOkOrThrowHttpError(response, "xAI generated video download failed");

  const contentType = response.headers.get("content-type")?.trim();
  const mimeType = contentType || "video/mp4";
  const extension = mimeType.includes("webm") ? "webm" : "mp4";
  const bytes = Buffer.from(await response.arrayBuffer());
  return {
    buffer: bytes,
    mimeType,
    fileName: `video-1.${extension}`,
  };
}
|
||||
|
||||
/**
 * Builds the xAI video generation provider.
 *
 * The provider advertises a single model, accepts at most one reference image
 * or one input video, and implements `generateVideo` as: submit the create
 * request, poll the status endpoint until the job is done, then download the
 * rendered file.
 *
 * @returns A VideoGenerationProvider with id "xai".
 */
export function buildXaiVideoGenerationProvider(): VideoGenerationProvider {
  return {
    id: "xai",
    label: "xAI",
    defaultModel: DEFAULT_XAI_VIDEO_MODEL,
    models: [DEFAULT_XAI_VIDEO_MODEL],
    isConfigured: ({ agentDir }) =>
      isProviderApiKeyConfigured({
        provider: "xai",
        agentDir,
      }),
    capabilities: {
      maxVideos: 1,
      maxInputImages: 1,
      maxInputVideos: 1,
      maxDurationSeconds: 15,
      supportsAspectRatio: true,
      supportsResolution: true,
    },
    /**
     * Runs one generate / edit / extend job end to end.
     *
     * @throws Error when no API key is available, when any HTTP call fails its
     *   check, when the create response lacks a request id, when the job fails,
     *   expires or does not finish in time, or when it completes without a URL.
     */
    async generateVideo(req) {
      const auth = await resolveApiKeyForProvider({
        provider: "xai",
        cfg: req.cfg,
        agentDir: req.agentDir,
        store: req.authStore,
      });
      if (!auth.apiKey) {
        throw new Error("xAI API key missing");
      }

      const fetchFn = fetch;
      const { baseUrl, allowPrivateNetwork, headers, dispatcherPolicy } =
        resolveProviderHttpRequestConfig({
          baseUrl: resolveXaiVideoBaseUrl(req),
          defaultBaseUrl: DEFAULT_XAI_VIDEO_BASE_URL,
          allowPrivateNetwork: false,
          defaultHeaders: {
            Authorization: `Bearer ${auth.apiKey}`,
            "Content-Type": "application/json",
          },
          provider: "xai",
          capability: "video",
          transport: "http",
        });
      const { response, release } = await postJsonRequest({
        url: `${baseUrl}${resolveCreateEndpoint(req)}`,
        headers,
        body: buildCreateBody(req),
        timeoutMs: req.timeoutMs,
        fetchFn,
        allowPrivateNetwork,
        dispatcherPolicy,
      });

      let requestId: string;
      try {
        await assertOkOrThrowHttpError(response, "xAI video generation failed");
        const submitted = (await response.json()) as XaiVideoCreateResponse;
        const submittedId = submitted.request_id?.trim();
        if (!submittedId) {
          throw new Error(
            submitted.error?.message?.trim() || "xAI video generation response missing request_id",
          );
        }
        requestId = submittedId;
      } finally {
        // The submit response is fully consumed at this point. Release it now
        // instead of holding it across polling and download, which can take
        // minutes.
        await release();
      }

      const completed = await pollXaiVideo({
        requestId,
        headers,
        timeoutMs: req.timeoutMs,
        baseUrl,
        fetchFn,
      });
      const videoUrl = completed.video?.url?.trim();
      if (!videoUrl) {
        throw new Error("xAI video generation completed without an output URL");
      }
      const video = await downloadXaiVideo({
        url: videoUrl,
        timeoutMs: req.timeoutMs,
        fetchFn,
      });
      return {
        videos: [video],
        model: req.model?.trim() || DEFAULT_XAI_VIDEO_MODEL,
        metadata: {
          requestId,
          status: completed.status,
          videoUrl,
          mode: resolveXaiVideoMode(req),
        },
      };
    },
  };
}
|
||||
Reference in New Issue
Block a user