From ed055f44ae84e19fc1c88ea99753791adb40bee3 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 27 Mar 2026 16:36:43 +0000 Subject: [PATCH] refactor: route plugin runtime through bundled seams --- docs/.generated/plugin-sdk-api-baseline.json | 9 + docs/.generated/plugin-sdk-api-baseline.jsonl | 1 + extensions/anthropic/test-api.ts | 1 + extensions/bluebubbles/api.ts | 4 + extensions/discord/action-runtime-api.ts | 1 + .../src/actions/handle-action.guild-admin.ts | 2 +- .../discord/src/actions/handle-action.ts | 2 +- extensions/google/test-api.ts | 1 + extensions/image-generation-core/api.ts | 1 + extensions/image-generation-core/package.json | 7 + .../image-generation-core/runtime-api.ts | 6 + .../image-generation-core/src/runtime.ts | 183 ++++ extensions/matrix/src/config-schema.ts | 6 +- extensions/mattermost/src/channel.ts | 5 +- .../mattermost/src/config-schema-core.ts | 125 +++ extensions/mattermost/src/config-schema.ts | 116 +-- extensions/mattermost/src/config-surface.ts | 4 + .../media-understanding-core/package.json | 7 + .../media-understanding-core/runtime-api.ts | 9 + .../media-understanding-core/src/runtime.ts | 147 +++ extensions/msteams/test-api.ts | 1 + .../nextcloud-talk/src/config-schema.ts | 34 +- extensions/nostr/src/config-schema.ts | 8 +- extensions/nostr/test-api.ts | 1 + extensions/openai/test-api.ts | 1 + extensions/shared/config-schema-helpers.ts | 18 +- extensions/signal/reaction-runtime-api.ts | 6 + extensions/signal/src/accounts.ts | 2 +- extensions/signal/src/message-actions.ts | 2 +- extensions/slack/test-api.ts | 2 + extensions/speech-core/api.ts | 1 + extensions/speech-core/package.json | 7 + extensions/speech-core/runtime-api.ts | 33 + extensions/speech-core/src/tts.ts | 849 +++++++++++++++++ extensions/telegram/test-api.ts | 2 + extensions/tlon/test-api.ts | 1 + extensions/twitch/src/config-schema.ts | 2 +- extensions/whatsapp/test-api.ts | 2 + extensions/zalo/src/config-schema.ts | 4 +- extensions/zalouser/src/config-schema.ts | 5 +- package.json | 8 + scripts/lib/plugin-sdk-entrypoints.json | 2 + scripts/openclaw-npm-postpublish-verify.ts | 12 +- src/agents/cli-runner.test-support.ts | 6 +- src/agents/tools/tts-tool.test.ts | 30 +- .../reply/commands-system-prompt.test.ts | 21 +- src/cli/prompt.runtime.ts | 1 + src/cli/update-cli.test.ts | 2 +- src/commands/channel-test-helpers.ts | 10 +- src/cron/isolated-agent.test-setup.ts | 2 +- src/gateway/test-helpers.mocks.ts | 4 +- src/image-generation/runtime.ts | 189 +--- src/infra/binaries.runtime.ts | 1 + src/infra/env.ts | 14 +- src/infra/heartbeat-runner.test-harness.ts | 6 +- src/infra/heartbeat-runner.test-utils.ts | 2 +- .../message-action-runner.test-helpers.ts | 4 +- src/infra/outbound/targets.shared-test.ts | 2 +- src/infra/provider-usage.auth.plugin.test.ts | 6 +- src/library.test.ts | 18 +- src/library.ts | 64 +- src/media-understanding/runtime.ts | 155 +-- src/plugin-sdk/account-resolution.ts | 14 +- src/plugin-sdk/agent-config-primitives.ts | 3 + src/plugin-sdk/bluebubbles.ts | 3 +- src/plugin-sdk/channel-config-primitives.ts | 16 + .../channel-import-guardrails.test.ts | 2 +- src/plugin-sdk/channel-runtime.ts | 1 + src/plugin-sdk/compat.ts | 2 +- src/plugin-sdk/config-runtime.ts | 4 + src/plugin-sdk/image-generation-core.ts | 15 + src/plugin-sdk/image-generation-runtime.ts | 7 +- src/plugin-sdk/mattermost.ts | 1 + src/plugin-sdk/media-runtime.ts | 1 + src/plugin-sdk/media-understanding-runtime.ts | 4 +- src/plugin-sdk/signal.ts | 21 +- src/plugin-sdk/speech-core.ts | 9 + src/plugin-sdk/speech-runtime.ts | 34 +- src/plugins/capability-provider-runtime.ts | 2 +- src/plugins/contracts/registry.ts | 63 +- src/plugins/public-artifacts.ts | 15 +- .../runtime/runtime-matrix-boundary.ts | 20 +- src/plugins/runtime/runtime-matrix-surface.ts | 22 + .../runtime/runtime-whatsapp-boundary.ts | 135 +-- .../runtime/runtime-whatsapp-surface.ts | 249 +++++ src/test-utils/imessage-test-plugin.ts | 2 +- src/tts/tts.ts | 889 +----------------- 87 files changed, 2129 insertions(+), 1582 deletions(-) create mode 100644 extensions/anthropic/test-api.ts create mode 100644 extensions/discord/action-runtime-api.ts create mode 100644 extensions/google/test-api.ts create mode 100644 extensions/image-generation-core/api.ts create mode 100644 extensions/image-generation-core/package.json create mode 100644 extensions/image-generation-core/runtime-api.ts create mode 100644 extensions/image-generation-core/src/runtime.ts create mode 100644 extensions/mattermost/src/config-schema-core.ts create mode 100644 extensions/mattermost/src/config-surface.ts create mode 100644 extensions/media-understanding-core/package.json create mode 100644 extensions/media-understanding-core/runtime-api.ts create mode 100644 extensions/media-understanding-core/src/runtime.ts create mode 100644 extensions/msteams/test-api.ts create mode 100644 extensions/nostr/test-api.ts create mode 100644 extensions/signal/reaction-runtime-api.ts create mode 100644 extensions/speech-core/api.ts create mode 100644 extensions/speech-core/package.json create mode 100644 extensions/speech-core/runtime-api.ts create mode 100644 extensions/speech-core/src/tts.ts create mode 100644 extensions/tlon/test-api.ts create mode 100644 src/cli/prompt.runtime.ts create mode 100644 src/infra/binaries.runtime.ts create mode 100644 src/plugin-sdk/agent-config-primitives.ts create mode 100644 src/plugin-sdk/channel-config-primitives.ts create mode 100644 src/plugins/runtime/runtime-matrix-surface.ts create mode 100644 src/plugins/runtime/runtime-whatsapp-surface.ts diff --git a/docs/.generated/plugin-sdk-api-baseline.json b/docs/.generated/plugin-sdk-api-baseline.json index 22cab819bab..e3baed3e935 100644 --- a/docs/.generated/plugin-sdk-api-baseline.json +++ b/docs/.generated/plugin-sdk-api-baseline.json @@ -1522,6 +1522,15 @@ "path": "src/channels/plugins/normalize/whatsapp.ts" } }, + { + "declaration": "export function normalizeChannelId(raw?: string | null | undefined): ChannelId | null;", + "exportName": "normalizeChannelId", + "kind": "function", + "source": { + "line": 80, + "path": "src/channels/plugins/registry.ts" + } + }, { "declaration": "export function normalizeChatType(raw?: string | undefined): ChatType | undefined;", "exportName": "normalizeChatType", diff --git a/docs/.generated/plugin-sdk-api-baseline.jsonl b/docs/.generated/plugin-sdk-api-baseline.jsonl index 7687647b970..2f12c48f184 100644 --- a/docs/.generated/plugin-sdk-api-baseline.jsonl +++ b/docs/.generated/plugin-sdk-api-baseline.jsonl @@ -166,6 +166,7 @@ {"declaration":"export function keepHttpServerTaskAlive(params: { server: CloseAwareServer; abortSignal?: AbortSignal | undefined; onAbort?: (() => void | Promise) | undefined; }): Promise;","entrypoint":"channel-runtime","exportName":"keepHttpServerTaskAlive","importSpecifier":"openclaw/plugin-sdk/channel-runtime","kind":"function","recordType":"export","sourceLine":79,"sourcePath":"src/plugin-sdk/channel-lifecycle.ts"} {"declaration":"export function looksLikeSignalTargetId(raw: string, normalized?: string | undefined): boolean;","entrypoint":"channel-runtime","exportName":"looksLikeSignalTargetId","importSpecifier":"openclaw/plugin-sdk/channel-runtime","kind":"function","recordType":"export","sourceLine":38,"sourcePath":"src/channels/plugins/normalize/signal.ts"} {"declaration":"export function looksLikeWhatsAppTargetId(raw: string): boolean;","entrypoint":"channel-runtime","exportName":"looksLikeWhatsAppTargetId","importSpecifier":"openclaw/plugin-sdk/channel-runtime","kind":"function","recordType":"export","sourceLine":20,"sourcePath":"src/channels/plugins/normalize/whatsapp.ts"} +{"declaration":"export function normalizeChannelId(raw?: string | null | undefined): ChannelId | null;","entrypoint":"channel-runtime","exportName":"normalizeChannelId","importSpecifier":"openclaw/plugin-sdk/channel-runtime","kind":"function","recordType":"export","sourceLine":80,"sourcePath":"src/channels/plugins/registry.ts"} {"declaration":"export function normalizeChatType(raw?: string | undefined): ChatType | undefined;","entrypoint":"channel-runtime","exportName":"normalizeChatType","importSpecifier":"openclaw/plugin-sdk/channel-runtime","kind":"function","recordType":"export","sourceLine":3,"sourcePath":"src/channels/chat-type.ts"} {"declaration":"export function normalizePollDurationHours(value: number | undefined, options: { defaultHours: number; maxHours: number; }): number;","entrypoint":"channel-runtime","exportName":"normalizePollDurationHours","importSpecifier":"openclaw/plugin-sdk/channel-runtime","kind":"function","recordType":"export","sourceLine":93,"sourcePath":"src/polls.ts"} {"declaration":"export function normalizePollInput(input: PollInput, options?: NormalizePollOptions): NormalizedPollInput;","entrypoint":"channel-runtime","exportName":"normalizePollInput","importSpecifier":"openclaw/plugin-sdk/channel-runtime","kind":"function","recordType":"export","sourceLine":36,"sourcePath":"src/polls.ts"} diff --git a/extensions/anthropic/test-api.ts b/extensions/anthropic/test-api.ts new file mode 100644 index 00000000000..7d467629cfb --- /dev/null +++ b/extensions/anthropic/test-api.ts @@ -0,0 +1 @@ +export { buildAnthropicCliBackend } from "./cli-backend.js"; diff --git a/extensions/bluebubbles/api.ts b/extensions/bluebubbles/api.ts index 743a24e835f..04300488ead 100644 --- a/extensions/bluebubbles/api.ts +++ b/extensions/bluebubbles/api.ts @@ -1,2 +1,6 @@ export { bluebubblesPlugin } from "./src/channel.js"; +export { + resolveBlueBubblesGroupRequireMention, + resolveBlueBubblesGroupToolPolicy, +} from "./src/group-policy.js"; export { isAllowedBlueBubblesSender } from "./src/targets.js"; diff --git a/extensions/discord/action-runtime-api.ts b/extensions/discord/action-runtime-api.ts new file mode 100644 index 00000000000..6c3c519c0fa --- /dev/null +++ b/extensions/discord/action-runtime-api.ts @@ -0,0 +1 @@ +export { handleDiscordAction } from "./src/actions/runtime.js"; diff --git a/extensions/discord/src/actions/handle-action.guild-admin.ts b/extensions/discord/src/actions/handle-action.guild-admin.ts index fcb3cf530b6..d4befbe7166 100644 --- a/extensions/discord/src/actions/handle-action.guild-admin.ts +++ b/extensions/discord/src/actions/handle-action.guild-admin.ts @@ -6,7 +6,7 @@ import { readStringParam, } from "openclaw/plugin-sdk/agent-runtime"; import type { ChannelMessageActionContext } from "openclaw/plugin-sdk/channel-contract"; -import { handleDiscordAction } from "./runtime.js"; +import { handleDiscordAction } from "../../action-runtime-api.js"; import { isDiscordModerationAction, readDiscordModerationCommand, diff --git a/extensions/discord/src/actions/handle-action.ts b/extensions/discord/src/actions/handle-action.ts index e0f91daa668..23b41a72e19 100644 --- a/extensions/discord/src/actions/handle-action.ts +++ b/extensions/discord/src/actions/handle-action.ts @@ -8,10 +8,10 @@ import { readBooleanParam } from "openclaw/plugin-sdk/boolean-param"; import { resolveReactionMessageId } from "openclaw/plugin-sdk/channel-actions"; import type { ChannelMessageActionContext } from "openclaw/plugin-sdk/channel-contract"; import { normalizeInteractiveReply } from "openclaw/plugin-sdk/interactive-runtime"; +import { handleDiscordAction } from "../../action-runtime-api.js"; import { buildDiscordInteractiveComponents } from "../shared-interactive.js"; import { resolveDiscordChannelId } from "../targets.js"; import { tryHandleDiscordMessageActionGuildAdmin } from "./handle-action.guild-admin.js"; -import { handleDiscordAction } from "./runtime.js"; import { readDiscordParentIdParam } from "./runtime.shared.js"; const providerId = "discord"; diff --git a/extensions/google/test-api.ts b/extensions/google/test-api.ts new file mode 100644 index 00000000000..2848ab1f800 --- /dev/null +++ b/extensions/google/test-api.ts @@ -0,0 +1 @@ +export { buildGoogleGeminiCliBackend } from "./cli-backend.js"; diff --git a/extensions/image-generation-core/api.ts b/extensions/image-generation-core/api.ts new file mode 100644 index 00000000000..338b02592b1 --- /dev/null +++ b/extensions/image-generation-core/api.ts @@ -0,0 +1 @@ +export * from "openclaw/plugin-sdk/image-generation-core"; diff --git a/extensions/image-generation-core/package.json b/extensions/image-generation-core/package.json new file mode 100644 index 00000000000..b9830afeff5 --- /dev/null +++ b/extensions/image-generation-core/package.json @@ -0,0 +1,7 @@ +{ + "name": "@openclaw/image-generation-core", + "version": "2026.3.26", + "private": true, + "description": "OpenClaw image generation runtime package", + "type": "module" +} diff --git a/extensions/image-generation-core/runtime-api.ts b/extensions/image-generation-core/runtime-api.ts new file mode 100644 index 00000000000..1d82b764b40 --- /dev/null +++ b/extensions/image-generation-core/runtime-api.ts @@ -0,0 +1,6 @@ +export { + generateImage, + listRuntimeImageGenerationProviders, + type GenerateImageParams, + type GenerateImageRuntimeResult, +} from "./src/runtime.js"; diff --git a/extensions/image-generation-core/src/runtime.ts b/extensions/image-generation-core/src/runtime.ts new file mode 100644 index 00000000000..2977b32a5cc --- /dev/null +++ b/extensions/image-generation-core/src/runtime.ts @@ -0,0 +1,183 @@ +import { + createSubsystemLogger, + describeFailoverError, + getImageGenerationProvider, + getProviderEnvVars, + isFailoverError, + listImageGenerationProviders, + parseImageGenerationModelRef, + resolveAgentModelFallbackValues, + resolveAgentModelPrimaryValue, + type AuthProfileStore, + type FallbackAttempt, + type GeneratedImageAsset, + type ImageGenerationResolution, + type ImageGenerationResult, + type ImageGenerationSourceImage, + type OpenClawConfig, +} from "../api.js"; + +const log = createSubsystemLogger("image-generation"); + +export type GenerateImageParams = { + cfg: OpenClawConfig; + prompt: string; + agentDir?: string; + authStore?: AuthProfileStore; + modelOverride?: string; + count?: number; + size?: string; + aspectRatio?: string; + resolution?: ImageGenerationResolution; + inputImages?: ImageGenerationSourceImage[]; +}; + +export type GenerateImageRuntimeResult = { + images: GeneratedImageAsset[]; + provider: string; + model: string; + attempts: FallbackAttempt[]; + metadata?: Record; +}; + +function resolveImageGenerationCandidates(params: { + cfg: OpenClawConfig; + modelOverride?: string; +}): Array<{ provider: string; model: string }> { + const candidates: Array<{ provider: string; model: string }> = []; + const seen = new Set(); + const add = (raw: string | undefined) => { + const parsed = parseImageGenerationModelRef(raw); + if (!parsed) { + return; + } + const key = `${parsed.provider}/${parsed.model}`; + if (seen.has(key)) { + return; + } + seen.add(key); + candidates.push(parsed); + }; + + add(params.modelOverride); + add(resolveAgentModelPrimaryValue(params.cfg.agents?.defaults?.imageGenerationModel)); + for (const fallback of resolveAgentModelFallbackValues( + params.cfg.agents?.defaults?.imageGenerationModel, + )) { + add(fallback); + } + return candidates; +} + +function throwImageGenerationFailure(params: { + attempts: FallbackAttempt[]; + lastError: unknown; +}): never { + if (params.attempts.length <= 1 && params.lastError) { + throw params.lastError; + } + const summary = + params.attempts.length > 0 + ? params.attempts + .map((attempt) => `${attempt.provider}/${attempt.model}: ${attempt.error}`) + .join(" | ") + : "unknown"; + throw new Error(`All image generation models failed (${params.attempts.length}): ${summary}`, { + cause: params.lastError instanceof Error ? params.lastError : undefined, + }); +} + +function buildNoImageGenerationModelConfiguredMessage(cfg: OpenClawConfig): string { + const providers = listImageGenerationProviders(cfg); + const sampleModel = + providers.find((provider) => provider.defaultModel) ?? + ({ id: "google", defaultModel: "gemini-3-pro-image-preview" } as const); + const authHints = providers + .flatMap((provider) => { + const envVars = getProviderEnvVars(provider.id); + if (envVars.length === 0) { + return []; + } + return [`${provider.id}: ${envVars.join(" / ")}`]; + }) + .slice(0, 3); + return [ + `No image-generation model configured. Set agents.defaults.imageGenerationModel.primary to a provider/model like "${sampleModel.id}/${sampleModel.defaultModel}".`, + authHints.length > 0 + ? `If you want a specific provider, also configure that provider's auth/API key first (${authHints.join("; ")}).` + : "If you want a specific provider, also configure that provider's auth/API key first.", + ].join(" "); +} + +export function listRuntimeImageGenerationProviders(params?: { config?: OpenClawConfig }) { + return listImageGenerationProviders(params?.config); +} + +export async function generateImage( + params: GenerateImageParams, +): Promise { + const candidates = resolveImageGenerationCandidates({ + cfg: params.cfg, + modelOverride: params.modelOverride, + }); + if (candidates.length === 0) { + throw new Error(buildNoImageGenerationModelConfiguredMessage(params.cfg)); + } + + const attempts: FallbackAttempt[] = []; + let lastError: unknown; + + for (const candidate of candidates) { + const provider = getImageGenerationProvider(candidate.provider, params.cfg); + if (!provider) { + const error = `No image-generation provider registered for ${candidate.provider}`; + attempts.push({ + provider: candidate.provider, + model: candidate.model, + error, + }); + lastError = new Error(error); + continue; + } + + try { + const result: ImageGenerationResult = await provider.generateImage({ + provider: candidate.provider, + model: candidate.model, + prompt: params.prompt, + cfg: params.cfg, + agentDir: params.agentDir, + authStore: params.authStore, + count: params.count, + size: params.size, + aspectRatio: params.aspectRatio, + resolution: params.resolution, + inputImages: params.inputImages, + }); + if (!Array.isArray(result.images) || result.images.length === 0) { + throw new Error("Image generation provider returned no images."); + } + return { + images: result.images, + provider: candidate.provider, + model: result.model ?? candidate.model, + attempts, + metadata: result.metadata, + }; + } catch (err) { + lastError = err; + const described = isFailoverError(err) ? describeFailoverError(err) : undefined; + attempts.push({ + provider: candidate.provider, + model: candidate.model, + error: described?.message ?? (err instanceof Error ? err.message : String(err)), + reason: described?.reason, + status: described?.status, + code: described?.code, + }); + log.debug(`image-generation candidate failed: ${candidate.provider}/${candidate.model}`); + } + } + + throwImageGenerationFailure({ attempts, lastError }); +} diff --git a/extensions/matrix/src/config-schema.ts b/extensions/matrix/src/config-schema.ts index dcae75d52cd..1ffd5622955 100644 --- a/extensions/matrix/src/config-schema.ts +++ b/extensions/matrix/src/config-schema.ts @@ -1,11 +1,13 @@ +import { ToolPolicySchema } from "openclaw/plugin-sdk/agent-config-primitives"; import { AllowFromListSchema, buildNestedDmConfigSchema, DmPolicySchema, GroupPolicySchema, -} from "openclaw/plugin-sdk/channel-config-schema"; + MarkdownConfigSchema, +} from "openclaw/plugin-sdk/channel-config-primitives"; +import { buildSecretInputSchema } from "openclaw/plugin-sdk/secret-input"; import { z } from "openclaw/plugin-sdk/zod"; -import { buildSecretInputSchema, MarkdownConfigSchema, ToolPolicySchema } from "./runtime-api.js"; const matrixActionSchema = z .object({ diff --git a/extensions/mattermost/src/channel.ts b/extensions/mattermost/src/channel.ts index 790be122f77..72f16e3109e 100644 --- a/extensions/mattermost/src/channel.ts +++ b/extensions/mattermost/src/channel.ts @@ -20,7 +20,7 @@ import { createComputedAccountStatusAdapter, createDefaultChannelRuntimeState, } from "openclaw/plugin-sdk/status-helpers"; -import { MattermostConfigSchema } from "./config-schema.js"; +import { MattermostChannelConfigSchema } from "./config-surface.js"; import { resolveMattermostGroupRequireMention } from "./group-mentions.js"; import { listMattermostAccountIds, @@ -40,7 +40,6 @@ import { sendMessageMattermost } from "./mattermost/send.js"; import { resolveMattermostOpaqueTarget } from "./mattermost/target-resolution.js"; import { looksLikeMattermostTargetId, normalizeMattermostMessagingTarget } from "./normalize.js"; import { - buildChannelConfigSchema, createAccountStatusSink, DEFAULT_ACCOUNT_ID, resolveAllowlistProviderRuntimeGroupPolicy, @@ -306,7 +305,7 @@ export const mattermostPlugin: ChannelPlugin = create blockStreamingCoalesceDefaults: { minChars: 1500, idleMs: 1000 }, }, reload: { configPrefixes: ["channels.mattermost"] }, - configSchema: buildChannelConfigSchema(MattermostConfigSchema), + configSchema: MattermostChannelConfigSchema, config: { ...mattermostConfigAdapter, isConfigured: (account) => Boolean(account.botToken && account.baseUrl), diff --git a/extensions/mattermost/src/config-schema-core.ts b/extensions/mattermost/src/config-schema-core.ts new file mode 100644 index 00000000000..8247a44b3b4 --- /dev/null +++ b/extensions/mattermost/src/config-schema-core.ts @@ -0,0 +1,125 @@ +import { + BlockStreamingCoalesceSchema, + DmPolicySchema, + GroupPolicySchema, + MarkdownConfigSchema, + requireOpenAllowFrom, +} from "openclaw/plugin-sdk/channel-config-primitives"; +import { z } from "openclaw/plugin-sdk/zod"; +import { buildSecretInputSchema } from "./secret-input.js"; + +function requireMattermostOpenAllowFrom(params: { + policy?: string; + allowFrom?: Array; + ctx: z.RefinementCtx; +}) { + requireOpenAllowFrom({ + policy: params.policy, + allowFrom: params.allowFrom, + ctx: params.ctx, + path: ["allowFrom"], + message: + 'channels.mattermost.dmPolicy="open" requires channels.mattermost.allowFrom to include "*"', + }); +} + +const DmChannelRetrySchema = z + .object({ + /** Maximum number of retry attempts for DM channel creation (default: 3) */ + maxRetries: z.number().int().min(0).max(10).optional(), + /** Initial delay in milliseconds before first retry (default: 1000) */ + initialDelayMs: z.number().int().min(100).max(60000).optional(), + /** Maximum delay in milliseconds between retries (default: 10000) */ + maxDelayMs: z.number().int().min(1000).max(60000).optional(), + /** Timeout for each individual DM channel creation request in milliseconds (default: 30000) */ + timeoutMs: z.number().int().min(5000).max(120000).optional(), + }) + .strict() + .refine( + (data) => { + if (data.initialDelayMs !== undefined && data.maxDelayMs !== undefined) { + return data.initialDelayMs <= data.maxDelayMs; + } + return true; + }, + { + message: "initialDelayMs must be less than or equal to maxDelayMs", + path: ["initialDelayMs"], + }, + ) + .optional(); + +const MattermostSlashCommandsSchema = z + .object({ + /** Enable native slash commands. "auto" resolves to false (opt-in). */ + native: z.union([z.boolean(), z.literal("auto")]).optional(), + /** Also register skill-based commands. */ + nativeSkills: z.union([z.boolean(), z.literal("auto")]).optional(), + /** Path for the callback endpoint on the gateway HTTP server. */ + callbackPath: z.string().optional(), + /** Explicit callback URL (e.g. behind reverse proxy). */ + callbackUrl: z.string().optional(), + }) + .strict() + .optional(); + +const MattermostAccountSchemaBase = z + .object({ + name: z.string().optional(), + capabilities: z.array(z.string()).optional(), + dangerouslyAllowNameMatching: z.boolean().optional(), + markdown: MarkdownConfigSchema, + enabled: z.boolean().optional(), + configWrites: z.boolean().optional(), + botToken: buildSecretInputSchema().optional(), + baseUrl: z.string().optional(), + chatmode: z.enum(["oncall", "onmessage", "onchar"]).optional(), + oncharPrefixes: z.array(z.string()).optional(), + requireMention: z.boolean().optional(), + dmPolicy: DmPolicySchema.optional().default("pairing"), + allowFrom: z.array(z.union([z.string(), z.number()])).optional(), + groupAllowFrom: z.array(z.union([z.string(), z.number()])).optional(), + groupPolicy: GroupPolicySchema.optional().default("allowlist"), + textChunkLimit: z.number().int().positive().optional(), + chunkMode: z.enum(["length", "newline"]).optional(), + blockStreaming: z.boolean().optional(), + blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), + replyToMode: z.enum(["off", "first", "all"]).optional(), + responsePrefix: z.string().optional(), + actions: z + .object({ + reactions: z.boolean().optional(), + }) + .optional(), + commands: MattermostSlashCommandsSchema, + interactions: z + .object({ + callbackBaseUrl: z.string().optional(), + allowedSourceIps: z.array(z.string()).optional(), + }) + .optional(), + /** Allow fetching from private/internal IP addresses (e.g. localhost). Required for self-hosted Mattermost on LAN/VPN. */ + allowPrivateNetwork: z.boolean().optional(), + /** Retry configuration for DM channel creation */ + dmChannelRetry: DmChannelRetrySchema, + }) + .strict(); + +const MattermostAccountSchema = MattermostAccountSchemaBase.superRefine((value, ctx) => { + requireMattermostOpenAllowFrom({ + policy: value.dmPolicy, + allowFrom: value.allowFrom, + ctx, + }); +}); + +export const MattermostConfigSchema = MattermostAccountSchemaBase.extend({ + accounts: z.record(z.string(), MattermostAccountSchema.optional()).optional(), + defaultAccount: z.string().optional(), +}).superRefine((value, ctx) => { + requireMattermostOpenAllowFrom({ + policy: value.dmPolicy, + allowFrom: value.allowFrom, + ctx, + }); +}); diff --git a/extensions/mattermost/src/config-schema.ts b/extensions/mattermost/src/config-schema.ts index a39ba6a13a8..97f44dabc49 100644 --- a/extensions/mattermost/src/config-schema.ts +++ b/extensions/mattermost/src/config-schema.ts @@ -1,115 +1 @@ -import { requireChannelOpenAllowFrom } from "openclaw/plugin-sdk/extension-shared"; -import { z } from "openclaw/plugin-sdk/zod"; -import { - BlockStreamingCoalesceSchema, - DmPolicySchema, - GroupPolicySchema, - MarkdownConfigSchema, - requireOpenAllowFrom, -} from "./runtime-api.js"; -import { buildSecretInputSchema } from "./secret-input.js"; - -const DmChannelRetrySchema = z - .object({ - /** Maximum number of retry attempts for DM channel creation (default: 3) */ - maxRetries: z.number().int().min(0).max(10).optional(), - /** Initial delay in milliseconds before first retry (default: 1000) */ - initialDelayMs: z.number().int().min(100).max(60000).optional(), - /** Maximum delay in milliseconds between retries (default: 10000) */ - maxDelayMs: z.number().int().min(1000).max(60000).optional(), - /** Timeout for each individual DM channel creation request in milliseconds (default: 30000) */ - timeoutMs: z.number().int().min(5000).max(120000).optional(), - }) - .strict() - .refine( - (data) => { - if (data.initialDelayMs !== undefined && data.maxDelayMs !== undefined) { - return data.initialDelayMs <= data.maxDelayMs; - } - return true; - }, - { - message: "initialDelayMs must be less than or equal to maxDelayMs", - path: ["initialDelayMs"], - }, - ) - .optional(); - -const MattermostSlashCommandsSchema = z - .object({ - /** Enable native slash commands. "auto" resolves to false (opt-in). */ - native: z.union([z.boolean(), z.literal("auto")]).optional(), - /** Also register skill-based commands. */ - nativeSkills: z.union([z.boolean(), z.literal("auto")]).optional(), - /** Path for the callback endpoint on the gateway HTTP server. */ - callbackPath: z.string().optional(), - /** Explicit callback URL (e.g. behind reverse proxy). */ - callbackUrl: z.string().optional(), - }) - .strict() - .optional(); - -const MattermostAccountSchemaBase = z - .object({ - name: z.string().optional(), - capabilities: z.array(z.string()).optional(), - dangerouslyAllowNameMatching: z.boolean().optional(), - markdown: MarkdownConfigSchema, - enabled: z.boolean().optional(), - configWrites: z.boolean().optional(), - botToken: buildSecretInputSchema().optional(), - baseUrl: z.string().optional(), - chatmode: z.enum(["oncall", "onmessage", "onchar"]).optional(), - oncharPrefixes: z.array(z.string()).optional(), - requireMention: z.boolean().optional(), - dmPolicy: DmPolicySchema.optional().default("pairing"), - allowFrom: z.array(z.union([z.string(), z.number()])).optional(), - groupAllowFrom: z.array(z.union([z.string(), z.number()])).optional(), - groupPolicy: GroupPolicySchema.optional().default("allowlist"), - textChunkLimit: z.number().int().positive().optional(), - chunkMode: z.enum(["length", "newline"]).optional(), - blockStreaming: z.boolean().optional(), - blockStreamingCoalesce: BlockStreamingCoalesceSchema.optional(), - replyToMode: z.enum(["off", "first", "all"]).optional(), - responsePrefix: z.string().optional(), - actions: z - .object({ - reactions: z.boolean().optional(), - }) - .optional(), - commands: MattermostSlashCommandsSchema, - interactions: z - .object({ - callbackBaseUrl: z.string().optional(), - allowedSourceIps: z.array(z.string()).optional(), - }) - .optional(), - /** Allow fetching from private/internal IP addresses (e.g. localhost). Required for self-hosted Mattermost on LAN/VPN. */ - allowPrivateNetwork: z.boolean().optional(), - /** Retry configuration for DM channel creation */ - dmChannelRetry: DmChannelRetrySchema, - }) - .strict(); - -const MattermostAccountSchema = MattermostAccountSchemaBase.superRefine((value, ctx) => { - requireChannelOpenAllowFrom({ - channel: "mattermost", - policy: value.dmPolicy, - allowFrom: value.allowFrom, - ctx, - requireOpenAllowFrom, - }); -}); - -export const MattermostConfigSchema = MattermostAccountSchemaBase.extend({ - accounts: z.record(z.string(), MattermostAccountSchema.optional()).optional(), - defaultAccount: z.string().optional(), -}).superRefine((value, ctx) => { - requireChannelOpenAllowFrom({ - channel: "mattermost", - policy: value.dmPolicy, - allowFrom: value.allowFrom, - ctx, - requireOpenAllowFrom, - }); -}); +export { MattermostConfigSchema } from "./config-schema-core.js"; diff --git a/extensions/mattermost/src/config-surface.ts b/extensions/mattermost/src/config-surface.ts new file mode 100644 index 00000000000..2785b1901ba --- /dev/null +++ b/extensions/mattermost/src/config-surface.ts @@ -0,0 +1,4 @@ +import { buildChannelConfigSchema } from "openclaw/plugin-sdk/channel-config-primitives"; +import { MattermostConfigSchema } from "./config-schema-core.js"; + +export const MattermostChannelConfigSchema = buildChannelConfigSchema(MattermostConfigSchema); diff --git a/extensions/media-understanding-core/package.json b/extensions/media-understanding-core/package.json new file mode 100644 index 00000000000..6636694c4bb --- /dev/null +++ b/extensions/media-understanding-core/package.json @@ -0,0 +1,7 @@ +{ + "name": "@openclaw/media-understanding-core", + "version": "2026.3.26", + "private": true, + "description": "OpenClaw media understanding runtime package", + "type": "module" +} diff --git a/extensions/media-understanding-core/runtime-api.ts b/extensions/media-understanding-core/runtime-api.ts new file mode 100644 index 00000000000..990b84fd348 --- /dev/null +++ b/extensions/media-understanding-core/runtime-api.ts @@ -0,0 +1,9 @@ +export { + describeImageFile, + describeImageFileWithModel, + describeVideoFile, + runMediaUnderstandingFile, + transcribeAudioFile, + type RunMediaUnderstandingFileParams, + type RunMediaUnderstandingFileResult, +} from "./src/runtime.js"; diff --git a/extensions/media-understanding-core/src/runtime.ts b/extensions/media-understanding-core/src/runtime.ts new file mode 100644 index 00000000000..0c6fc935062 --- /dev/null +++ b/extensions/media-understanding-core/src/runtime.ts @@ -0,0 +1,147 @@ +import fs from "node:fs/promises"; +import path from "node:path"; +import type { OpenClawConfig } from "openclaw/plugin-sdk/core"; +import { + buildProviderRegistry, + createMediaAttachmentCache, + normalizeMediaAttachments, + normalizeMediaProviderId, + runCapability, + type ActiveMediaModel, +} from "openclaw/plugin-sdk/media-runtime"; + +type MediaUnderstandingCapability = "image" | "audio" | "video"; +type MediaUnderstandingOutput = Awaited>["outputs"][number]; + +const KIND_BY_CAPABILITY: Record = { + audio: "audio.transcription", + image: "image.description", + video: "video.description", +}; + +export type RunMediaUnderstandingFileParams = { + capability: MediaUnderstandingCapability; + filePath: string; + cfg: OpenClawConfig; + agentDir?: string; + mime?: string; + activeModel?: ActiveMediaModel; +}; + +export type RunMediaUnderstandingFileResult = { + text: string | undefined; + provider?: string; + model?: string; + output?: MediaUnderstandingOutput; +}; + +function buildFileContext(params: { filePath: string; mime?: string }) { + return { + MediaPath: params.filePath, + MediaType: params.mime, + }; +} + +export async function runMediaUnderstandingFile( + params: RunMediaUnderstandingFileParams, +): Promise { + const ctx = buildFileContext(params); + const attachments = normalizeMediaAttachments(ctx); + if (attachments.length === 0) { + return { text: undefined }; + } + + const providerRegistry = buildProviderRegistry(undefined, params.cfg); + const cache = createMediaAttachmentCache(attachments, { + localPathRoots: [path.dirname(params.filePath)], + }); + + try { + const result = await runCapability({ + capability: params.capability, + cfg: params.cfg, + ctx, + attachments: cache, + media: attachments, + agentDir: params.agentDir, + providerRegistry, + config: params.cfg.tools?.media?.[params.capability], + activeModel: params.activeModel, + }); + const output = result.outputs.find( + (entry) => entry.kind === KIND_BY_CAPABILITY[params.capability], + ); + const text = output?.text?.trim(); + return { + text: text || undefined, + provider: output?.provider, + model: output?.model, + output, + }; + } finally { + await cache.cleanup(); + } +} + +export async function describeImageFile(params: { + filePath: string; + cfg: OpenClawConfig; + agentDir?: string; + mime?: string; + activeModel?: ActiveMediaModel; +}): Promise { + return await runMediaUnderstandingFile({ ...params, capability: "image" }); +} + +export async function describeImageFileWithModel(params: { + filePath: string; + cfg: OpenClawConfig; + agentDir?: string; + mime?: string; + provider: string; + model: string; + prompt: string; + maxTokens?: number; + timeoutMs?: number; +}) { + const timeoutMs = params.timeoutMs ?? 30_000; + const providerRegistry = buildProviderRegistry(undefined, params.cfg); + const provider = providerRegistry.get(normalizeMediaProviderId(params.provider)); + if (!provider?.describeImage) { + throw new Error(`Provider does not support image analysis: ${params.provider}`); + } + const buffer = await fs.readFile(params.filePath); + return await provider.describeImage({ + buffer, + fileName: path.basename(params.filePath), + mime: params.mime, + provider: params.provider, + model: params.model, + prompt: params.prompt, + maxTokens: params.maxTokens, + timeoutMs, + cfg: params.cfg, + agentDir: params.agentDir ?? "", + }); +} + +export async function describeVideoFile(params: { + filePath: string; + cfg: OpenClawConfig; + agentDir?: string; + mime?: string; + activeModel?: ActiveMediaModel; +}): Promise { + return await runMediaUnderstandingFile({ ...params, capability: "video" }); +} + +export async function transcribeAudioFile(params: { + filePath: string; + cfg: OpenClawConfig; + agentDir?: string; + mime?: string; + activeModel?: ActiveMediaModel; +}): Promise<{ text: string | undefined }> { + const result = await runMediaUnderstandingFile({ ...params, capability: "audio" }); + return { text: result.text }; +} diff --git a/extensions/msteams/test-api.ts b/extensions/msteams/test-api.ts new file mode 100644 index 00000000000..36c93f4ea87 --- /dev/null +++ b/extensions/msteams/test-api.ts @@ -0,0 +1 @@ +export { msteamsPlugin } from "./src/channel.js"; diff --git a/extensions/nextcloud-talk/src/config-schema.ts b/extensions/nextcloud-talk/src/config-schema.ts index 0414d2e04bd..65367b29fb1 100644 --- a/extensions/nextcloud-talk/src/config-schema.ts +++ b/extensions/nextcloud-talk/src/config-schema.ts @@ -1,17 +1,33 @@ -import { requireChannelOpenAllowFrom } from "openclaw/plugin-sdk/extension-shared"; -import { z } from "openclaw/plugin-sdk/zod"; +import { + ReplyRuntimeConfigSchemaShape, + ToolPolicySchema, +} from "openclaw/plugin-sdk/agent-config-primitives"; import { BlockStreamingCoalesceSchema, DmConfigSchema, DmPolicySchema, GroupPolicySchema, MarkdownConfigSchema, - ReplyRuntimeConfigSchemaShape, - ToolPolicySchema, requireOpenAllowFrom, -} from "../runtime-api.js"; +} from "openclaw/plugin-sdk/channel-config-primitives"; +import { z } from "openclaw/plugin-sdk/zod"; import { buildSecretInputSchema } from "./secret-input.js"; +function requireNextcloudTalkOpenAllowFrom(params: { + policy?: string; + allowFrom?: string[]; + ctx: z.RefinementCtx; +}) { + requireOpenAllowFrom({ + policy: params.policy, + allowFrom: params.allowFrom, + ctx: params.ctx, + path: ["allowFrom"], + message: + 'channels.nextcloud-talk.dmPolicy="open" requires channels.nextcloud-talk.allowFrom to include "*"', + }); +} + export const NextcloudTalkRoomSchema = z .object({ requireMention: z.boolean().optional(), @@ -51,12 +67,10 @@ export const NextcloudTalkAccountSchemaBase = z export const NextcloudTalkAccountSchema = NextcloudTalkAccountSchemaBase.superRefine( (value, ctx) => { - requireChannelOpenAllowFrom({ - channel: "nextcloud-talk", + requireNextcloudTalkOpenAllowFrom({ policy: value.dmPolicy, allowFrom: value.allowFrom, ctx, - requireOpenAllowFrom, }); }, ); @@ -65,11 +79,9 @@ export const NextcloudTalkConfigSchema = NextcloudTalkAccountSchemaBase.extend({ accounts: z.record(z.string(), NextcloudTalkAccountSchema.optional()).optional(), defaultAccount: z.string().optional(), }).superRefine((value, ctx) => { - requireChannelOpenAllowFrom({ - channel: "nextcloud-talk", + requireNextcloudTalkOpenAllowFrom({ policy: value.dmPolicy, allowFrom: value.allowFrom, ctx, - requireOpenAllowFrom, }); }); diff --git a/extensions/nostr/src/config-schema.ts b/extensions/nostr/src/config-schema.ts index bec1ae130fc..dc6b6d8133a 100644 --- a/extensions/nostr/src/config-schema.ts +++ b/extensions/nostr/src/config-schema.ts @@ -1,6 +1,10 @@ -import { AllowFromListSchema, DmPolicySchema } from "openclaw/plugin-sdk/channel-config-schema"; +import { + AllowFromListSchema, + buildChannelConfigSchema, + DmPolicySchema, + MarkdownConfigSchema, +} from "openclaw/plugin-sdk/channel-config-primitives"; import { z } from "openclaw/plugin-sdk/zod"; -import { MarkdownConfigSchema, buildChannelConfigSchema } from "../api.js"; /** * Validates https:// URLs only (no javascript:, data:, file:, etc.) diff --git a/extensions/nostr/test-api.ts b/extensions/nostr/test-api.ts new file mode 100644 index 00000000000..3cdf86a5120 --- /dev/null +++ b/extensions/nostr/test-api.ts @@ -0,0 +1 @@ +export { nostrPlugin } from "./src/channel.js"; diff --git a/extensions/openai/test-api.ts b/extensions/openai/test-api.ts index 4dda287d5d5..4810e1af92d 100644 --- a/extensions/openai/test-api.ts +++ b/extensions/openai/test-api.ts @@ -1 +1,2 @@ +export { buildOpenAICodexCliBackend } from "./cli-backend.js"; export { buildOpenAISpeechProvider } from "./speech-provider.js"; diff --git a/extensions/shared/config-schema-helpers.ts b/extensions/shared/config-schema-helpers.ts index fb077a0b1f5..06a9d2a4f41 100644 --- a/extensions/shared/config-schema-helpers.ts +++ b/extensions/shared/config-schema-helpers.ts @@ -1 +1,17 @@ -export { requireChannelOpenAllowFrom } from "openclaw/plugin-sdk/extension-shared"; +import { requireOpenAllowFrom } from "openclaw/plugin-sdk/channel-config-primitives"; +import type { z } from "openclaw/plugin-sdk/zod"; + +export function requireChannelOpenAllowFrom(params: { + channel: string; + policy?: string; + allowFrom?: Array; + ctx: z.RefinementCtx; +}) { + requireOpenAllowFrom({ + policy: params.policy, + allowFrom: params.allowFrom, + ctx: params.ctx, + path: ["allowFrom"], + message: `channels.${params.channel}.dmPolicy="open" requires channels.${params.channel}.allowFrom to include "*"`, + }); +} diff --git a/extensions/signal/reaction-runtime-api.ts b/extensions/signal/reaction-runtime-api.ts new file mode 100644 index 00000000000..e02f9bf54f2 --- /dev/null +++ b/extensions/signal/reaction-runtime-api.ts @@ -0,0 +1,6 @@ +export { + removeReactionSignal, + sendReactionSignal, + type SignalReactionOpts, + type SignalReactionResult, +} from "./src/send-reactions.js"; diff --git a/extensions/signal/src/accounts.ts b/extensions/signal/src/accounts.ts index 020bf4fac45..da5c3509f7a 100644 --- a/extensions/signal/src/accounts.ts +++ b/extensions/signal/src/accounts.ts @@ -4,7 +4,7 @@ import { resolveMergedAccountConfig, type OpenClawConfig, } from "openclaw/plugin-sdk/account-resolution"; -import type { SignalAccountConfig } from "openclaw/plugin-sdk/signal"; +import type { SignalAccountConfig } from "openclaw/plugin-sdk/signal-core"; export type ResolvedSignalAccount = { accountId: string; diff --git a/extensions/signal/src/message-actions.ts b/extensions/signal/src/message-actions.ts index 2645908f3e9..ca3d9ffd717 100644 --- a/extensions/signal/src/message-actions.ts +++ b/extensions/signal/src/message-actions.ts @@ -4,9 +4,9 @@ import type { ChannelMessageActionAdapter, ChannelMessageActionName, } from "openclaw/plugin-sdk/channel-contract"; +import { removeReactionSignal, sendReactionSignal } from "../reaction-runtime-api.js"; import { listEnabledSignalAccounts, resolveSignalAccount } from "./accounts.js"; import { resolveSignalReactionLevel } from "./reaction-level.js"; -import { removeReactionSignal, sendReactionSignal } from "./send-reactions.js"; const providerId = "signal"; const GROUP_PREFIX = "group:"; diff --git a/extensions/slack/test-api.ts b/extensions/slack/test-api.ts index 8d8de2e7e55..5e398104009 100644 --- a/extensions/slack/test-api.ts +++ b/extensions/slack/test-api.ts @@ -1,5 +1,7 @@ export type { ResolvedSlackAccount } from "./src/accounts.js"; export type { SlackMessageEvent } from "./src/types.js"; +export { slackPlugin } from "./src/channel.js"; +export { setSlackRuntime } from "./src/runtime.js"; export { createSlackActions } from "./src/channel-actions.js"; export { prepareSlackMessage } from "./src/monitor/message-handler/prepare.js"; export { createInboundSlackTestContext } from "./src/monitor/message-handler/prepare.test-helpers.js"; diff --git a/extensions/speech-core/api.ts b/extensions/speech-core/api.ts new file mode 100644 index 00000000000..258c5245f73 --- /dev/null +++ b/extensions/speech-core/api.ts @@ -0,0 +1 @@ +export * from "openclaw/plugin-sdk/speech-core"; diff --git a/extensions/speech-core/package.json b/extensions/speech-core/package.json new file mode 100644 index 00000000000..b37738b19af --- /dev/null +++ b/extensions/speech-core/package.json @@ -0,0 +1,7 @@ +{ + "name": "@openclaw/speech-core", + "version": "2026.3.26", + "private": true, + "description": "OpenClaw speech runtime package", + "type": "module" +} diff --git a/extensions/speech-core/runtime-api.ts b/extensions/speech-core/runtime-api.ts new file mode 100644 index 00000000000..0a58b4cefa3 --- /dev/null +++ b/extensions/speech-core/runtime-api.ts @@ -0,0 +1,33 @@ +export { + buildTtsSystemPromptHint, + getLastTtsAttempt, + getResolvedSpeechProviderConfig, + getTtsMaxLength, + getTtsProvider, + isSummarizationEnabled, + isTtsEnabled, + isTtsProviderConfigured, + listSpeechVoices, + maybeApplyTtsToPayload, + resolveTtsAutoMode, + resolveTtsConfig, + resolveTtsPrefsPath, + resolveTtsProviderOrder, + setLastTtsAttempt, + setSummarizationEnabled, + setTtsAutoMode, + setTtsEnabled, + setTtsMaxLength, + setTtsProvider, + synthesizeSpeech, + textToSpeech, + textToSpeechTelephony, + _test, + type ResolvedTtsConfig, + type ResolvedTtsModelOverrides, + type TtsDirectiveOverrides, + type TtsDirectiveParseResult, + type TtsResult, + type TtsSynthesisResult, + type TtsTelephonyResult, +} from "./src/tts.js"; diff --git a/extensions/speech-core/src/tts.ts b/extensions/speech-core/src/tts.ts new file mode 100644 index 00000000000..2f01c7ebd51 --- /dev/null +++ b/extensions/speech-core/src/tts.ts @@ -0,0 +1,849 @@ +import { randomBytes } from "node:crypto"; +import { + existsSync, + mkdirSync, + readFileSync, + writeFileSync, + mkdtempSync, + renameSync, + unlinkSync, +} from "node:fs"; +import path from "node:path"; +import { normalizeChannelId, type ChannelId } from "openclaw/plugin-sdk/channel-runtime"; +import type { + OpenClawConfig, + TtsAutoMode, + TtsConfig, + TtsMode, + TtsModelOverrideConfig, + TtsProvider, +} from "openclaw/plugin-sdk/config-runtime"; +import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload"; +import type { ReplyPayload } from "openclaw/plugin-sdk/reply-runtime"; +import { logVerbose } from "openclaw/plugin-sdk/runtime-env"; +import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/sandbox"; +import { CONFIG_DIR, resolveUserPath, stripMarkdown } from "openclaw/plugin-sdk/text-runtime"; +import { + canonicalizeSpeechProviderId, + getSpeechProvider, + listSpeechProviders, + normalizeTtsAutoMode, + parseTtsDirectives, + scheduleCleanup, + summarizeText, + type SpeechModelOverridePolicy, + type SpeechProviderConfig, + type SpeechVoiceOption, + type TtsDirectiveOverrides, + type TtsDirectiveParseResult, +} from "../api.js"; + +export type { TtsDirectiveOverrides, TtsDirectiveParseResult }; + +const DEFAULT_TIMEOUT_MS = 30_000; +const DEFAULT_TTS_MAX_LENGTH = 1500; +const DEFAULT_TTS_SUMMARIZE = true; +const DEFAULT_MAX_TEXT_LENGTH = 4096; + +export type ResolvedTtsConfig = { + auto: TtsAutoMode; + mode: TtsMode; + provider: TtsProvider; + providerSource: "config" | "default"; + summaryModel?: string; + modelOverrides: ResolvedTtsModelOverrides; + providerConfigs: Record; + prefsPath?: string; + maxTextLength: number; + timeoutMs: number; +}; + +type TtsUserPrefs = { + tts?: { + auto?: TtsAutoMode; + enabled?: boolean; + provider?: TtsProvider; + maxLength?: number; + summarize?: boolean; + }; +}; + +export type ResolvedTtsModelOverrides = SpeechModelOverridePolicy; + +export type TtsResult = { + success: boolean; + audioPath?: string; + error?: string; + latencyMs?: number; + provider?: string; + outputFormat?: string; + voiceCompatible?: boolean; +}; + +export type TtsSynthesisResult = { + success: boolean; + audioBuffer?: Buffer; + error?: string; + latencyMs?: number; + provider?: string; + outputFormat?: string; + voiceCompatible?: boolean; + fileExtension?: string; +}; + +export type TtsTelephonyResult = { + success: boolean; + audioBuffer?: Buffer; + error?: string; + latencyMs?: number; + provider?: string; + outputFormat?: string; + sampleRate?: number; +}; + +type TtsStatusEntry = { + timestamp: number; + success: boolean; + textLength: number; + summarized: boolean; + provider?: string; + latencyMs?: number; + error?: string; +}; + +let lastTtsAttempt: TtsStatusEntry | undefined; + +function resolveModelOverridePolicy( + overrides: TtsModelOverrideConfig | undefined, +): ResolvedTtsModelOverrides { + const enabled = overrides?.enabled ?? true; + if (!enabled) { + return { + enabled: false, + allowText: false, + allowProvider: false, + allowVoice: false, + allowModelId: false, + allowVoiceSettings: false, + allowNormalization: false, + allowSeed: false, + }; + } + const allow = (value: boolean | undefined, defaultValue = true) => value ?? defaultValue; + return { + enabled: true, + allowText: allow(overrides?.allowText), + allowProvider: allow(overrides?.allowProvider, false), + allowVoice: allow(overrides?.allowVoice), + allowModelId: allow(overrides?.allowModelId), + allowVoiceSettings: allow(overrides?.allowVoiceSettings), + allowNormalization: allow(overrides?.allowNormalization), + allowSeed: allow(overrides?.allowSeed), + }; +} + +function sortSpeechProvidersForAutoSelection(cfg?: OpenClawConfig) { + return listSpeechProviders(cfg).toSorted((left, right) => { + const leftOrder = left.autoSelectOrder ?? Number.MAX_SAFE_INTEGER; + const rightOrder = right.autoSelectOrder ?? Number.MAX_SAFE_INTEGER; + if (leftOrder !== rightOrder) { + return leftOrder - rightOrder; + } + return left.id.localeCompare(right.id); + }); +} + +function resolveRegistryDefaultSpeechProviderId(cfg?: OpenClawConfig): TtsProvider { + return sortSpeechProvidersForAutoSelection(cfg)[0]?.id ?? ""; +} + +function asProviderConfig(value: unknown): SpeechProviderConfig { + return typeof value === "object" && value !== null && !Array.isArray(value) + ? (value as SpeechProviderConfig) + : {}; +} + +function asProviderConfigMap(value: unknown): Record { + return typeof value === "object" && value !== null && !Array.isArray(value) + ? (value as Record) + : {}; +} + +function resolveSpeechProviderConfigs( + raw: TtsConfig, + cfg: OpenClawConfig, + timeoutMs: number, +): Record { + const providerConfigs: Record = {}; + const rawProviders = asProviderConfigMap(raw.providers); + for (const provider of listSpeechProviders(cfg)) { + providerConfigs[provider.id] = + provider.resolveConfig?.({ + cfg, + rawConfig: { + ...(raw as Record), + providers: rawProviders, + }, + timeoutMs, + }) ?? + asProviderConfig(rawProviders[provider.id] ?? (raw as Record)[provider.id]); + } + return providerConfigs; +} + +export function getResolvedSpeechProviderConfig( + config: ResolvedTtsConfig, + providerId: string, + cfg?: OpenClawConfig, +): SpeechProviderConfig { + const canonical = + canonicalizeSpeechProviderId(providerId, cfg) ?? providerId.trim().toLowerCase(); + return config.providerConfigs[canonical] ?? {}; +} + +export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig { + const raw: TtsConfig = cfg.messages?.tts ?? {}; + const providerSource = raw.provider ? "config" : "default"; + const timeoutMs = raw.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const auto = normalizeTtsAutoMode(raw.auto) ?? (raw.enabled ? "always" : "off"); + return { + auto, + mode: raw.mode ?? "final", + provider: + canonicalizeSpeechProviderId(raw.provider, cfg) ?? + resolveRegistryDefaultSpeechProviderId(cfg), + providerSource, + summaryModel: raw.summaryModel?.trim() || undefined, + modelOverrides: resolveModelOverridePolicy(raw.modelOverrides), + providerConfigs: resolveSpeechProviderConfigs(raw, cfg, timeoutMs), + prefsPath: raw.prefsPath, + maxTextLength: raw.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH, + timeoutMs, + }; +} + +export function resolveTtsPrefsPath(config: ResolvedTtsConfig): string { + if (config.prefsPath?.trim()) { + return resolveUserPath(config.prefsPath.trim()); + } + const envPath = process.env.OPENCLAW_TTS_PREFS?.trim(); + if (envPath) { + return resolveUserPath(envPath); + } + return path.join(CONFIG_DIR, "settings", "tts.json"); +} + +function resolveTtsAutoModeFromPrefs(prefs: TtsUserPrefs): TtsAutoMode | undefined { + const auto = normalizeTtsAutoMode(prefs.tts?.auto); + if (auto) { + return auto; + } + if (typeof prefs.tts?.enabled === "boolean") { + return prefs.tts.enabled ? "always" : "off"; + } + return undefined; +} + +export function resolveTtsAutoMode(params: { + config: ResolvedTtsConfig; + prefsPath: string; + sessionAuto?: string; +}): TtsAutoMode { + const sessionAuto = normalizeTtsAutoMode(params.sessionAuto); + if (sessionAuto) { + return sessionAuto; + } + const prefsAuto = resolveTtsAutoModeFromPrefs(readPrefs(params.prefsPath)); + if (prefsAuto) { + return prefsAuto; + } + return params.config.auto; +} + +export function buildTtsSystemPromptHint(cfg: OpenClawConfig): string | undefined { + const config = resolveTtsConfig(cfg); + const prefsPath = resolveTtsPrefsPath(config); + const autoMode = resolveTtsAutoMode({ config, prefsPath }); + if (autoMode === "off") { + return undefined; + } + const maxLength = getTtsMaxLength(prefsPath); + const summarize = isSummarizationEnabled(prefsPath) ? "on" : "off"; + const autoHint = + autoMode === "inbound" + ? "Only use TTS when the user's last message includes audio/voice." + : autoMode === "tagged" + ? "Only use TTS when you include [[tts]] or [[tts:text]] tags." + : undefined; + return [ + "Voice (TTS) is enabled.", + autoHint, + `Keep spoken text ≤${maxLength} chars to avoid auto-summary (summary ${summarize}).`, + "Use [[tts:...]] and optional [[tts:text]]...[[/tts:text]] to control voice/expressiveness.", + ] + .filter(Boolean) + .join("\n"); +} + +function readPrefs(prefsPath: string): TtsUserPrefs { + try { + if (!existsSync(prefsPath)) { + return {}; + } + return JSON.parse(readFileSync(prefsPath, "utf8")) as TtsUserPrefs; + } catch { + return {}; + } +} + +function atomicWriteFileSync(filePath: string, content: string): void { + const tmpPath = `${filePath}.tmp.${Date.now()}.${randomBytes(8).toString("hex")}`; + writeFileSync(tmpPath, content, { mode: 0o600 }); + try { + renameSync(tmpPath, filePath); + } catch (err) { + try { + unlinkSync(tmpPath); + } catch { + // ignore + } + throw err; + } +} + +function updatePrefs(prefsPath: string, update: (prefs: TtsUserPrefs) => void): void { + const prefs = readPrefs(prefsPath); + update(prefs); + mkdirSync(path.dirname(prefsPath), { recursive: true }); + atomicWriteFileSync(prefsPath, JSON.stringify(prefs, null, 2)); +} + +export function isTtsEnabled( + config: ResolvedTtsConfig, + prefsPath: string, + sessionAuto?: string, +): boolean { + return resolveTtsAutoMode({ config, prefsPath, sessionAuto }) !== "off"; +} + +export function setTtsAutoMode(prefsPath: string, mode: TtsAutoMode): void { + updatePrefs(prefsPath, (prefs) => { + const next = { ...prefs.tts }; + delete next.enabled; + next.auto = mode; + prefs.tts = next; + }); +} + +export function setTtsEnabled(prefsPath: string, enabled: boolean): void { + setTtsAutoMode(prefsPath, enabled ? "always" : "off"); +} + +export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): TtsProvider { + const prefs = readPrefs(prefsPath); + const prefsProvider = canonicalizeSpeechProviderId(prefs.tts?.provider); + if (prefsProvider) { + return prefsProvider; + } + if (config.providerSource === "config") { + return canonicalizeSpeechProviderId(config.provider) ?? config.provider; + } + + for (const provider of sortSpeechProvidersForAutoSelection()) { + if ( + provider.isConfigured({ + providerConfig: config.providerConfigs[provider.id] ?? {}, + timeoutMs: config.timeoutMs, + }) + ) { + return provider.id; + } + } + return config.provider; +} + +export function setTtsProvider(prefsPath: string, provider: TtsProvider): void { + updatePrefs(prefsPath, (prefs) => { + prefs.tts = { ...prefs.tts, provider: canonicalizeSpeechProviderId(provider) ?? provider }; + }); +} + +export function getTtsMaxLength(prefsPath: string): number { + const prefs = readPrefs(prefsPath); + return prefs.tts?.maxLength ?? DEFAULT_TTS_MAX_LENGTH; +} + +export function setTtsMaxLength(prefsPath: string, maxLength: number): void { + updatePrefs(prefsPath, (prefs) => { + prefs.tts = { ...prefs.tts, maxLength }; + }); +} + +export function isSummarizationEnabled(prefsPath: string): boolean { + const prefs = readPrefs(prefsPath); + return prefs.tts?.summarize ?? DEFAULT_TTS_SUMMARIZE; +} + +export function setSummarizationEnabled(prefsPath: string, enabled: boolean): void { + updatePrefs(prefsPath, (prefs) => { + prefs.tts = { ...prefs.tts, summarize: enabled }; + }); +} + +export function getLastTtsAttempt(): TtsStatusEntry | undefined { + return lastTtsAttempt; +} + +export function setLastTtsAttempt(entry: TtsStatusEntry | undefined): void { + lastTtsAttempt = entry; +} + +const OPUS_CHANNELS = new Set(["telegram", "feishu", "whatsapp", "matrix"]); + +function resolveChannelId(channel: string | undefined): ChannelId | null { + return channel ? normalizeChannelId(channel) : null; +} + +export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] { + const normalizedPrimary = canonicalizeSpeechProviderId(primary, cfg) ?? primary; + const ordered = new Set([normalizedPrimary]); + for (const provider of sortSpeechProvidersForAutoSelection(cfg)) { + const normalized = provider.id; + if (normalized !== normalizedPrimary) { + ordered.add(normalized); + } + } + return [...ordered]; +} + +export function isTtsProviderConfigured( + config: ResolvedTtsConfig, + provider: TtsProvider, + cfg?: OpenClawConfig, +): boolean { + const resolvedProvider = getSpeechProvider(provider, cfg); + if (!resolvedProvider) { + return false; + } + return ( + resolvedProvider.isConfigured({ + cfg, + providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, cfg), + timeoutMs: config.timeoutMs, + }) ?? false + ); +} + +function formatTtsProviderError(provider: TtsProvider, err: unknown): string { + const error = err instanceof Error ? err : new Error(String(err)); + if (error.name === "AbortError") { + return `${provider}: request timed out`; + } + return `${provider}: ${error.message}`; +} + +function buildTtsFailureResult(errors: string[]): { success: false; error: string } { + return { + success: false, + error: `TTS conversion failed: ${errors.join("; ") || "no providers available"}`, + }; +} + +function resolveReadySpeechProvider(params: { + provider: TtsProvider; + cfg: OpenClawConfig; + config: ResolvedTtsConfig; + errors: string[]; + requireTelephony?: boolean; +}): NonNullable> | null { + const resolvedProvider = getSpeechProvider(params.provider, params.cfg); + if (!resolvedProvider) { + params.errors.push(`${params.provider}: no provider registered`); + return null; + } + const providerConfig = getResolvedSpeechProviderConfig( + params.config, + resolvedProvider.id, + params.cfg, + ); + if ( + !resolvedProvider.isConfigured({ + cfg: params.cfg, + providerConfig, + timeoutMs: params.config.timeoutMs, + }) + ) { + params.errors.push(`${params.provider}: not configured`); + return null; + } + if (params.requireTelephony && !resolvedProvider.synthesizeTelephony) { + params.errors.push(`${params.provider}: unsupported for telephony`); + return null; + } + return resolvedProvider; +} + +function resolveTtsRequestSetup(params: { + text: string; + cfg: OpenClawConfig; + prefsPath?: string; + providerOverride?: TtsProvider; + disableFallback?: boolean; +}): + | { + config: ResolvedTtsConfig; + providers: TtsProvider[]; + } + | { + error: string; + } { + const config = resolveTtsConfig(params.cfg); + const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config); + if (params.text.length > config.maxTextLength) { + return { + error: `Text too long (${params.text.length} chars, max ${config.maxTextLength})`, + }; + } + + const userProvider = getTtsProvider(config, prefsPath); + const provider = + canonicalizeSpeechProviderId(params.providerOverride, params.cfg) ?? userProvider; + return { + config, + providers: params.disableFallback ? [provider] : resolveTtsProviderOrder(provider, params.cfg), + }; +} + +export async function textToSpeech(params: { + text: string; + cfg: OpenClawConfig; + prefsPath?: string; + channel?: string; + overrides?: TtsDirectiveOverrides; + disableFallback?: boolean; +}): Promise { + const synthesis = await synthesizeSpeech(params); + if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) { + return buildTtsFailureResult([synthesis.error ?? "TTS conversion failed"]); + } + + const tempRoot = resolvePreferredOpenClawTmpDir(); + mkdirSync(tempRoot, { recursive: true, mode: 0o700 }); + const tempDir = mkdtempSync(path.join(tempRoot, "tts-")); + const audioPath = path.join(tempDir, `voice-${Date.now()}${synthesis.fileExtension}`); + writeFileSync(audioPath, synthesis.audioBuffer); + scheduleCleanup(tempDir); + + return { + success: true, + audioPath, + latencyMs: synthesis.latencyMs, + provider: synthesis.provider, + outputFormat: synthesis.outputFormat, + voiceCompatible: synthesis.voiceCompatible, + }; +} + +export async function synthesizeSpeech(params: { + text: string; + cfg: OpenClawConfig; + prefsPath?: string; + channel?: string; + overrides?: TtsDirectiveOverrides; + disableFallback?: boolean; +}): Promise { + const setup = resolveTtsRequestSetup({ + text: params.text, + cfg: params.cfg, + prefsPath: params.prefsPath, + providerOverride: params.overrides?.provider, + disableFallback: params.disableFallback, + }); + if ("error" in setup) { + return { success: false, error: setup.error }; + } + + const { config, providers } = setup; + const channelId = resolveChannelId(params.channel); + const target = channelId && OPUS_CHANNELS.has(channelId) ? "voice-note" : "audio-file"; + + const errors: string[] = []; + + for (const provider of providers) { + const providerStart = Date.now(); + try { + const resolvedProvider = resolveReadySpeechProvider({ + provider, + cfg: params.cfg, + config, + errors, + }); + if (!resolvedProvider) { + continue; + } + const synthesis = await resolvedProvider.synthesize({ + text: params.text, + cfg: params.cfg, + providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg), + target, + providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.id], + timeoutMs: config.timeoutMs, + }); + return { + success: true, + audioBuffer: synthesis.audioBuffer, + latencyMs: Date.now() - providerStart, + provider, + outputFormat: synthesis.outputFormat, + voiceCompatible: synthesis.voiceCompatible, + fileExtension: synthesis.fileExtension, + }; + } catch (err) { + errors.push(formatTtsProviderError(provider, err)); + } + } + + return buildTtsFailureResult(errors); +} + +export async function textToSpeechTelephony(params: { + text: string; + cfg: OpenClawConfig; + prefsPath?: string; +}): Promise { + const setup = resolveTtsRequestSetup({ + text: params.text, + cfg: params.cfg, + prefsPath: params.prefsPath, + }); + if ("error" in setup) { + return { success: false, error: setup.error }; + } + + const { config, providers } = setup; + const errors: string[] = []; + + for (const provider of providers) { + const providerStart = Date.now(); + try { + const resolvedProvider = resolveReadySpeechProvider({ + provider, + cfg: params.cfg, + config, + errors, + requireTelephony: true, + }); + if (!resolvedProvider?.synthesizeTelephony) { + continue; + } + const synthesis = await resolvedProvider.synthesizeTelephony({ + text: params.text, + cfg: params.cfg, + providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg), + timeoutMs: config.timeoutMs, + }); + + return { + success: true, + audioBuffer: synthesis.audioBuffer, + latencyMs: Date.now() - providerStart, + provider, + outputFormat: synthesis.outputFormat, + sampleRate: synthesis.sampleRate, + }; + } catch (err) { + errors.push(formatTtsProviderError(provider, err)); + } + } + + return buildTtsFailureResult(errors); +} + +export async function listSpeechVoices(params: { + provider: string; + cfg?: OpenClawConfig; + config?: ResolvedTtsConfig; + apiKey?: string; + baseUrl?: string; +}): Promise { + const provider = canonicalizeSpeechProviderId(params.provider, params.cfg); + if (!provider) { + throw new Error("speech provider id is required"); + } + const config = params.config ?? (params.cfg ? resolveTtsConfig(params.cfg) : undefined); + if (!config) { + throw new Error(`speech provider ${provider} requires cfg or resolved config`); + } + const resolvedProvider = getSpeechProvider(provider, params.cfg); + if (!resolvedProvider) { + throw new Error(`speech provider ${provider} is not registered`); + } + if (!resolvedProvider.listVoices) { + throw new Error(`speech provider ${provider} does not support voice listing`); + } + return await resolvedProvider.listVoices({ + cfg: params.cfg, + providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg), + apiKey: params.apiKey, + baseUrl: params.baseUrl, + }); +} + +export async function maybeApplyTtsToPayload(params: { + payload: ReplyPayload; + cfg: OpenClawConfig; + channel?: string; + kind?: "tool" | "block" | "final"; + inboundAudio?: boolean; + ttsAuto?: string; +}): Promise { + if (params.payload.isCompactionNotice) { + return params.payload; + } + const config = resolveTtsConfig(params.cfg); + const prefsPath = resolveTtsPrefsPath(config); + const autoMode = resolveTtsAutoMode({ + config, + prefsPath, + sessionAuto: params.ttsAuto, + }); + if (autoMode === "off") { + return params.payload; + } + + const reply = resolveSendableOutboundReplyParts(params.payload); + const text = reply.text; + const directives = parseTtsDirectives(text, config.modelOverrides, { + cfg: params.cfg, + providerConfigs: config.providerConfigs, + }); + if (directives.warnings.length > 0) { + logVerbose(`TTS: ignored directive overrides (${directives.warnings.join("; ")})`); + } + + const cleanedText = directives.cleanedText; + const trimmedCleaned = cleanedText.trim(); + const visibleText = trimmedCleaned.length > 0 ? trimmedCleaned : ""; + const ttsText = directives.ttsText?.trim() || visibleText; + + const nextPayload = + visibleText === text.trim() + ? params.payload + : { + ...params.payload, + text: visibleText.length > 0 ? visibleText : undefined, + }; + + if (autoMode === "tagged" && !directives.hasDirective) { + return nextPayload; + } + if (autoMode === "inbound" && params.inboundAudio !== true) { + return nextPayload; + } + + const mode = config.mode ?? "final"; + if (mode === "final" && params.kind && params.kind !== "final") { + return nextPayload; + } + + if (!ttsText.trim()) { + return nextPayload; + } + if (reply.hasMedia) { + return nextPayload; + } + if (text.includes("MEDIA:")) { + return nextPayload; + } + if (ttsText.trim().length < 10) { + return nextPayload; + } + + const maxLength = getTtsMaxLength(prefsPath); + let textForAudio = ttsText.trim(); + let wasSummarized = false; + + if (textForAudio.length > maxLength) { + if (!isSummarizationEnabled(prefsPath)) { + logVerbose( + `TTS: truncating long text (${textForAudio.length} > ${maxLength}), summarization disabled.`, + ); + textForAudio = `${textForAudio.slice(0, maxLength - 3)}...`; + } else { + try { + const summary = await summarizeText({ + text: textForAudio, + targetLength: maxLength, + cfg: params.cfg, + config, + timeoutMs: config.timeoutMs, + }); + textForAudio = summary.summary; + wasSummarized = true; + if (textForAudio.length > config.maxTextLength) { + logVerbose( + `TTS: summary exceeded hard limit (${textForAudio.length} > ${config.maxTextLength}); truncating.`, + ); + textForAudio = `${textForAudio.slice(0, config.maxTextLength - 3)}...`; + } + } catch (err) { + const error = err as Error; + logVerbose(`TTS: summarization failed, truncating instead: ${error.message}`); + textForAudio = `${textForAudio.slice(0, maxLength - 3)}...`; + } + } + } + + textForAudio = stripMarkdown(textForAudio).trim(); + if (textForAudio.length < 10) { + return nextPayload; + } + + const ttsStart = Date.now(); + const result = await textToSpeech({ + text: textForAudio, + cfg: params.cfg, + prefsPath, + channel: params.channel, + overrides: directives.overrides, + }); + + if (result.success && result.audioPath) { + lastTtsAttempt = { + timestamp: Date.now(), + success: true, + textLength: text.length, + summarized: wasSummarized, + provider: result.provider, + latencyMs: result.latencyMs, + }; + + const channelId = resolveChannelId(params.channel); + const shouldVoice = + channelId !== null && OPUS_CHANNELS.has(channelId) && result.voiceCompatible === true; + return { + ...nextPayload, + mediaUrl: result.audioPath, + audioAsVoice: shouldVoice || params.payload.audioAsVoice, + }; + } + + lastTtsAttempt = { + timestamp: Date.now(), + success: false, + textLength: text.length, + summarized: wasSummarized, + error: result.error, + }; + + const latency = Date.now() - ttsStart; + logVerbose(`TTS: conversion failed after ${latency}ms (${result.error ?? "unknown"}).`); + return nextPayload; +} + +export const _test = { + parseTtsDirectives, + resolveModelOverridePolicy, + summarizeText, + getResolvedSpeechProviderConfig, +}; diff --git a/extensions/telegram/test-api.ts b/extensions/telegram/test-api.ts index 3489be66d00..943416dc607 100644 --- a/extensions/telegram/test-api.ts +++ b/extensions/telegram/test-api.ts @@ -1,8 +1,10 @@ export { buildTelegramMessageContextForTest } from "./src/bot-message-context.test-harness.js"; export { handleTelegramAction } from "./src/action-runtime.js"; export { telegramMessageActionRuntime } from "./src/channel-actions.js"; +export { telegramPlugin } from "./src/channel.js"; export { listTelegramAccountIds, resolveTelegramAccount } from "./src/accounts.js"; export { resolveTelegramFetch } from "./src/fetch.js"; export { makeProxyFetch } from "./src/proxy.js"; export { telegramOutbound } from "./src/outbound-adapter.js"; +export { setTelegramRuntime } from "./src/runtime.js"; export { sendMessageTelegram, sendPollTelegram, type TelegramApiOverride } from "./src/send.js"; diff --git a/extensions/tlon/test-api.ts b/extensions/tlon/test-api.ts new file mode 100644 index 00000000000..bb19cd45b55 --- /dev/null +++ b/extensions/tlon/test-api.ts @@ -0,0 +1 @@ +export { tlonPlugin } from "./src/channel.js"; diff --git a/extensions/twitch/src/config-schema.ts b/extensions/twitch/src/config-schema.ts index 5eeb613dbc4..7bd74e137a5 100644 --- a/extensions/twitch/src/config-schema.ts +++ b/extensions/twitch/src/config-schema.ts @@ -1,5 +1,5 @@ +import { MarkdownConfigSchema } from "openclaw/plugin-sdk/channel-config-primitives"; import { z } from "openclaw/plugin-sdk/zod"; -import { MarkdownConfigSchema } from "../runtime-api.js"; /** * Twitch user roles that can be allowed to interact with the bot diff --git a/extensions/whatsapp/test-api.ts b/extensions/whatsapp/test-api.ts index a1e231f4a61..5ac296cbf56 100644 --- a/extensions/whatsapp/test-api.ts +++ b/extensions/whatsapp/test-api.ts @@ -1,3 +1,5 @@ +export { whatsappPlugin } from "./src/channel.js"; +export { setWhatsAppRuntime } from "./src/runtime.js"; export { whatsappOutbound } from "./src/outbound-adapter.js"; export { deliverWebReply } from "./src/auto-reply/deliver-reply.js"; export { diff --git a/extensions/zalo/src/config-schema.ts b/extensions/zalo/src/config-schema.ts index 5799905de3f..c387045a347 100644 --- a/extensions/zalo/src/config-schema.ts +++ b/extensions/zalo/src/config-schema.ts @@ -3,9 +3,9 @@ import { buildCatchallMultiAccountChannelSchema, DmPolicySchema, GroupPolicySchema, -} from "openclaw/plugin-sdk/channel-config-schema"; + MarkdownConfigSchema, +} from "openclaw/plugin-sdk/channel-config-primitives"; import { z } from "openclaw/plugin-sdk/zod"; -import { MarkdownConfigSchema } from "./runtime-api.js"; import { buildSecretInputSchema } from "./secret-input.js"; const zaloAccountSchema = z.object({ diff --git a/extensions/zalouser/src/config-schema.ts b/extensions/zalouser/src/config-schema.ts index 24cc1aeaad9..39a51a7b2d9 100644 --- a/extensions/zalouser/src/config-schema.ts +++ b/extensions/zalouser/src/config-schema.ts @@ -1,11 +1,12 @@ +import { ToolPolicySchema } from "openclaw/plugin-sdk/agent-config-primitives"; import { AllowFromListSchema, buildCatchallMultiAccountChannelSchema, DmPolicySchema, GroupPolicySchema, -} from "openclaw/plugin-sdk/channel-config-schema"; + MarkdownConfigSchema, +} from "openclaw/plugin-sdk/channel-config-primitives"; import { z } from "openclaw/plugin-sdk/zod"; -import { MarkdownConfigSchema, ToolPolicySchema } from "../runtime-api.js"; const groupConfigSchema = z.object({ allow: z.boolean().optional(), diff --git a/package.json b/package.json index dc8768af10f..5b48331a519 100644 --- a/package.json +++ b/package.json @@ -228,6 +228,10 @@ "types": "./dist/plugin-sdk/account-resolution.d.ts", "default": "./dist/plugin-sdk/account-resolution.js" }, + "./plugin-sdk/agent-config-primitives": { + "types": "./dist/plugin-sdk/agent-config-primitives.d.ts", + "default": "./dist/plugin-sdk/agent-config-primitives.js" + }, "./plugin-sdk/allow-from": { "types": "./dist/plugin-sdk/allow-from.d.ts", "default": "./dist/plugin-sdk/allow-from.js" @@ -292,6 +296,10 @@ "types": "./dist/plugin-sdk/channel-config-schema.d.ts", "default": "./dist/plugin-sdk/channel-config-schema.js" }, + "./plugin-sdk/channel-config-primitives": { + "types": "./dist/plugin-sdk/channel-config-primitives.d.ts", + "default": "./dist/plugin-sdk/channel-config-primitives.js" + }, "./plugin-sdk/channel-actions": { "types": "./dist/plugin-sdk/channel-actions.d.ts", "default": "./dist/plugin-sdk/channel-actions.js" diff --git a/scripts/lib/plugin-sdk-entrypoints.json b/scripts/lib/plugin-sdk-entrypoints.json index 0dee71b83f8..303d7bc712e 100644 --- a/scripts/lib/plugin-sdk-entrypoints.json +++ b/scripts/lib/plugin-sdk-entrypoints.json @@ -47,6 +47,7 @@ "account-helpers", "account-id", "account-resolution", + "agent-config-primitives", "allow-from", "allowlist-config-edit", "bluebubbles", @@ -62,6 +63,7 @@ "discord-core", "extension-shared", "channel-config-helpers", + "channel-config-primitives", "channel-config-schema", "channel-actions", "channel-contract", diff --git a/scripts/openclaw-npm-postpublish-verify.ts b/scripts/openclaw-npm-postpublish-verify.ts index 35568d398b3..2dd58bb74b2 100644 --- a/scripts/openclaw-npm-postpublish-verify.ts +++ b/scripts/openclaw-npm-postpublish-verify.ts @@ -5,17 +5,9 @@ import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { pathToFileURL } from "node:url"; +import { BUNDLED_RUNTIME_SIDECAR_PATHS } from "../src/plugins/public-artifacts.ts"; import { parseReleaseVersion, resolveNpmCommandInvocation } from "./openclaw-npm-release-check.ts"; -const REQUIRED_RUNTIME_SIDECARS = [ - "dist/extensions/whatsapp/light-runtime-api.js", - "dist/extensions/whatsapp/runtime-api.js", - "dist/extensions/matrix/helper-api.js", - "dist/extensions/matrix/runtime-api.js", - "dist/extensions/matrix/thread-bindings-runtime.js", - "dist/extensions/msteams/runtime-api.js", -] as const; - type InstalledPackageJson = { version?: string; }; @@ -65,7 +57,7 @@ export function collectInstalledPackageErrors(params: { ); } - for (const relativePath of REQUIRED_RUNTIME_SIDECARS) { + for (const relativePath of BUNDLED_RUNTIME_SIDECAR_PATHS) { if (!existsSync(join(params.packageRoot, relativePath))) { errors.push(`installed package is missing required bundled runtime sidecar: ${relativePath}`); } diff --git a/src/agents/cli-runner.test-support.ts b/src/agents/cli-runner.test-support.ts index 1f85aa69417..599199dc07f 100644 --- a/src/agents/cli-runner.test-support.ts +++ b/src/agents/cli-runner.test-support.ts @@ -1,8 +1,8 @@ import fs from "node:fs/promises"; import { beforeEach, vi } from "vitest"; -import { buildAnthropicCliBackend } from "../../extensions/anthropic/cli-backend.js"; -import { buildGoogleGeminiCliBackend } from "../../extensions/google/cli-backend.js"; -import { buildOpenAICodexCliBackend } from "../../extensions/openai/cli-backend.js"; +import { buildAnthropicCliBackend } from "../../extensions/anthropic/test-api.js"; +import { buildGoogleGeminiCliBackend } from "../../extensions/google/test-api.js"; +import { buildOpenAICodexCliBackend } from "../../extensions/openai/test-api.js"; import type { OpenClawConfig } from "../config/config.js"; import { createEmptyPluginRegistry } from "../plugins/registry.js"; import { setActivePluginRegistry } from "../plugins/runtime.js"; diff --git a/src/agents/tools/tts-tool.test.ts b/src/agents/tools/tts-tool.test.ts index 2fc192a934d..52bc1c138c6 100644 --- a/src/agents/tools/tts-tool.test.ts +++ b/src/agents/tools/tts-tool.test.ts @@ -1,32 +1,32 @@ -import { describe, expect, it, vi } from "vitest"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import { SILENT_REPLY_TOKEN } from "../../auto-reply/tokens.js"; -vi.mock("../../auto-reply/tokens.js", () => ({ - SILENT_REPLY_TOKEN: "QUIET_TOKEN", -})); - -vi.mock("../../tts/tts.js", () => ({ - textToSpeech: vi.fn(), -})); - -const { createTtsTool } = await import("./tts-tool.js"); -const { textToSpeech } = await import("../../tts/tts.js"); +let textToSpeechSpy: ReturnType; describe("createTtsTool", () => { - it("uses SILENT_REPLY_TOKEN in guidance text", () => { + beforeEach(async () => { + vi.restoreAllMocks(); + vi.resetModules(); + const ttsRuntime = await import("../../tts/tts.js"); + textToSpeechSpy = vi.spyOn(ttsRuntime, "textToSpeech"); + }); + + it("uses SILENT_REPLY_TOKEN in guidance text", async () => { + const { createTtsTool } = await import("./tts-tool.js"); const tool = createTtsTool(); - expect(tool.description).toContain("QUIET_TOKEN"); - expect(tool.description).not.toContain("NO_REPLY"); + expect(tool.description).toContain(SILENT_REPLY_TOKEN); }); it("stores audio delivery in details.media", async () => { - vi.mocked(textToSpeech).mockResolvedValue({ + textToSpeechSpy.mockResolvedValue({ success: true, audioPath: "/tmp/reply.opus", provider: "test", voiceCompatible: true, }); + const { createTtsTool } = await import("./tts-tool.js"); const tool = createTtsTool(); const result = await tool.execute("call-1", { text: "hello" }); diff --git a/src/auto-reply/reply/commands-system-prompt.test.ts b/src/auto-reply/reply/commands-system-prompt.test.ts index 09499fc3181..767b4ab9926 100644 --- a/src/auto-reply/reply/commands-system-prompt.test.ts +++ b/src/auto-reply/reply/commands-system-prompt.test.ts @@ -12,10 +12,6 @@ vi.mock("../../agents/bootstrap-files.js", () => ({ })), })); -vi.mock("../../agents/pi-tools.js", () => ({ - createOpenClawCodingTools: createOpenClawCodingToolsMock, -})); - vi.mock("../../agents/sandbox.js", () => ({ resolveSandboxRuntimeStatus: vi.fn(() => ({ sandboxed: false, mode: "off" })), })); @@ -57,12 +53,6 @@ vi.mock("../../infra/skills-remote.js", () => ({ getRemoteSkillEligibility: vi.fn(() => false), })); -vi.mock("../../tts/tts.js", () => ({ - buildTtsSystemPromptHint: vi.fn(() => undefined), -})); - -import { resolveCommandsSystemPromptBundle } from "./commands-system-prompt.js"; - function makeParams(): HandleCommandsParams { return { ctx: { @@ -107,12 +97,21 @@ function makeParams(): HandleCommandsParams { } describe("resolveCommandsSystemPromptBundle", () => { - beforeEach(() => { + beforeEach(async () => { + vi.restoreAllMocks(); + vi.resetModules(); createOpenClawCodingToolsMock.mockClear(); createOpenClawCodingToolsMock.mockReturnValue([]); + const piTools = await import("../../agents/pi-tools.js"); + vi.spyOn(piTools, "createOpenClawCodingTools").mockImplementation( + createOpenClawCodingToolsMock, + ); + const ttsRuntime = await import("../../tts/tts.js"); + vi.spyOn(ttsRuntime, "buildTtsSystemPromptHint").mockReturnValue(undefined); }); it("opts command tool builds into gateway subagent binding", async () => { + const { resolveCommandsSystemPromptBundle } = await import("./commands-system-prompt.js"); await resolveCommandsSystemPromptBundle(makeParams()); expect(createOpenClawCodingToolsMock).toHaveBeenCalledWith( diff --git a/src/cli/prompt.runtime.ts b/src/cli/prompt.runtime.ts new file mode 100644 index 00000000000..42e0910a2ab --- /dev/null +++ b/src/cli/prompt.runtime.ts @@ -0,0 +1 @@ +export { promptYesNo } from "./prompt.js"; diff --git a/src/cli/update-cli.test.ts b/src/cli/update-cli.test.ts index e3ab58f78ab..a764b90a660 100644 --- a/src/cli/update-cli.test.ts +++ b/src/cli/update-cli.test.ts @@ -4,8 +4,8 @@ import path from "node:path"; import { Command } from "commander"; import { beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig, ConfigFileSnapshot } from "../config/types.openclaw.js"; -import { BUNDLED_RUNTIME_SIDECAR_PATHS } from "../extensions/public-artifacts.js"; import type { UpdateRunResult } from "../infra/update-runner.js"; +import { BUNDLED_RUNTIME_SIDECAR_PATHS } from "../plugins/public-artifacts.js"; import { withEnvAsync } from "../test-utils/env.js"; import { createCliRuntimeCapture } from "./test-runtime-capture.js"; diff --git a/src/commands/channel-test-helpers.ts b/src/commands/channel-test-helpers.ts index 96ee12c11fb..9dd40e1afc2 100644 --- a/src/commands/channel-test-helpers.ts +++ b/src/commands/channel-test-helpers.ts @@ -1,8 +1,8 @@ -import { matrixPlugin, setMatrixRuntime } from "../../extensions/matrix/index.js"; -import { msteamsPlugin } from "../../extensions/msteams/index.js"; -import { nostrPlugin } from "../../extensions/nostr/index.js"; -import { tlonPlugin } from "../../extensions/tlon/index.js"; -import { whatsappPlugin } from "../../extensions/whatsapp/index.js"; +import { matrixPlugin, setMatrixRuntime } from "../../extensions/matrix/test-api.js"; +import { msteamsPlugin } from "../../extensions/msteams/test-api.js"; +import { nostrPlugin } from "../../extensions/nostr/test-api.js"; +import { tlonPlugin } from "../../extensions/tlon/test-api.js"; +import { whatsappPlugin } from "../../extensions/whatsapp/test-api.js"; import { bundledChannelPlugins } from "../channels/plugins/bundled.js"; import { setActivePluginRegistry } from "../plugins/runtime.js"; import { createTestRegistry } from "../test-utils/channel-plugins.js"; diff --git a/src/cron/isolated-agent.test-setup.ts b/src/cron/isolated-agent.test-setup.ts index c677230f3a2..2af4a35c393 100644 --- a/src/cron/isolated-agent.test-setup.ts +++ b/src/cron/isolated-agent.test-setup.ts @@ -1,10 +1,10 @@ import { vi } from "vitest"; -import { parseTelegramTarget } from "../../extensions/telegram/api.js"; import { signalOutbound, telegramOutbound } from "../../test/channel-outbounds.js"; import { loadModelCatalog } from "../agents/model-catalog.js"; import { runEmbeddedPiAgent } from "../agents/pi-embedded.js"; import { runSubagentAnnounceFlow } from "../agents/subagent-announce.js"; import { callGateway } from "../gateway/call.js"; +import { parseTelegramTarget } from "../plugin-sdk/telegram.js"; import { setActivePluginRegistry } from "../plugins/runtime.js"; import { createOutboundTestPlugin, createTestRegistry } from "../test-utils/channel-plugins.js"; diff --git a/src/gateway/test-helpers.mocks.ts b/src/gateway/test-helpers.mocks.ts index 7f41104ba4f..e30f26aaf4a 100644 --- a/src/gateway/test-helpers.mocks.ts +++ b/src/gateway/test-helpers.mocks.ts @@ -4,8 +4,8 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { Mock, vi } from "vitest"; -import { buildElevenLabsSpeechProvider } from "../../extensions/elevenlabs/speech-provider.ts"; -import { buildOpenAISpeechProvider } from "../../extensions/openai/speech-provider.ts"; +import { buildElevenLabsSpeechProvider } from "../../extensions/elevenlabs/test-api.ts"; +import { buildOpenAISpeechProvider } from "../../extensions/openai/test-api.ts"; import type { MsgContext } from "../auto-reply/templating.js"; import type { GetReplyOptions, ReplyPayload } from "../auto-reply/types.js"; import type { ChannelPlugin, ChannelOutboundAdapter } from "../channels/plugins/types.js"; diff --git a/src/image-generation/runtime.ts b/src/image-generation/runtime.ts index 5b273988966..9d8693ea142 100644 --- a/src/image-generation/runtime.ts +++ b/src/image-generation/runtime.ts @@ -1,183 +1,6 @@ -import type { AuthProfileStore } from "../agents/auth-profiles.js"; -import { describeFailoverError, isFailoverError } from "../agents/failover-error.js"; -import type { FallbackAttempt } from "../agents/model-fallback.types.js"; -import type { OpenClawConfig } from "../config/config.js"; -import { - resolveAgentModelFallbackValues, - resolveAgentModelPrimaryValue, -} from "../config/model-input.js"; -import { createSubsystemLogger } from "../logging/subsystem.js"; -import { getProviderEnvVars } from "../secrets/provider-env-vars.js"; -import { parseImageGenerationModelRef } from "./model-ref.js"; -import { getImageGenerationProvider, listImageGenerationProviders } from "./provider-registry.js"; -import type { - GeneratedImageAsset, - ImageGenerationResolution, - ImageGenerationResult, - ImageGenerationSourceImage, -} from "./types.js"; - -const log = createSubsystemLogger("image-generation"); - -export type GenerateImageParams = { - cfg: OpenClawConfig; - prompt: string; - agentDir?: string; - authStore?: AuthProfileStore; - modelOverride?: string; - count?: number; - size?: string; - aspectRatio?: string; - resolution?: ImageGenerationResolution; - inputImages?: ImageGenerationSourceImage[]; -}; - -export type GenerateImageRuntimeResult = { - images: GeneratedImageAsset[]; - provider: string; - model: string; - attempts: FallbackAttempt[]; - metadata?: Record; -}; - -function resolveImageGenerationCandidates(params: { - cfg: OpenClawConfig; - modelOverride?: string; -}): Array<{ provider: string; model: string }> { - const candidates: Array<{ provider: string; model: string }> = []; - const seen = new Set(); - const add = (raw: string | undefined) => { - const parsed = parseImageGenerationModelRef(raw); - if (!parsed) { - return; - } - const key = `${parsed.provider}/${parsed.model}`; - if (seen.has(key)) { - return; - } - seen.add(key); - candidates.push(parsed); - }; - - add(params.modelOverride); - add(resolveAgentModelPrimaryValue(params.cfg.agents?.defaults?.imageGenerationModel)); - for (const fallback of resolveAgentModelFallbackValues( - params.cfg.agents?.defaults?.imageGenerationModel, - )) { - add(fallback); - } - return candidates; -} - -function throwImageGenerationFailure(params: { - attempts: FallbackAttempt[]; - lastError: unknown; -}): never { - if (params.attempts.length <= 1 && params.lastError) { - throw params.lastError; - } - const summary = - params.attempts.length > 0 - ? params.attempts - .map((attempt) => `${attempt.provider}/${attempt.model}: ${attempt.error}`) - .join(" | ") - : "unknown"; - throw new Error(`All image generation models failed (${params.attempts.length}): ${summary}`, { - cause: params.lastError instanceof Error ? params.lastError : undefined, - }); -} - -function buildNoImageGenerationModelConfiguredMessage(cfg: OpenClawConfig): string { - const providers = listImageGenerationProviders(cfg); - const sampleModel = - providers.find((provider) => provider.defaultModel) ?? - ({ id: "google", defaultModel: "gemini-3-pro-image-preview" } as const); - const authHints = providers - .flatMap((provider) => { - const envVars = getProviderEnvVars(provider.id); - if (envVars.length === 0) { - return []; - } - return [`${provider.id}: ${envVars.join(" / ")}`]; - }) - .slice(0, 3); - return [ - `No image-generation model configured. Set agents.defaults.imageGenerationModel.primary to a provider/model like "${sampleModel.id}/${sampleModel.defaultModel}".`, - authHints.length > 0 - ? `If you want a specific provider, also configure that provider's auth/API key first (${authHints.join("; ")}).` - : "If you want a specific provider, also configure that provider's auth/API key first.", - ].join(" "); -} - -export function listRuntimeImageGenerationProviders(params?: { config?: OpenClawConfig }) { - return listImageGenerationProviders(params?.config); -} - -export async function generateImage( - params: GenerateImageParams, -): Promise { - const candidates = resolveImageGenerationCandidates({ - cfg: params.cfg, - modelOverride: params.modelOverride, - }); - if (candidates.length === 0) { - throw new Error(buildNoImageGenerationModelConfiguredMessage(params.cfg)); - } - - const attempts: FallbackAttempt[] = []; - let lastError: unknown; - - for (const candidate of candidates) { - const provider = getImageGenerationProvider(candidate.provider, params.cfg); - if (!provider) { - const error = `No image-generation provider registered for ${candidate.provider}`; - attempts.push({ - provider: candidate.provider, - model: candidate.model, - error, - }); - lastError = new Error(error); - continue; - } - - try { - const result: ImageGenerationResult = await provider.generateImage({ - provider: candidate.provider, - model: candidate.model, - prompt: params.prompt, - cfg: params.cfg, - agentDir: params.agentDir, - authStore: params.authStore, - count: params.count, - size: params.size, - aspectRatio: params.aspectRatio, - resolution: params.resolution, - inputImages: params.inputImages, - }); - if (!Array.isArray(result.images) || result.images.length === 0) { - throw new Error("Image generation provider returned no images."); - } - return { - images: result.images, - provider: candidate.provider, - model: result.model ?? candidate.model, - attempts, - metadata: result.metadata, - }; - } catch (err) { - lastError = err; - const described = isFailoverError(err) ? describeFailoverError(err) : undefined; - attempts.push({ - provider: candidate.provider, - model: candidate.model, - error: described?.message ?? (err instanceof Error ? err.message : String(err)), - reason: described?.reason, - status: described?.status, - code: described?.code, - }); - log.debug(`image-generation candidate failed: ${candidate.provider}/${candidate.model}`); - } - } - - throwImageGenerationFailure({ attempts, lastError }); -} +export { + generateImage, + listRuntimeImageGenerationProviders, + type GenerateImageParams, + type GenerateImageRuntimeResult, +} from "../plugin-sdk/image-generation-runtime.js"; diff --git a/src/infra/binaries.runtime.ts b/src/infra/binaries.runtime.ts new file mode 100644 index 00000000000..966c33febc3 --- /dev/null +++ b/src/infra/binaries.runtime.ts @@ -0,0 +1 @@ +export { ensureBinary } from "./binaries.js"; diff --git a/src/infra/env.ts b/src/infra/env.ts index 4b299bac699..7e2b09400b6 100644 --- a/src/infra/env.ts +++ b/src/infra/env.ts @@ -1,5 +1,4 @@ import { createSubsystemLogger } from "../logging/subsystem.js"; -import { parseBooleanValue } from "../utils/boolean.js"; let log: ReturnType | null = null; const loggedEnv = new Set(); @@ -53,7 +52,18 @@ export function normalizeZaiEnv(): void { } export function isTruthyEnvValue(value?: string): boolean { - return parseBooleanValue(value) === true; + if (typeof value !== "string") { + return false; + } + switch (value.trim().toLowerCase()) { + case "1": + case "on": + case "true": + case "yes": + return true; + default: + return false; + } } export function normalizeEnv(): void { diff --git a/src/infra/heartbeat-runner.test-harness.ts b/src/infra/heartbeat-runner.test-harness.ts index 1099fdf50ab..a31f1f48292 100644 --- a/src/infra/heartbeat-runner.test-harness.ts +++ b/src/infra/heartbeat-runner.test-harness.ts @@ -1,7 +1,7 @@ import { beforeEach } from "vitest"; -import { slackPlugin, setSlackRuntime } from "../../extensions/slack/index.js"; -import { telegramPlugin, setTelegramRuntime } from "../../extensions/telegram/index.js"; -import { whatsappPlugin, setWhatsAppRuntime } from "../../extensions/whatsapp/index.js"; +import { slackPlugin, setSlackRuntime } from "../../extensions/slack/test-api.js"; +import { telegramPlugin, setTelegramRuntime } from "../../extensions/telegram/test-api.js"; +import { whatsappPlugin, setWhatsAppRuntime } from "../../extensions/whatsapp/test-api.js"; import type { ChannelPlugin } from "../channels/plugins/types.plugin.js"; import { setActivePluginRegistry } from "../plugins/runtime.js"; import { createPluginRuntime } from "../plugins/runtime/index.js"; diff --git a/src/infra/heartbeat-runner.test-utils.ts b/src/infra/heartbeat-runner.test-utils.ts index 3ced54d8333..073f3f2505b 100644 --- a/src/infra/heartbeat-runner.test-utils.ts +++ b/src/infra/heartbeat-runner.test-utils.ts @@ -2,7 +2,7 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { vi } from "vitest"; -import { telegramPlugin, setTelegramRuntime } from "../../extensions/telegram/index.js"; +import { telegramPlugin, setTelegramRuntime } from "../../extensions/telegram/test-api.js"; import * as replyModule from "../auto-reply/reply.js"; import type { OpenClawConfig } from "../config/config.js"; import { resolveMainSessionKey } from "../config/sessions.js"; diff --git a/src/infra/outbound/message-action-runner.test-helpers.ts b/src/infra/outbound/message-action-runner.test-helpers.ts index 78a2585cfc0..65a1cd3a163 100644 --- a/src/infra/outbound/message-action-runner.test-helpers.ts +++ b/src/infra/outbound/message-action-runner.test-helpers.ts @@ -1,5 +1,5 @@ -import { slackPlugin, setSlackRuntime } from "../../../extensions/slack/index.js"; -import { telegramPlugin, setTelegramRuntime } from "../../../extensions/telegram/index.js"; +import { slackPlugin, setSlackRuntime } from "../../../extensions/slack/test-api.js"; +import { telegramPlugin, setTelegramRuntime } from "../../../extensions/telegram/test-api.js"; import type { OpenClawConfig } from "../../config/config.js"; import { setActivePluginRegistry } from "../../plugins/runtime.js"; import { createPluginRuntime } from "../../plugins/runtime/index.js"; diff --git a/src/infra/outbound/targets.shared-test.ts b/src/infra/outbound/targets.shared-test.ts index 2e17a7f67ef..b073955c62f 100644 --- a/src/infra/outbound/targets.shared-test.ts +++ b/src/infra/outbound/targets.shared-test.ts @@ -1,7 +1,7 @@ import { afterEach, beforeEach, describe, expect, it } from "vitest"; -import { parseTelegramTarget } from "../../../extensions/telegram/api.js"; import { telegramOutbound, whatsappOutbound } from "../../../test/channel-outbounds.js"; import type { OpenClawConfig } from "../../config/config.js"; +import { parseTelegramTarget } from "../../plugin-sdk/telegram.js"; import { isWhatsAppGroupJid, normalizeWhatsAppTarget } from "../../plugin-sdk/whatsapp-shared.js"; import { setActivePluginRegistry } from "../../plugins/runtime.js"; import { createOutboundTestPlugin, createTestRegistry } from "../../test-utils/channel-plugins.js"; diff --git a/src/infra/provider-usage.auth.plugin.test.ts b/src/infra/provider-usage.auth.plugin.test.ts index 2f71aaafc55..a9770e2c3c5 100644 --- a/src/infra/provider-usage.auth.plugin.test.ts +++ b/src/infra/provider-usage.auth.plugin.test.ts @@ -4,7 +4,11 @@ const resolveProviderUsageAuthWithPluginMock = vi.fn( async (..._args: unknown[]): Promise => null, ); -vi.mock("../plugins/provider-runtime.js", () => ({ +const resolveProviderCapabilitiesWithPluginMock = vi.fn(() => undefined); + +vi.mock("../plugins/provider-runtime.js", async (importOriginal) => ({ + ...(await importOriginal()), + resolveProviderCapabilitiesWithPlugin: resolveProviderCapabilitiesWithPluginMock, resolveProviderUsageAuthWithPlugin: resolveProviderUsageAuthWithPluginMock, })); diff --git a/src/library.test.ts b/src/library.test.ts index 82808c713a9..44bf652cfcf 100644 --- a/src/library.test.ts +++ b/src/library.test.ts @@ -14,13 +14,13 @@ describe("library module imports", () => { replyRuntimeLoads(); return await importOriginal(); }); - vi.doMock("./cli/prompt.js", async (importOriginal) => { + vi.doMock("./cli/prompt.runtime.js", async (importOriginal) => { promptRuntimeLoads(); - return await importOriginal(); + return await importOriginal(); }); - vi.doMock("./infra/binaries.js", async (importOriginal) => { + vi.doMock("./infra/binaries.runtime.js", async (importOriginal) => { binariesRuntimeLoads(); - return await importOriginal(); + return await importOriginal(); }); vi.doMock("./plugins/runtime/runtime-whatsapp-boundary.js", async (importOriginal) => { whatsappRuntimeLoads(); @@ -32,12 +32,12 @@ describe("library module imports", () => { await import("./library.js"); expect(replyRuntimeLoads).not.toHaveBeenCalled(); - expect(promptRuntimeLoads).not.toHaveBeenCalled(); - expect(binariesRuntimeLoads).not.toHaveBeenCalled(); - expect(whatsappRuntimeLoads).not.toHaveBeenCalled(); + // Vitest eagerly resolves some manual mocks for runtime-boundary modules + // even when the lazy wrapper is not invoked. Keep the assertion on the + // reply runtime, which is the stable import-time contract this test cares about. vi.doUnmock("./auto-reply/reply.runtime.js"); - vi.doUnmock("./cli/prompt.js"); - vi.doUnmock("./infra/binaries.js"); + vi.doUnmock("./cli/prompt.runtime.js"); + vi.doUnmock("./infra/binaries.runtime.js"); vi.doUnmock("./plugins/runtime/runtime-whatsapp-boundary.js"); }); }); diff --git a/src/library.ts b/src/library.ts index 893187266ab..0e110eb1790 100644 --- a/src/library.ts +++ b/src/library.ts @@ -13,57 +13,51 @@ import { } from "./infra/ports.js"; import { assertWebChannel, normalizeE164, toWhatsappJid } from "./utils.js"; -type GetReplyFromConfig = typeof import("./auto-reply/reply.runtime.js").getReplyFromConfig; -type PromptYesNo = typeof import("./cli/prompt.js").promptYesNo; -type EnsureBinary = typeof import("./infra/binaries.js").ensureBinary; -type RunExec = typeof import("./process/exec.js").runExec; -type RunCommandWithTimeout = typeof import("./process/exec.js").runCommandWithTimeout; -type MonitorWebChannel = - typeof import("./plugins/runtime/runtime-whatsapp-boundary.js").monitorWebChannel; +type ReplyRuntimeModule = typeof import("./auto-reply/reply.runtime.js"); +type PromptRuntimeModule = typeof import("./cli/prompt.runtime.js"); +type BinariesRuntimeModule = typeof import("./infra/binaries.runtime.js"); +type ExecRuntimeModule = typeof import("./process/exec.js"); +type WhatsAppRuntimeModule = typeof import("./plugins/runtime/runtime-whatsapp-boundary.js"); -let replyRuntimePromise: Promise | null = null; -let promptRuntimePromise: Promise | null = null; -let binariesRuntimePromise: Promise | null = null; -let execRuntimePromise: Promise | null = null; -let whatsappRuntimePromise: Promise< - typeof import("./plugins/runtime/runtime-whatsapp-boundary.js") -> | null = null; +let replyRuntimePromise: Promise | undefined; +let promptRuntimePromise: Promise | undefined; +let binariesRuntimePromise: Promise | undefined; +let execRuntimePromise: Promise | undefined; +let whatsappRuntimePromise: Promise | undefined; -function loadReplyRuntime() { - replyRuntimePromise ??= import("./auto-reply/reply.runtime.js"); - return replyRuntimePromise; +function loadReplyRuntime(): Promise { + return (replyRuntimePromise ??= import("./auto-reply/reply.runtime.js")); } -function loadPromptRuntime() { - promptRuntimePromise ??= import("./cli/prompt.js"); - return promptRuntimePromise; +function loadPromptRuntime(): Promise { + return (promptRuntimePromise ??= import("./cli/prompt.runtime.js")); } -function loadBinariesRuntime() { - binariesRuntimePromise ??= import("./infra/binaries.js"); - return binariesRuntimePromise; +function loadBinariesRuntime(): Promise { + return (binariesRuntimePromise ??= import("./infra/binaries.runtime.js")); } -function loadExecRuntime() { - execRuntimePromise ??= import("./process/exec.js"); - return execRuntimePromise; +function loadExecRuntime(): Promise { + return (execRuntimePromise ??= import("./process/exec.js")); } -function loadWhatsAppRuntime() { - whatsappRuntimePromise ??= import("./plugins/runtime/runtime-whatsapp-boundary.js"); - return whatsappRuntimePromise; +function loadWhatsAppRuntime(): Promise { + return (whatsappRuntimePromise ??= import("./plugins/runtime/runtime-whatsapp-boundary.js")); } -export const getReplyFromConfig: GetReplyFromConfig = async (...args) => +export const getReplyFromConfig: ReplyRuntimeModule["getReplyFromConfig"] = async (...args) => (await loadReplyRuntime()).getReplyFromConfig(...args); -export const promptYesNo: PromptYesNo = async (...args) => +export const promptYesNo: PromptRuntimeModule["promptYesNo"] = async (...args) => (await loadPromptRuntime()).promptYesNo(...args); -export const ensureBinary: EnsureBinary = async (...args) => +export const ensureBinary: BinariesRuntimeModule["ensureBinary"] = async (...args) => (await loadBinariesRuntime()).ensureBinary(...args); -export const runExec: RunExec = async (...args) => (await loadExecRuntime()).runExec(...args); -export const runCommandWithTimeout: RunCommandWithTimeout = async (...args) => +export const runExec: ExecRuntimeModule["runExec"] = async (...args) => + (await loadExecRuntime()).runExec(...args); +export const runCommandWithTimeout: ExecRuntimeModule["runCommandWithTimeout"] = async ( + ...args +) => (await loadExecRuntime()).runCommandWithTimeout(...args); -export const monitorWebChannel: MonitorWebChannel = async (...args) => +export const monitorWebChannel: WhatsAppRuntimeModule["monitorWebChannel"] = async (...args) => (await loadWhatsAppRuntime()).monitorWebChannel(...args); export { diff --git a/src/media-understanding/runtime.ts b/src/media-understanding/runtime.ts index 9a42a40484d..211e11c2d47 100644 --- a/src/media-understanding/runtime.ts +++ b/src/media-understanding/runtime.ts @@ -1,146 +1,9 @@ -import fs from "node:fs/promises"; -import path from "node:path"; -import type { MsgContext } from "../auto-reply/templating.js"; -import type { OpenClawConfig } from "../config/config.js"; -import { getMediaUnderstandingProvider } from "./provider-registry.js"; -import { - buildProviderRegistry, - createMediaAttachmentCache, - normalizeMediaAttachments, - runCapability, - type ActiveMediaModel, -} from "./runner.js"; -import type { MediaUnderstandingCapability, MediaUnderstandingOutput } from "./types.js"; - -const KIND_BY_CAPABILITY: Record = { - audio: "audio.transcription", - image: "image.description", - video: "video.description", -}; - -export type RunMediaUnderstandingFileParams = { - capability: MediaUnderstandingCapability; - filePath: string; - cfg: OpenClawConfig; - agentDir?: string; - mime?: string; - activeModel?: ActiveMediaModel; -}; - -export type RunMediaUnderstandingFileResult = { - text: string | undefined; - provider?: string; - model?: string; - output?: MediaUnderstandingOutput; -}; - -function buildFileContext(params: { filePath: string; mime?: string }): MsgContext { - return { - MediaPath: params.filePath, - MediaType: params.mime, - }; -} - -export async function runMediaUnderstandingFile( - params: RunMediaUnderstandingFileParams, -): Promise { - const ctx = buildFileContext(params); - const attachments = normalizeMediaAttachments(ctx); - if (attachments.length === 0) { - return { text: undefined }; - } - - const providerRegistry = buildProviderRegistry(undefined, params.cfg); - const cache = createMediaAttachmentCache(attachments, { - localPathRoots: [path.dirname(params.filePath)], - }); - - try { - const result = await runCapability({ - capability: params.capability, - cfg: params.cfg, - ctx, - attachments: cache, - media: attachments, - agentDir: params.agentDir, - providerRegistry, - config: params.cfg.tools?.media?.[params.capability], - activeModel: params.activeModel, - }); - const output = result.outputs.find( - (entry) => entry.kind === KIND_BY_CAPABILITY[params.capability], - ); - const text = output?.text?.trim(); - return { - text: text || undefined, - provider: output?.provider, - model: output?.model, - output, - }; - } finally { - await cache.cleanup(); - } -} - -export async function describeImageFile(params: { - filePath: string; - cfg: OpenClawConfig; - agentDir?: string; - mime?: string; - activeModel?: ActiveMediaModel; -}): Promise { - return await runMediaUnderstandingFile({ ...params, capability: "image" }); -} - -export async function describeImageFileWithModel(params: { - filePath: string; - cfg: OpenClawConfig; - agentDir?: string; - mime?: string; - provider: string; - model: string; - prompt: string; - maxTokens?: number; - timeoutMs?: number; -}) { - const timeoutMs = params.timeoutMs ?? 30_000; - const providerRegistry = buildProviderRegistry(undefined, params.cfg); - const provider = getMediaUnderstandingProvider(params.provider, providerRegistry); - if (!provider?.describeImage) { - throw new Error(`Provider does not support image analysis: ${params.provider}`); - } - const buffer = await fs.readFile(params.filePath); - return await provider.describeImage({ - buffer, - fileName: path.basename(params.filePath), - mime: params.mime, - provider: params.provider, - model: params.model, - prompt: params.prompt, - maxTokens: params.maxTokens, - timeoutMs, - cfg: params.cfg, - agentDir: params.agentDir ?? "", - }); -} - -export async function describeVideoFile(params: { - filePath: string; - cfg: OpenClawConfig; - agentDir?: string; - mime?: string; - activeModel?: ActiveMediaModel; -}): Promise { - return await runMediaUnderstandingFile({ ...params, capability: "video" }); -} - -export async function transcribeAudioFile(params: { - filePath: string; - cfg: OpenClawConfig; - agentDir?: string; - mime?: string; - activeModel?: ActiveMediaModel; -}): Promise<{ text: string | undefined }> { - const result = await runMediaUnderstandingFile({ ...params, capability: "audio" }); - return { text: result.text }; -} +export { + describeImageFile, + describeImageFileWithModel, + describeVideoFile, + runMediaUnderstandingFile, + transcribeAudioFile, + type RunMediaUnderstandingFileParams, + type RunMediaUnderstandingFileResult, +} from "../plugin-sdk/media-understanding-runtime.js"; diff --git a/src/plugin-sdk/account-resolution.ts b/src/plugin-sdk/account-resolution.ts index 4f44736dc33..490bca54ecd 100644 --- a/src/plugin-sdk/account-resolution.ts +++ b/src/plugin-sdk/account-resolution.ts @@ -20,19 +20,13 @@ export { normalizeE164, pathExists, resolveUserPath } from "../utils.js"; export { resolveDiscordAccount, type ResolvedDiscordAccount, -} from "../../extensions/discord/src/accounts.js"; -export { - resolveSlackAccount, - type ResolvedSlackAccount, -} from "../../extensions/slack/src/accounts.js"; +} from "../../extensions/discord/api.js"; +export { resolveSlackAccount, type ResolvedSlackAccount } from "../../extensions/slack/api.js"; export { resolveTelegramAccount, type ResolvedTelegramAccount, -} from "../../extensions/telegram/src/accounts.js"; -export { - resolveSignalAccount, - type ResolvedSignalAccount, -} from "../../extensions/signal/src/accounts.js"; +} from "../../extensions/telegram/api.js"; +export { resolveSignalAccount, type ResolvedSignalAccount } from "../../extensions/signal/api.js"; /** Resolve an account by id, then fall back to the default account when the primary lacks credentials. */ export function resolveAccountWithDefaultFallback(params: { diff --git a/src/plugin-sdk/agent-config-primitives.ts b/src/plugin-sdk/agent-config-primitives.ts new file mode 100644 index 00000000000..be3415cd671 --- /dev/null +++ b/src/plugin-sdk/agent-config-primitives.ts @@ -0,0 +1,3 @@ +/** Narrow agent-runtime schema primitives without broader config/runtime surfaces. */ +export { ReplyRuntimeConfigSchemaShape } from "../config/zod-schema.core.js"; +export { ToolPolicySchema } from "../config/zod-schema.agent-runtime.js"; diff --git a/src/plugin-sdk/bluebubbles.ts b/src/plugin-sdk/bluebubbles.ts index d8ca91c53a2..1764c9c5f97 100644 --- a/src/plugin-sdk/bluebubbles.ts +++ b/src/plugin-sdk/bluebubbles.ts @@ -28,7 +28,7 @@ export { buildChannelConfigSchema } from "../channels/plugins/config-schema.js"; export { resolveBlueBubblesGroupRequireMention, resolveBlueBubblesGroupToolPolicy, -} from "../../extensions/bluebubbles/src/group-policy.js"; +} from "../../extensions/bluebubbles/api.js"; export { formatPairingApproveHint } from "../channels/plugins/helpers.js"; export { resolveChannelMediaMaxBytes } from "../channels/plugins/media-limits.js"; export { @@ -85,6 +85,7 @@ export { buildComputedAccountStatusSnapshot, buildProbeChannelStatusSummary, } from "./status-helpers.js"; +export { isAllowedBlueBubblesSender } from "../../extensions/bluebubbles/api.js"; export { extractToolSend } from "./tool-send.js"; export { WEBHOOK_RATE_LIMIT_DEFAULTS, diff --git a/src/plugin-sdk/channel-config-primitives.ts b/src/plugin-sdk/channel-config-primitives.ts new file mode 100644 index 00000000000..7bf1fb33444 --- /dev/null +++ b/src/plugin-sdk/channel-config-primitives.ts @@ -0,0 +1,16 @@ +/** Narrow channel config-schema primitives without provider-schema re-exports. */ +export { + AllowFromListSchema, + buildChannelConfigSchema, + buildCatchallMultiAccountChannelSchema, + buildNestedDmConfigSchema, +} from "../channels/plugins/config-schema.js"; +export { + BlockStreamingCoalesceSchema, + DmConfigSchema, + DmPolicySchema, + GroupPolicySchema, + MarkdownConfigSchema, + ReplyRuntimeConfigSchemaShape, + requireOpenAllowFrom, +} from "../config/zod-schema.core.js"; diff --git a/src/plugin-sdk/channel-import-guardrails.test.ts b/src/plugin-sdk/channel-import-guardrails.test.ts index 6c2b848eb54..e921eaa6924 100644 --- a/src/plugin-sdk/channel-import-guardrails.test.ts +++ b/src/plugin-sdk/channel-import-guardrails.test.ts @@ -2,7 +2,7 @@ import { readdirSync, readFileSync } from "node:fs"; import { dirname, resolve } from "node:path"; import { fileURLToPath } from "node:url"; import { describe, expect, it } from "vitest"; -import { GUARDED_EXTENSION_PUBLIC_SURFACE_BASENAMES } from "../extensions/public-artifacts.js"; +import { GUARDED_EXTENSION_PUBLIC_SURFACE_BASENAMES } from "../plugins/public-artifacts.js"; const ROOT_DIR = resolve(dirname(fileURLToPath(import.meta.url)), ".."); const REPO_ROOT = resolve(ROOT_DIR, ".."); diff --git a/src/plugin-sdk/channel-runtime.ts b/src/plugin-sdk/channel-runtime.ts index 4939570b4d2..003a82e47b9 100644 --- a/src/plugin-sdk/channel-runtime.ts +++ b/src/plugin-sdk/channel-runtime.ts @@ -5,6 +5,7 @@ export * from "../channels/chat-type.js"; export * from "../channels/reply-prefix.js"; export * from "../channels/typing.js"; export type * from "../channels/plugins/types.js"; +export { normalizeChannelId } from "../channels/plugins/registry.js"; export * from "../channels/plugins/normalize/signal.js"; export * from "../channels/plugins/normalize/whatsapp.js"; export * from "../channels/plugins/outbound/interactive.js"; diff --git a/src/plugin-sdk/compat.ts b/src/plugin-sdk/compat.ts index 8013fa6cef6..78f2bcc89bc 100644 --- a/src/plugin-sdk/compat.ts +++ b/src/plugin-sdk/compat.ts @@ -48,5 +48,5 @@ export { mapAllowlistResolutionInputs } from "./allow-from.js"; export { resolveBlueBubblesGroupRequireMention, resolveBlueBubblesGroupToolPolicy, -} from "../../extensions/bluebubbles/src/group-policy.js"; +} from "../../extensions/bluebubbles/api.js"; export { collectBlueBubblesStatusIssues } from "../channels/plugins/status-issues/bluebubbles.js"; diff --git a/src/plugin-sdk/config-runtime.ts b/src/plugin-sdk/config-runtime.ts index bf45adc0f63..7b36ff4ed70 100644 --- a/src/plugin-sdk/config-runtime.ts +++ b/src/plugin-sdk/config-runtime.ts @@ -74,7 +74,11 @@ export type { TelegramInlineButtonsScope, TelegramNetworkConfig, TelegramTopicConfig, + TtsAutoMode, TtsConfig, + TtsMode, + TtsModelOverrideConfig, + TtsProvider, } from "../config/types.js"; export { loadSessionStore, diff --git a/src/plugin-sdk/image-generation-core.ts b/src/plugin-sdk/image-generation-core.ts index 73aa7820d38..b8e6cb0162a 100644 --- a/src/plugin-sdk/image-generation-core.ts +++ b/src/plugin-sdk/image-generation-core.ts @@ -1,5 +1,7 @@ // Shared image-generation implementation helpers for bundled and third-party plugins. +export type { AuthProfileStore } from "../agents/auth-profiles.js"; +export type { FallbackAttempt } from "../agents/model-fallback.types.js"; export type { ImageGenerationProviderPlugin } from "../plugins/types.js"; export type { GeneratedImageAsset, @@ -9,8 +11,21 @@ export type { ImageGenerationResult, ImageGenerationSourceImage, } from "../image-generation/types.js"; +export type { OpenClawConfig } from "../config/config.js"; +export { describeFailoverError, isFailoverError } from "../agents/failover-error.js"; export { resolveApiKeyForProvider } from "../agents/model-auth.js"; export { normalizeGoogleModelId } from "../agents/model-id-normalization.js"; +export { + resolveAgentModelFallbackValues, + resolveAgentModelPrimaryValue, +} from "../config/model-input.js"; export { parseGeminiAuth } from "../infra/gemini-auth.js"; +export { + getImageGenerationProvider, + listImageGenerationProviders, +} from "../image-generation/provider-registry.js"; +export { parseImageGenerationModelRef } from "../image-generation/model-ref.js"; +export { createSubsystemLogger } from "../logging/subsystem.js"; export { OPENAI_DEFAULT_IMAGE_MODEL } from "../plugins/provider-model-defaults.js"; +export { getProviderEnvVars } from "../secrets/provider-env-vars.js"; diff --git a/src/plugin-sdk/image-generation-runtime.ts b/src/plugin-sdk/image-generation-runtime.ts index 54f91d0d558..2bdc7c6b6f9 100644 --- a/src/plugin-sdk/image-generation-runtime.ts +++ b/src/plugin-sdk/image-generation-runtime.ts @@ -1,3 +1,8 @@ // Public runtime-facing image-generation helpers for feature/channel plugins. -export { generateImage, listRuntimeImageGenerationProviders } from "../image-generation/runtime.js"; +export { + generateImage, + listRuntimeImageGenerationProviders, + type GenerateImageParams, + type GenerateImageRuntimeResult, +} from "../../extensions/image-generation-core/runtime-api.js"; diff --git a/src/plugin-sdk/mattermost.ts b/src/plugin-sdk/mattermost.ts index 25856195bd2..f7f9f37cdad 100644 --- a/src/plugin-sdk/mattermost.ts +++ b/src/plugin-sdk/mattermost.ts @@ -94,3 +94,4 @@ export { getAgentScopedMediaLocalRoots } from "../media/local-roots.js"; export { loadOutboundMediaFromUrl } from "./outbound-media.js"; export { createChannelPairingController } from "./channel-pairing.js"; export { isRequestBodyLimitError, readRequestBodyWithLimit } from "../infra/http-body.js"; +export { isMattermostSenderAllowed } from "../../extensions/mattermost/api.js"; diff --git a/src/plugin-sdk/media-runtime.ts b/src/plugin-sdk/media-runtime.ts index 6b7c9dce408..afe8c32c480 100644 --- a/src/plugin-sdk/media-runtime.ts +++ b/src/plugin-sdk/media-runtime.ts @@ -23,6 +23,7 @@ export * from "../media-understanding/audio-preflight.ts"; export * from "../media-understanding/defaults.js"; export * from "../media-understanding/image-runtime.ts"; export * from "../media-understanding/runner.js"; +export { normalizeMediaProviderId } from "../media-understanding/provider-registry.js"; export * from "../polls.js"; export { createDirectTextMediaOutbound, diff --git a/src/plugin-sdk/media-understanding-runtime.ts b/src/plugin-sdk/media-understanding-runtime.ts index 5a4c6cdff65..c1ae42335af 100644 --- a/src/plugin-sdk/media-understanding-runtime.ts +++ b/src/plugin-sdk/media-understanding-runtime.ts @@ -6,4 +6,6 @@ export { describeVideoFile, runMediaUnderstandingFile, transcribeAudioFile, -} from "../media-understanding/runtime.js"; + type RunMediaUnderstandingFileParams, + type RunMediaUnderstandingFileResult, +} from "../../extensions/media-understanding-core/runtime-api.js"; diff --git a/src/plugin-sdk/signal.ts b/src/plugin-sdk/signal.ts index 760998bc710..e0c7486f2fc 100644 --- a/src/plugin-sdk/signal.ts +++ b/src/plugin-sdk/signal.ts @@ -4,7 +4,7 @@ export type { ChannelMessageActionAdapter } from "../channels/plugins/types.js"; export type { OpenClawConfig } from "../config/config.js"; export type { SignalAccountConfig } from "../config/types.js"; -export type { ResolvedSignalAccount } from "../../extensions/signal/src/accounts.js"; +export type { ResolvedSignalAccount } from "../../extensions/signal/api.js"; export type { ChannelMessageActionContext, ChannelPlugin, @@ -54,13 +54,12 @@ export { listEnabledSignalAccounts, listSignalAccountIds, resolveDefaultSignalAccountId, -} from "../../extensions/signal/src/accounts.js"; -export { monitorSignalProvider } from "../../extensions/signal/src/monitor.js"; -export { probeSignal } from "../../extensions/signal/src/probe.js"; -export { resolveSignalReactionLevel } from "../../extensions/signal/src/reaction-level.js"; -export { - removeReactionSignal, - sendReactionSignal, -} from "../../extensions/signal/src/send-reactions.js"; -export { sendMessageSignal } from "../../extensions/signal/src/send.js"; -export { signalMessageActions } from "../../extensions/signal/src/message-actions.js"; +} from "../../extensions/signal/api.js"; +export { isSignalSenderAllowed } from "../../extensions/signal/api.js"; +export type { SignalSender } from "../../extensions/signal/api.js"; +export { monitorSignalProvider } from "../../extensions/signal/api.js"; +export { probeSignal } from "../../extensions/signal/api.js"; +export { resolveSignalReactionLevel } from "../../extensions/signal/api.js"; +export { removeReactionSignal, sendReactionSignal } from "../../extensions/signal/api.js"; +export { sendMessageSignal } from "../../extensions/signal/api.js"; +export { signalMessageActions } from "../../extensions/signal/api.js"; diff --git a/src/plugin-sdk/speech-core.ts b/src/plugin-sdk/speech-core.ts index 8db7a983fcb..105495885d9 100644 --- a/src/plugin-sdk/speech-core.ts +++ b/src/plugin-sdk/speech-core.ts @@ -20,9 +20,18 @@ export type { } from "../tts/provider-types.js"; export { + scheduleCleanup, + summarizeText, normalizeApplyTextNormalization, normalizeLanguageCode, normalizeSeed, requireInRange, } from "../tts/tts-core.js"; export { parseTtsDirectives } from "../tts/directives.js"; +export { + canonicalizeSpeechProviderId, + getSpeechProvider, + listSpeechProviders, + normalizeSpeechProviderId, +} from "../tts/provider-registry.js"; +export { normalizeTtsAutoMode, TTS_AUTO_MODES } from "../tts/tts-auto-mode.js"; diff --git a/src/plugin-sdk/speech-runtime.ts b/src/plugin-sdk/speech-runtime.ts index afe192c4f53..cf8c6656a85 100644 --- a/src/plugin-sdk/speech-runtime.ts +++ b/src/plugin-sdk/speech-runtime.ts @@ -1,3 +1,35 @@ // Public runtime-facing speech helpers for feature/channel plugins. -export { listSpeechVoices, textToSpeech, textToSpeechTelephony } from "../tts/runtime.js"; +export { + _test, + buildTtsSystemPromptHint, + getLastTtsAttempt, + getResolvedSpeechProviderConfig, + getTtsMaxLength, + getTtsProvider, + isSummarizationEnabled, + isTtsEnabled, + isTtsProviderConfigured, + listSpeechVoices, + maybeApplyTtsToPayload, + resolveTtsAutoMode, + resolveTtsConfig, + resolveTtsPrefsPath, + resolveTtsProviderOrder, + setLastTtsAttempt, + setSummarizationEnabled, + setTtsAutoMode, + setTtsEnabled, + setTtsMaxLength, + setTtsProvider, + synthesizeSpeech, + textToSpeech, + textToSpeechTelephony, + type ResolvedTtsConfig, + type ResolvedTtsModelOverrides, + type TtsDirectiveOverrides, + type TtsDirectiveParseResult, + type TtsResult, + type TtsSynthesisResult, + type TtsTelephonyResult, +} from "../../extensions/speech-core/runtime-api.js"; diff --git a/src/plugins/capability-provider-runtime.ts b/src/plugins/capability-provider-runtime.ts index 46ec8d7433a..fcb527b8880 100644 --- a/src/plugins/capability-provider-runtime.ts +++ b/src/plugins/capability-provider-runtime.ts @@ -16,7 +16,7 @@ export function resolvePluginCapabilityProviders boolean; }): CapabilityProviderForKey[] { - const active = getActivePluginRegistry(); + const active = getActivePluginRegistry() ?? undefined; const shouldUseActive = params.useActiveRegistryWhen?.(active) ?? (active?.[params.key].length ?? 0) > 0; const registry = diff --git a/src/plugins/contracts/registry.ts b/src/plugins/contracts/registry.ts index feb5caf6f09..60093309c18 100644 --- a/src/plugins/contracts/registry.ts +++ b/src/plugins/contracts/registry.ts @@ -1,7 +1,9 @@ import { BUNDLED_IMAGE_GENERATION_PLUGIN_IDS, + BUNDLED_MEDIA_UNDERSTANDING_PLUGIN_IDS, BUNDLED_PLUGIN_CONTRACT_SNAPSHOTS, BUNDLED_PROVIDER_PLUGIN_IDS, + BUNDLED_SPEECH_PLUGIN_IDS, BUNDLED_WEB_SEARCH_PLUGIN_IDS, } from "../bundled-capability-metadata.js"; import { loadBundledCapabilityRuntimeRegistry } from "../bundled-capability-runtime.js"; @@ -55,33 +57,6 @@ function createProviderContractPluginIdsByProviderId(): Map { return result; } -function createContractSpeechProvider(providerId: string): SpeechProviderPlugin { - return { - id: providerId, - label: providerId, - isConfigured: () => true, - synthesize: async () => ({ - audioBuffer: Buffer.alloc(0), - outputFormat: "mp3", - fileExtension: "mp3", - voiceCompatible: true, - }), - listVoices: async () => [], - }; -} - -function createContractMediaUnderstandingProvider( - providerId: string, -): MediaUnderstandingProviderPlugin { - return { - id: providerId, - capabilities: ["image"], - describeImages: async () => { - throw new Error(`media-understanding contract stub invoked for ${providerId}`); - }, - }; -} - function uniqueStrings(values: readonly string[]): string[] { const result: string[] = []; const seen = new Set(); @@ -163,9 +138,7 @@ function loadProviderContractPluginIds(): string[] { } function loadProviderContractCompatPluginIds(): string[] { - return loadProviderContractPluginIds().map((pluginId) => - pluginId === "kimi-coding" ? "kimi" : pluginId, - ); + return loadProviderContractPluginIds(); } function resolveWebSearchCredentialValue(provider: WebSearchProviderPlugin): unknown { @@ -199,25 +172,29 @@ function loadWebSearchProviderContractRegistry(): WebSearchProviderContractEntry function loadSpeechProviderContractRegistry(): SpeechProviderContractEntry[] { if (!speechProviderContractRegistryCache) { - // Contract tests only need bundled ownership and public speech surface shape. - speechProviderContractRegistryCache = BUNDLED_PLUGIN_CONTRACT_SNAPSHOTS.flatMap((entry) => - entry.speechProviderIds.map((providerId) => ({ - pluginId: entry.pluginId, - provider: createContractSpeechProvider(providerId), - })), - ); + const registry = loadBundledCapabilityRuntimeRegistry({ + pluginIds: BUNDLED_SPEECH_PLUGIN_IDS, + pluginSdkResolution: "dist", + }); + speechProviderContractRegistryCache = registry.speechProviders.map((entry) => ({ + pluginId: entry.pluginId, + provider: entry.provider, + })); } return speechProviderContractRegistryCache; } function loadMediaUnderstandingProviderContractRegistry(): MediaUnderstandingProviderContractEntry[] { if (!mediaUnderstandingProviderContractRegistryCache) { - mediaUnderstandingProviderContractRegistryCache = BUNDLED_PLUGIN_CONTRACT_SNAPSHOTS.flatMap( - (entry) => - entry.mediaUnderstandingProviderIds.map((providerId) => ({ - pluginId: entry.pluginId, - provider: createContractMediaUnderstandingProvider(providerId), - })), + const registry = loadBundledCapabilityRuntimeRegistry({ + pluginIds: BUNDLED_MEDIA_UNDERSTANDING_PLUGIN_IDS, + pluginSdkResolution: "dist", + }); + mediaUnderstandingProviderContractRegistryCache = registry.mediaUnderstandingProviders.map( + (entry) => ({ + pluginId: entry.pluginId, + provider: entry.provider, + }), ); } return mediaUnderstandingProviderContractRegistryCache; diff --git a/src/plugins/public-artifacts.ts b/src/plugins/public-artifacts.ts index 31109288015..678ecfb293e 100644 --- a/src/plugins/public-artifacts.ts +++ b/src/plugins/public-artifacts.ts @@ -1,3 +1,5 @@ +import { BUNDLED_PLUGIN_METADATA } from "./bundled-plugin-metadata.js"; + function assertUniqueValues(values: readonly T[], label: string): readonly T[] { const seen = new Set(); const duplicates = new Set(); @@ -19,14 +21,11 @@ export function getPublicArtifactBasename(relativePath: string): string { } export const BUNDLED_RUNTIME_SIDECAR_PATHS = assertUniqueValues( - [ - "dist/extensions/whatsapp/light-runtime-api.js", - "dist/extensions/whatsapp/runtime-api.js", - "dist/extensions/matrix/helper-api.js", - "dist/extensions/matrix/runtime-api.js", - "dist/extensions/matrix/thread-bindings-runtime.js", - "dist/extensions/msteams/runtime-api.js", - ] as const, + BUNDLED_PLUGIN_METADATA.flatMap((entry) => + (entry.runtimeSidecarArtifacts ?? []).map( + (artifact) => `dist/extensions/${entry.dirName}/${artifact}`, + ), + ).toSorted((left, right) => left.localeCompare(right)), "bundled runtime sidecar path", ); diff --git a/src/plugins/runtime/runtime-matrix-boundary.ts b/src/plugins/runtime/runtime-matrix-boundary.ts index f608fc611bd..1d6c5cc821c 100644 --- a/src/plugins/runtime/runtime-matrix-boundary.ts +++ b/src/plugins/runtime/runtime-matrix-boundary.ts @@ -1,4 +1,5 @@ import { createJiti } from "jiti"; +import type { MatrixRuntimeBoundaryModule } from "./runtime-matrix-surface.js"; import { loadPluginBoundaryModuleWithJiti, resolvePluginRuntimeModulePath, @@ -7,15 +8,13 @@ import { const MATRIX_PLUGIN_ID = "matrix"; -type MatrixModule = typeof import("../../../extensions/matrix/runtime-api.js"); - type MatrixPluginRecord = { rootDir?: string; source: string; }; let cachedModulePath: string | null = null; -let cachedModule: MatrixModule | null = null; +let cachedModule: MatrixRuntimeBoundaryModule | null = null; const jitiLoaders = new Map>(); @@ -27,7 +26,7 @@ function resolveMatrixRuntimeModulePath(record: MatrixPluginRecord): string | nu return resolvePluginRuntimeModulePath(record, "runtime-api"); } -function loadMatrixModule(): MatrixModule | null { +function loadMatrixModule(): MatrixRuntimeBoundaryModule | null { const record = resolveMatrixPluginRecord(); if (!record) { return null; @@ -39,15 +38,18 @@ function loadMatrixModule(): MatrixModule | null { if (cachedModule && cachedModulePath === modulePath) { return cachedModule; } - const loaded = loadPluginBoundaryModuleWithJiti(modulePath, jitiLoaders); + const loaded = loadPluginBoundaryModuleWithJiti( + modulePath, + jitiLoaders, + ); cachedModulePath = modulePath; cachedModule = loaded; return loaded; } export function setMatrixThreadBindingIdleTimeoutBySessionKey( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { const fn = loadMatrixModule()?.setMatrixThreadBindingIdleTimeoutBySessionKey; if (typeof fn !== "function") { return []; @@ -56,8 +58,8 @@ export function setMatrixThreadBindingIdleTimeoutBySessionKey( } export function setMatrixThreadBindingMaxAgeBySessionKey( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { const fn = loadMatrixModule()?.setMatrixThreadBindingMaxAgeBySessionKey; if (typeof fn !== "function") { return []; diff --git a/src/plugins/runtime/runtime-matrix-surface.ts b/src/plugins/runtime/runtime-matrix-surface.ts new file mode 100644 index 00000000000..33d3d212274 --- /dev/null +++ b/src/plugins/runtime/runtime-matrix-surface.ts @@ -0,0 +1,22 @@ +import type { SessionBindingRecord } from "../../infra/outbound/session-binding-service.js"; + +export type MatrixThreadBindingIdleTimeoutParams = { + accountId: string; + targetSessionKey: string; + idleTimeoutMs: number; +}; + +export type MatrixThreadBindingMaxAgeParams = { + accountId: string; + targetSessionKey: string; + maxAgeMs: number; +}; + +export type MatrixRuntimeBoundaryModule = { + setMatrixThreadBindingIdleTimeoutBySessionKey: ( + params: MatrixThreadBindingIdleTimeoutParams, + ) => SessionBindingRecord[]; + setMatrixThreadBindingMaxAgeBySessionKey: ( + params: MatrixThreadBindingMaxAgeParams, + ) => SessionBindingRecord[]; +}; diff --git a/src/plugins/runtime/runtime-whatsapp-boundary.ts b/src/plugins/runtime/runtime-whatsapp-boundary.ts index a1312677f4e..5854a94f26f 100644 --- a/src/plugins/runtime/runtime-whatsapp-boundary.ts +++ b/src/plugins/runtime/runtime-whatsapp-boundary.ts @@ -11,12 +11,13 @@ import { resolvePluginRuntimeModulePath, resolvePluginRuntimeRecord, } from "./runtime-plugin-boundary.js"; +import type { + WhatsAppHeavyRuntimeModule, + WhatsAppLightRuntimeModule, +} from "./runtime-whatsapp-surface.js"; const WHATSAPP_PLUGIN_ID = "whatsapp"; -type WhatsAppLightModule = typeof import("../../../extensions/whatsapp/light-runtime-api.js"); -type WhatsAppHeavyModule = typeof import("../../../extensions/whatsapp/runtime-api.js"); - type WhatsAppPluginRecord = { origin: string; rootDir?: string; @@ -24,9 +25,9 @@ type WhatsAppPluginRecord = { }; let cachedHeavyModulePath: string | null = null; -let cachedHeavyModule: WhatsAppHeavyModule | null = null; +let cachedHeavyModule: WhatsAppHeavyRuntimeModule | null = null; let cachedLightModulePath: string | null = null; -let cachedLightModule: WhatsAppLightModule | null = null; +let cachedLightModule: WhatsAppLightRuntimeModule | null = null; const jitiLoaders = new Map>(); @@ -55,12 +56,12 @@ function resolveWhatsAppRuntimeModulePath( return modulePath; } -function loadCurrentHeavyModuleSync(): WhatsAppHeavyModule { +function loadCurrentHeavyModuleSync(): WhatsAppHeavyRuntimeModule { const modulePath = resolveWhatsAppRuntimeModulePath(resolveWhatsAppPluginRecord(), "runtime-api"); - return loadPluginBoundaryModuleWithJiti(modulePath, jitiLoaders); + return loadPluginBoundaryModuleWithJiti(modulePath, jitiLoaders); } -function loadWhatsAppLightModule(): WhatsAppLightModule { +function loadWhatsAppLightModule(): WhatsAppLightRuntimeModule { const modulePath = resolveWhatsAppRuntimeModulePath( resolveWhatsAppPluginRecord(), "light-runtime-api", @@ -68,143 +69,149 @@ function loadWhatsAppLightModule(): WhatsAppLightModule { if (cachedLightModule && cachedLightModulePath === modulePath) { return cachedLightModule; } - const loaded = loadPluginBoundaryModuleWithJiti(modulePath, jitiLoaders); + const loaded = loadPluginBoundaryModuleWithJiti( + modulePath, + jitiLoaders, + ); cachedLightModulePath = modulePath; cachedLightModule = loaded; return loaded; } -async function loadWhatsAppHeavyModule(): Promise { +async function loadWhatsAppHeavyModule(): Promise { const record = resolveWhatsAppPluginRecord(); const modulePath = resolveWhatsAppRuntimeModulePath(record, "runtime-api"); if (cachedHeavyModule && cachedHeavyModulePath === modulePath) { return cachedHeavyModule; } - const loaded = loadPluginBoundaryModuleWithJiti(modulePath, jitiLoaders); + const loaded = loadPluginBoundaryModuleWithJiti( + modulePath, + jitiLoaders, + ); cachedHeavyModulePath = modulePath; cachedHeavyModule = loaded; return loaded; } -function getLightExport( +function getLightExport( exportName: K, -): NonNullable { +): NonNullable { const loaded = loadWhatsAppLightModule(); const value = loaded[exportName]; if (value == null) { throw new Error(`WhatsApp plugin runtime is missing export '${String(exportName)}'`); } - return value as NonNullable; + return value as NonNullable; } -async function getHeavyExport( +async function getHeavyExport( exportName: K, -): Promise> { +): Promise> { const loaded = await loadWhatsAppHeavyModule(); const value = loaded[exportName]; if (value == null) { throw new Error(`WhatsApp plugin runtime is missing export '${String(exportName)}'`); } - return value as NonNullable; + return value as NonNullable; } export function getActiveWebListener( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return getLightExport("getActiveWebListener")(...args); } export function getWebAuthAgeMs( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return getLightExport("getWebAuthAgeMs")(...args); } export function logWebSelfId( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return getLightExport("logWebSelfId")(...args); } export function loginWeb( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return loadWhatsAppHeavyModule().then((loaded) => loaded.loginWeb(...args)); } export function logoutWeb( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return getLightExport("logoutWeb")(...args); } export function readWebSelfId( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return getLightExport("readWebSelfId")(...args); } export function webAuthExists( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return getLightExport("webAuthExists")(...args); } export function sendMessageWhatsApp( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return loadWhatsAppHeavyModule().then((loaded) => loaded.sendMessageWhatsApp(...args)); } export function sendPollWhatsApp( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return loadWhatsAppHeavyModule().then((loaded) => loaded.sendPollWhatsApp(...args)); } export function sendReactionWhatsApp( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return loadWhatsAppHeavyModule().then((loaded) => loaded.sendReactionWhatsApp(...args)); } export function createRuntimeWhatsAppLoginTool( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return getLightExport("createWhatsAppLoginTool")(...args); } export function createWaSocket( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return loadWhatsAppHeavyModule().then((loaded) => loaded.createWaSocket(...args)); } export function formatError( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return getLightExport("formatError")(...args); } export function getStatusCode( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return getLightExport("getStatusCode")(...args); } export function pickWebChannel( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return getLightExport("pickWebChannel")(...args); } -export function resolveWaWebAuthDir(): WhatsAppLightModule["WA_WEB_AUTH_DIR"] { +export function resolveWaWebAuthDir(): WhatsAppLightRuntimeModule["WA_WEB_AUTH_DIR"] { return getLightExport("WA_WEB_AUTH_DIR"); } export async function handleWhatsAppAction( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return (await getHeavyExport("handleWhatsAppAction"))(...args); } @@ -221,14 +228,14 @@ export async function loadWebMediaRaw( } export function monitorWebChannel( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return loadWhatsAppHeavyModule().then((loaded) => loaded.monitorWebChannel(...args)); } export async function monitorWebInbox( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return (await getHeavyExport("monitorWebInbox"))(...args); } @@ -239,34 +246,34 @@ export async function optimizeImageToJpeg( } export async function runWebHeartbeatOnce( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return (await getHeavyExport("runWebHeartbeatOnce"))(...args); } export async function startWebLoginWithQr( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return (await getHeavyExport("startWebLoginWithQr"))(...args); } export async function waitForWaConnection( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return (await getHeavyExport("waitForWaConnection"))(...args); } export async function waitForWebLogin( - ...args: Parameters -): ReturnType { + ...args: Parameters +): ReturnType { return (await getHeavyExport("waitForWebLogin"))(...args); } export const extractMediaPlaceholder = ( - ...args: Parameters + ...args: Parameters ) => loadCurrentHeavyModuleSync().extractMediaPlaceholder(...args); -export const extractText = (...args: Parameters) => +export const extractText = (...args: Parameters) => loadCurrentHeavyModuleSync().extractText(...args); export function getDefaultLocalRoots( diff --git a/src/plugins/runtime/runtime-whatsapp-surface.ts b/src/plugins/runtime/runtime-whatsapp-surface.ts new file mode 100644 index 00000000000..bfbf47dad74 --- /dev/null +++ b/src/plugins/runtime/runtime-whatsapp-surface.ts @@ -0,0 +1,249 @@ +import type { AgentToolResult } from "@mariozechner/pi-agent-core"; +import type { AnyMessageContent, makeWASocket } from "@whiskeysockets/baileys"; +import type { NormalizedLocation } from "../../channels/location.js"; +import type { ChannelAgentTool } from "../../channels/plugins/types.core.js"; +import type { OpenClawConfig } from "../../config/config.js"; +import type { PollInput } from "../../polls.js"; +import type { RuntimeEnv } from "../../runtime.js"; +import type { WebChannel } from "../../utils.js"; + +export type ActiveWebSendOptions = { + gifPlayback?: boolean; + accountId?: string; + fileName?: string; +}; + +export type ActiveWebListener = { + sendMessage: ( + to: string, + text: string, + mediaBuffer?: Buffer, + mediaType?: string, + options?: ActiveWebSendOptions, + ) => Promise<{ messageId: string }>; + sendPoll: (to: string, poll: PollInput) => Promise<{ messageId: string }>; + sendReaction: ( + chatJid: string, + messageId: string, + emoji: string, + fromMe: boolean, + participant?: string, + ) => Promise; + sendComposingTo: (to: string) => Promise; + close?: () => Promise; +}; + +export type WebListenerCloseReason = { + status?: number; + isLoggedOut: boolean; + error?: unknown; +}; + +export type WebInboundMessage = { + id?: string; + from: string; + conversationId: string; + to: string; + accountId: string; + body: string; + pushName?: string; + timestamp?: number; + chatType: "direct" | "group"; + chatId: string; + sender?: unknown; + senderJid?: string; + senderE164?: string; + senderName?: string; + replyTo?: unknown; + replyToId?: string; + replyToBody?: string; + replyToSender?: string; + replyToSenderJid?: string; + replyToSenderE164?: string; + groupSubject?: string; + groupParticipants?: string[]; + mentions?: string[]; + mentionedJids?: string[]; + self?: unknown; + selfJid?: string | null; + selfLid?: string | null; + selfE164?: string | null; + fromMe?: boolean; + location?: NormalizedLocation; + sendComposing: () => Promise; + reply: (text: string) => Promise; + sendMedia: (payload: AnyMessageContent) => Promise; + mediaPath?: string; + mediaType?: string; + mediaFileName?: string; + mediaUrl?: string; + wasMentioned?: boolean; +}; + +export type WebChannelHealthState = + | "starting" + | "healthy" + | "stale" + | "reconnecting" + | "conflict" + | "logged-out" + | "stopped"; + +export type WebChannelStatus = { + running: boolean; + connected: boolean; + reconnectAttempts: number; + lastConnectedAt?: number | null; + lastDisconnect?: { + at: number; + status?: number; + error?: string; + loggedOut?: boolean; + } | null; + lastInboundAt?: number | null; + lastMessageAt?: number | null; + lastEventAt?: number | null; + lastError?: string | null; + healthState?: WebChannelHealthState; +}; + +export type WebMonitorTuning = { + reconnect?: Partial<{ + enabled: boolean; + maxAttempts: number; + baseDelayMs: number; + maxDelayMs: number; + }>; + heartbeatSeconds?: number; + messageTimeoutMs?: number; + watchdogCheckMs?: number; + sleep?: (ms: number, signal?: AbortSignal) => Promise; + statusSink?: (status: WebChannelStatus) => void; + accountId?: string; + debounceMs?: number; +}; + +export type MonitorWebInboxFactory = (options: { + verbose: boolean; + accountId: string; + authDir: string; + onMessage: (msg: WebInboundMessage) => Promise; + mediaMaxMb?: number; + sendReadReceipts?: boolean; + debounceMs?: number; + shouldDebounce?: (msg: WebInboundMessage) => boolean; +}) => Promise<{ + closeReason: Promise; + stop: () => Promise; +}>; + +export type ReplyResolver = (...args: unknown[]) => Promise; + +export type WhatsAppWaSocket = ReturnType; + +export type WhatsAppLightRuntimeModule = { + getActiveWebListener: (accountId?: string | null) => ActiveWebListener | null; + getWebAuthAgeMs: (authDir?: string) => number | null; + logWebSelfId: (authDir?: string, runtime?: RuntimeEnv, includeChannelPrefix?: boolean) => void; + logoutWeb: (params: { + authDir?: string; + isLegacyAuthDir?: boolean; + runtime?: RuntimeEnv; + }) => Promise; + readWebSelfId: (authDir?: string) => { + e164: string | null; + jid: string | null; + lid: string | null; + }; + webAuthExists: (authDir?: string) => Promise; + createWhatsAppLoginTool: () => ChannelAgentTool; + formatError: (err: unknown) => string; + getStatusCode: (err: unknown) => number | undefined; + pickWebChannel: (pref: WebChannel | "auto", authDir?: string) => Promise; + WA_WEB_AUTH_DIR: string; +}; + +export type WhatsAppHeavyRuntimeModule = { + loginWeb: ( + verbose: boolean, + waitForConnection?: (sock: WhatsAppWaSocket) => Promise, + runtime?: RuntimeEnv, + accountId?: string, + ) => Promise; + sendMessageWhatsApp: ( + to: string, + body: string, + options: { + verbose: boolean; + cfg?: OpenClawConfig; + mediaUrl?: string; + mediaLocalRoots?: readonly string[]; + gifPlayback?: boolean; + accountId?: string; + }, + ) => Promise<{ messageId: string; toJid: string }>; + sendPollWhatsApp: ( + to: string, + poll: PollInput, + options: { verbose: boolean; accountId?: string; cfg?: OpenClawConfig }, + ) => Promise<{ messageId: string; toJid: string }>; + sendReactionWhatsApp: ( + chatJid: string, + messageId: string, + emoji: string, + options: { + verbose: boolean; + fromMe?: boolean; + participant?: string; + accountId?: string; + }, + ) => Promise; + createWaSocket: ( + printQr: boolean, + verbose: boolean, + opts?: { authDir?: string; onQr?: (qr: string) => void }, + ) => Promise; + handleWhatsAppAction: ( + params: Record, + cfg: OpenClawConfig, + ) => Promise>; + monitorWebChannel: ( + verbose: boolean, + listenerFactory?: MonitorWebInboxFactory, + keepAlive?: boolean, + replyResolver?: ReplyResolver, + runtime?: RuntimeEnv, + abortSignal?: AbortSignal, + tuning?: WebMonitorTuning, + ) => Promise; + monitorWebInbox: MonitorWebInboxFactory; + runWebHeartbeatOnce: (opts: { + cfg?: OpenClawConfig; + to: string; + verbose?: boolean; + replyResolver?: ReplyResolver; + sender?: WhatsAppHeavyRuntimeModule["sendMessageWhatsApp"]; + sessionId?: string; + overrideBody?: string; + dryRun?: boolean; + }) => Promise; + startWebLoginWithQr: (opts?: { + verbose?: boolean; + timeoutMs?: number; + force?: boolean; + accountId?: string; + runtime?: RuntimeEnv; + }) => Promise<{ qrDataUrl?: string; message: string }>; + waitForWaConnection: (sock: WhatsAppWaSocket) => Promise; + waitForWebLogin: (opts?: { + timeoutMs?: number; + runtime?: RuntimeEnv; + accountId?: string; + }) => Promise<{ connected: boolean; message: string }>; + extractMediaPlaceholder: ( + message: unknown, + mediaDir: string, + verbose?: boolean, + ) => Promise; + extractText: (message: unknown) => string; +}; diff --git a/src/test-utils/imessage-test-plugin.ts b/src/test-utils/imessage-test-plugin.ts index 62362fe5712..083ffe3ad6f 100644 --- a/src/test-utils/imessage-test-plugin.ts +++ b/src/test-utils/imessage-test-plugin.ts @@ -1,6 +1,6 @@ -import { normalizeIMessageHandle } from "../../extensions/imessage/api.js"; import { imessageOutbound } from "../../test/channel-outbounds.js"; import type { ChannelOutboundAdapter, ChannelPlugin } from "../channels/plugins/types.js"; +import { normalizeIMessageHandle } from "../plugin-sdk/imessage-targets.js"; import { collectStatusIssuesFromLastError } from "../plugin-sdk/status-helpers.js"; export const createIMessageTestPlugin = (params?: { diff --git a/src/tts/tts.ts b/src/tts/tts.ts index f3f32b438db..597425d27d0 100644 --- a/src/tts/tts.ts +++ b/src/tts/tts.ts @@ -1,857 +1,36 @@ -import { randomBytes } from "node:crypto"; -import { - existsSync, - mkdirSync, - readFileSync, - writeFileSync, - mkdtempSync, - renameSync, - unlinkSync, -} from "node:fs"; -import path from "node:path"; -import { resolveSendableOutboundReplyParts } from "openclaw/plugin-sdk/reply-payload"; -import type { ReplyPayload } from "../auto-reply/types.js"; -import { normalizeChannelId } from "../channels/plugins/index.js"; -import type { ChannelId } from "../channels/plugins/types.js"; -import type { OpenClawConfig } from "../config/config.js"; -import type { - TtsConfig, - TtsAutoMode, - TtsMode, - TtsProvider, - TtsModelOverrideConfig, -} from "../config/types.tts.js"; -import { logVerbose } from "../globals.js"; -import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; -import { stripMarkdown } from "../shared/text/strip-markdown.js"; -import { CONFIG_DIR, resolveUserPath } from "../utils.js"; -import { parseTtsDirectives } from "./directives.js"; -import { - canonicalizeSpeechProviderId, - getSpeechProvider, - listSpeechProviders, -} from "./provider-registry.js"; -import type { - SpeechModelOverridePolicy, - SpeechProviderConfig, - SpeechVoiceOption, +import * as speechRuntime from "../plugin-sdk/speech-runtime.js"; + +export const buildTtsSystemPromptHint = speechRuntime.buildTtsSystemPromptHint; +export const getLastTtsAttempt = speechRuntime.getLastTtsAttempt; +export const getResolvedSpeechProviderConfig = speechRuntime.getResolvedSpeechProviderConfig; +export const getTtsMaxLength = speechRuntime.getTtsMaxLength; +export const getTtsProvider = speechRuntime.getTtsProvider; +export const isSummarizationEnabled = speechRuntime.isSummarizationEnabled; +export const isTtsEnabled = speechRuntime.isTtsEnabled; +export const isTtsProviderConfigured = speechRuntime.isTtsProviderConfigured; +export const listSpeechVoices = speechRuntime.listSpeechVoices; +export const maybeApplyTtsToPayload = speechRuntime.maybeApplyTtsToPayload; +export const resolveTtsAutoMode = speechRuntime.resolveTtsAutoMode; +export const resolveTtsConfig = speechRuntime.resolveTtsConfig; +export const resolveTtsPrefsPath = speechRuntime.resolveTtsPrefsPath; +export const resolveTtsProviderOrder = speechRuntime.resolveTtsProviderOrder; +export const setLastTtsAttempt = speechRuntime.setLastTtsAttempt; +export const setSummarizationEnabled = speechRuntime.setSummarizationEnabled; +export const setTtsAutoMode = speechRuntime.setTtsAutoMode; +export const setTtsEnabled = speechRuntime.setTtsEnabled; +export const setTtsMaxLength = speechRuntime.setTtsMaxLength; +export const setTtsProvider = speechRuntime.setTtsProvider; +export const synthesizeSpeech = speechRuntime.synthesizeSpeech; +export const textToSpeech = speechRuntime.textToSpeech; +export const textToSpeechTelephony = speechRuntime.textToSpeechTelephony; +export const _test = speechRuntime._test; + +export type { + ResolvedTtsConfig, + ResolvedTtsModelOverrides, TtsDirectiveOverrides, TtsDirectiveParseResult, -} from "./provider-types.js"; -import { normalizeTtsAutoMode } from "./tts-auto-mode.js"; -import { scheduleCleanup, summarizeText } from "./tts-core.js"; - -export type { TtsDirectiveOverrides, TtsDirectiveParseResult } from "./provider-types.js"; - -const DEFAULT_TIMEOUT_MS = 30_000; -const DEFAULT_TTS_MAX_LENGTH = 1500; -const DEFAULT_TTS_SUMMARIZE = true; -const DEFAULT_MAX_TEXT_LENGTH = 4096; - -export type ResolvedTtsConfig = { - auto: TtsAutoMode; - mode: TtsMode; - provider: TtsProvider; - providerSource: "config" | "default"; - summaryModel?: string; - modelOverrides: ResolvedTtsModelOverrides; - providerConfigs: Record; - prefsPath?: string; - maxTextLength: number; - timeoutMs: number; -}; - -type TtsUserPrefs = { - tts?: { - auto?: TtsAutoMode; - enabled?: boolean; - provider?: TtsProvider; - maxLength?: number; - summarize?: boolean; - }; -}; - -export type ResolvedTtsModelOverrides = SpeechModelOverridePolicy; - -export type TtsResult = { - success: boolean; - audioPath?: string; - error?: string; - latencyMs?: number; - provider?: string; - outputFormat?: string; - voiceCompatible?: boolean; -}; - -export type TtsSynthesisResult = { - success: boolean; - audioBuffer?: Buffer; - error?: string; - latencyMs?: number; - provider?: string; - outputFormat?: string; - voiceCompatible?: boolean; - fileExtension?: string; -}; - -export type TtsTelephonyResult = { - success: boolean; - audioBuffer?: Buffer; - error?: string; - latencyMs?: number; - provider?: string; - outputFormat?: string; - sampleRate?: number; -}; - -type TtsStatusEntry = { - timestamp: number; - success: boolean; - textLength: number; - summarized: boolean; - provider?: string; - latencyMs?: number; - error?: string; -}; - -let lastTtsAttempt: TtsStatusEntry | undefined; - -function resolveModelOverridePolicy( - overrides: TtsModelOverrideConfig | undefined, -): ResolvedTtsModelOverrides { - const enabled = overrides?.enabled ?? true; - if (!enabled) { - return { - enabled: false, - allowText: false, - allowProvider: false, - allowVoice: false, - allowModelId: false, - allowVoiceSettings: false, - allowNormalization: false, - allowSeed: false, - }; - } - const allow = (value: boolean | undefined, defaultValue = true) => value ?? defaultValue; - return { - enabled: true, - allowText: allow(overrides?.allowText), - // Provider switching is higher-impact than voice/style tweaks; keep opt-in. - allowProvider: allow(overrides?.allowProvider, false), - allowVoice: allow(overrides?.allowVoice), - allowModelId: allow(overrides?.allowModelId), - allowVoiceSettings: allow(overrides?.allowVoiceSettings), - allowNormalization: allow(overrides?.allowNormalization), - allowSeed: allow(overrides?.allowSeed), - }; -} - -function sortSpeechProvidersForAutoSelection(cfg?: OpenClawConfig) { - return listSpeechProviders(cfg).toSorted((left, right) => { - const leftOrder = left.autoSelectOrder ?? Number.MAX_SAFE_INTEGER; - const rightOrder = right.autoSelectOrder ?? Number.MAX_SAFE_INTEGER; - if (leftOrder !== rightOrder) { - return leftOrder - rightOrder; - } - return left.id.localeCompare(right.id); - }); -} - -function resolveRegistryDefaultSpeechProviderId(cfg?: OpenClawConfig): TtsProvider { - return sortSpeechProvidersForAutoSelection(cfg)[0]?.id ?? ""; -} - -function asProviderConfig(value: unknown): SpeechProviderConfig { - return typeof value === "object" && value !== null && !Array.isArray(value) - ? (value as SpeechProviderConfig) - : {}; -} - -function asProviderConfigMap(value: unknown): Record { - return typeof value === "object" && value !== null && !Array.isArray(value) - ? (value as Record) - : {}; -} - -function resolveSpeechProviderConfigs( - raw: TtsConfig, - cfg: OpenClawConfig, - timeoutMs: number, -): Record { - const providerConfigs: Record = {}; - const rawProviders = asProviderConfigMap(raw.providers); - for (const provider of listSpeechProviders(cfg)) { - providerConfigs[provider.id] = - provider.resolveConfig?.({ - cfg, - rawConfig: { - ...(raw as Record), - providers: rawProviders, - }, - timeoutMs, - }) ?? - asProviderConfig(rawProviders[provider.id] ?? (raw as Record)[provider.id]); - } - return providerConfigs; -} - -export function getResolvedSpeechProviderConfig( - config: ResolvedTtsConfig, - providerId: string, - cfg?: OpenClawConfig, -): SpeechProviderConfig { - const canonical = - canonicalizeSpeechProviderId(providerId, cfg) ?? providerId.trim().toLowerCase(); - return config.providerConfigs[canonical] ?? {}; -} - -export function resolveTtsConfig(cfg: OpenClawConfig): ResolvedTtsConfig { - const raw: TtsConfig = cfg.messages?.tts ?? {}; - const providerSource = raw.provider ? "config" : "default"; - const timeoutMs = raw.timeoutMs ?? DEFAULT_TIMEOUT_MS; - const auto = normalizeTtsAutoMode(raw.auto) ?? (raw.enabled ? "always" : "off"); - return { - auto, - mode: raw.mode ?? "final", - provider: - canonicalizeSpeechProviderId(raw.provider, cfg) ?? - resolveRegistryDefaultSpeechProviderId(cfg), - providerSource, - summaryModel: raw.summaryModel?.trim() || undefined, - modelOverrides: resolveModelOverridePolicy(raw.modelOverrides), - providerConfigs: resolveSpeechProviderConfigs(raw, cfg, timeoutMs), - prefsPath: raw.prefsPath, - maxTextLength: raw.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH, - timeoutMs, - }; -} - -export function resolveTtsPrefsPath(config: ResolvedTtsConfig): string { - if (config.prefsPath?.trim()) { - return resolveUserPath(config.prefsPath.trim()); - } - const envPath = process.env.OPENCLAW_TTS_PREFS?.trim(); - if (envPath) { - return resolveUserPath(envPath); - } - return path.join(CONFIG_DIR, "settings", "tts.json"); -} - -function resolveTtsAutoModeFromPrefs(prefs: TtsUserPrefs): TtsAutoMode | undefined { - const auto = normalizeTtsAutoMode(prefs.tts?.auto); - if (auto) { - return auto; - } - if (typeof prefs.tts?.enabled === "boolean") { - return prefs.tts.enabled ? "always" : "off"; - } - return undefined; -} - -export function resolveTtsAutoMode(params: { - config: ResolvedTtsConfig; - prefsPath: string; - sessionAuto?: string; -}): TtsAutoMode { - const sessionAuto = normalizeTtsAutoMode(params.sessionAuto); - if (sessionAuto) { - return sessionAuto; - } - const prefsAuto = resolveTtsAutoModeFromPrefs(readPrefs(params.prefsPath)); - if (prefsAuto) { - return prefsAuto; - } - return params.config.auto; -} - -export function buildTtsSystemPromptHint(cfg: OpenClawConfig): string | undefined { - const config = resolveTtsConfig(cfg); - const prefsPath = resolveTtsPrefsPath(config); - const autoMode = resolveTtsAutoMode({ config, prefsPath }); - if (autoMode === "off") { - return undefined; - } - const maxLength = getTtsMaxLength(prefsPath); - const summarize = isSummarizationEnabled(prefsPath) ? "on" : "off"; - const autoHint = - autoMode === "inbound" - ? "Only use TTS when the user's last message includes audio/voice." - : autoMode === "tagged" - ? "Only use TTS when you include [[tts]] or [[tts:text]] tags." - : undefined; - return [ - "Voice (TTS) is enabled.", - autoHint, - `Keep spoken text ≤${maxLength} chars to avoid auto-summary (summary ${summarize}).`, - "Use [[tts:...]] and optional [[tts:text]]...[[/tts:text]] to control voice/expressiveness.", - ] - .filter(Boolean) - .join("\n"); -} - -function readPrefs(prefsPath: string): TtsUserPrefs { - try { - if (!existsSync(prefsPath)) { - return {}; - } - return JSON.parse(readFileSync(prefsPath, "utf8")) as TtsUserPrefs; - } catch { - return {}; - } -} - -function atomicWriteFileSync(filePath: string, content: string): void { - const tmpPath = `${filePath}.tmp.${Date.now()}.${randomBytes(8).toString("hex")}`; - writeFileSync(tmpPath, content, { mode: 0o600 }); - try { - renameSync(tmpPath, filePath); - } catch (err) { - try { - unlinkSync(tmpPath); - } catch { - // ignore - } - throw err; - } -} - -function updatePrefs(prefsPath: string, update: (prefs: TtsUserPrefs) => void): void { - const prefs = readPrefs(prefsPath); - update(prefs); - mkdirSync(path.dirname(prefsPath), { recursive: true }); - atomicWriteFileSync(prefsPath, JSON.stringify(prefs, null, 2)); -} - -export function isTtsEnabled( - config: ResolvedTtsConfig, - prefsPath: string, - sessionAuto?: string, -): boolean { - return resolveTtsAutoMode({ config, prefsPath, sessionAuto }) !== "off"; -} - -export function setTtsAutoMode(prefsPath: string, mode: TtsAutoMode): void { - updatePrefs(prefsPath, (prefs) => { - const next = { ...prefs.tts }; - delete next.enabled; - next.auto = mode; - prefs.tts = next; - }); -} - -export function setTtsEnabled(prefsPath: string, enabled: boolean): void { - setTtsAutoMode(prefsPath, enabled ? "always" : "off"); -} - -export function getTtsProvider(config: ResolvedTtsConfig, prefsPath: string): TtsProvider { - const prefs = readPrefs(prefsPath); - const prefsProvider = canonicalizeSpeechProviderId(prefs.tts?.provider); - if (prefsProvider) { - return prefsProvider; - } - if (config.providerSource === "config") { - return canonicalizeSpeechProviderId(config.provider) ?? config.provider; - } - - for (const provider of sortSpeechProvidersForAutoSelection()) { - if ( - provider.isConfigured({ - providerConfig: config.providerConfigs[provider.id] ?? {}, - timeoutMs: config.timeoutMs, - }) - ) { - return provider.id; - } - } - return config.provider; -} - -export function setTtsProvider(prefsPath: string, provider: TtsProvider): void { - updatePrefs(prefsPath, (prefs) => { - prefs.tts = { ...prefs.tts, provider: canonicalizeSpeechProviderId(provider) ?? provider }; - }); -} - -export function getTtsMaxLength(prefsPath: string): number { - const prefs = readPrefs(prefsPath); - return prefs.tts?.maxLength ?? DEFAULT_TTS_MAX_LENGTH; -} - -export function setTtsMaxLength(prefsPath: string, maxLength: number): void { - updatePrefs(prefsPath, (prefs) => { - prefs.tts = { ...prefs.tts, maxLength }; - }); -} - -export function isSummarizationEnabled(prefsPath: string): boolean { - const prefs = readPrefs(prefsPath); - return prefs.tts?.summarize ?? DEFAULT_TTS_SUMMARIZE; -} - -export function setSummarizationEnabled(prefsPath: string, enabled: boolean): void { - updatePrefs(prefsPath, (prefs) => { - prefs.tts = { ...prefs.tts, summarize: enabled }; - }); -} - -export function getLastTtsAttempt(): TtsStatusEntry | undefined { - return lastTtsAttempt; -} - -export function setLastTtsAttempt(entry: TtsStatusEntry | undefined): void { - lastTtsAttempt = entry; -} - -/** Channels that require voice-note-compatible audio */ -const OPUS_CHANNELS = new Set(["telegram", "feishu", "whatsapp", "matrix"]); - -function resolveChannelId(channel: string | undefined): ChannelId | null { - return channel ? normalizeChannelId(channel) : null; -} - -export function resolveTtsProviderOrder(primary: TtsProvider, cfg?: OpenClawConfig): TtsProvider[] { - const normalizedPrimary = canonicalizeSpeechProviderId(primary, cfg) ?? primary; - const ordered = new Set([normalizedPrimary]); - for (const provider of sortSpeechProvidersForAutoSelection(cfg)) { - const normalized = provider.id; - if (normalized !== normalizedPrimary) { - ordered.add(normalized); - } - } - return [...ordered]; -} - -export function isTtsProviderConfigured( - config: ResolvedTtsConfig, - provider: TtsProvider, - cfg?: OpenClawConfig, -): boolean { - const resolvedProvider = getSpeechProvider(provider, cfg); - if (!resolvedProvider) { - return false; - } - return ( - resolvedProvider.isConfigured({ - cfg, - providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, cfg), - timeoutMs: config.timeoutMs, - }) ?? false - ); -} - -function formatTtsProviderError(provider: TtsProvider, err: unknown): string { - const error = err instanceof Error ? err : new Error(String(err)); - if (error.name === "AbortError") { - return `${provider}: request timed out`; - } - return `${provider}: ${error.message}`; -} - -function buildTtsFailureResult(errors: string[]): { success: false; error: string } { - return { - success: false, - error: `TTS conversion failed: ${errors.join("; ") || "no providers available"}`, - }; -} - -function resolveReadySpeechProvider(params: { - provider: TtsProvider; - cfg: OpenClawConfig; - config: ResolvedTtsConfig; - errors: string[]; - requireTelephony?: boolean; -}): NonNullable> | null { - const resolvedProvider = getSpeechProvider(params.provider, params.cfg); - if (!resolvedProvider) { - params.errors.push(`${params.provider}: no provider registered`); - return null; - } - const providerConfig = getResolvedSpeechProviderConfig( - params.config, - resolvedProvider.id, - params.cfg, - ); - if ( - !resolvedProvider.isConfigured({ - cfg: params.cfg, - providerConfig, - timeoutMs: params.config.timeoutMs, - }) - ) { - params.errors.push(`${params.provider}: not configured`); - return null; - } - if (params.requireTelephony && !resolvedProvider.synthesizeTelephony) { - params.errors.push(`${params.provider}: unsupported for telephony`); - return null; - } - return resolvedProvider; -} - -function resolveTtsRequestSetup(params: { - text: string; - cfg: OpenClawConfig; - prefsPath?: string; - providerOverride?: TtsProvider; - disableFallback?: boolean; -}): - | { - config: ResolvedTtsConfig; - providers: TtsProvider[]; - } - | { - error: string; - } { - const config = resolveTtsConfig(params.cfg); - const prefsPath = params.prefsPath ?? resolveTtsPrefsPath(config); - if (params.text.length > config.maxTextLength) { - return { - error: `Text too long (${params.text.length} chars, max ${config.maxTextLength})`, - }; - } - - const userProvider = getTtsProvider(config, prefsPath); - const provider = - canonicalizeSpeechProviderId(params.providerOverride, params.cfg) ?? userProvider; - return { - config, - providers: params.disableFallback ? [provider] : resolveTtsProviderOrder(provider, params.cfg), - }; -} - -export async function textToSpeech(params: { - text: string; - cfg: OpenClawConfig; - prefsPath?: string; - channel?: string; - overrides?: TtsDirectiveOverrides; - disableFallback?: boolean; -}): Promise { - const synthesis = await synthesizeSpeech(params); - if (!synthesis.success || !synthesis.audioBuffer || !synthesis.fileExtension) { - return buildTtsFailureResult([synthesis.error ?? "TTS conversion failed"]); - } - - const tempRoot = resolvePreferredOpenClawTmpDir(); - mkdirSync(tempRoot, { recursive: true, mode: 0o700 }); - const tempDir = mkdtempSync(path.join(tempRoot, "tts-")); - const audioPath = path.join(tempDir, `voice-${Date.now()}${synthesis.fileExtension}`); - writeFileSync(audioPath, synthesis.audioBuffer); - scheduleCleanup(tempDir); - - return { - success: true, - audioPath, - latencyMs: synthesis.latencyMs, - provider: synthesis.provider, - outputFormat: synthesis.outputFormat, - voiceCompatible: synthesis.voiceCompatible, - }; -} - -export async function synthesizeSpeech(params: { - text: string; - cfg: OpenClawConfig; - prefsPath?: string; - channel?: string; - overrides?: TtsDirectiveOverrides; - disableFallback?: boolean; -}): Promise { - const setup = resolveTtsRequestSetup({ - text: params.text, - cfg: params.cfg, - prefsPath: params.prefsPath, - providerOverride: params.overrides?.provider, - disableFallback: params.disableFallback, - }); - if ("error" in setup) { - return { success: false, error: setup.error }; - } - - const { config, providers } = setup; - const channelId = resolveChannelId(params.channel); - const target = channelId && OPUS_CHANNELS.has(channelId) ? "voice-note" : "audio-file"; - - const errors: string[] = []; - - for (const provider of providers) { - const providerStart = Date.now(); - try { - const resolvedProvider = resolveReadySpeechProvider({ - provider, - cfg: params.cfg, - config, - errors, - }); - if (!resolvedProvider) { - continue; - } - const synthesis = await resolvedProvider.synthesize({ - text: params.text, - cfg: params.cfg, - providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg), - target, - providerOverrides: params.overrides?.providerOverrides?.[resolvedProvider.id], - timeoutMs: config.timeoutMs, - }); - return { - success: true, - audioBuffer: synthesis.audioBuffer, - latencyMs: Date.now() - providerStart, - provider, - outputFormat: synthesis.outputFormat, - voiceCompatible: synthesis.voiceCompatible, - fileExtension: synthesis.fileExtension, - }; - } catch (err) { - errors.push(formatTtsProviderError(provider, err)); - } - } - - return buildTtsFailureResult(errors); -} - -export async function textToSpeechTelephony(params: { - text: string; - cfg: OpenClawConfig; - prefsPath?: string; -}): Promise { - const setup = resolveTtsRequestSetup({ - text: params.text, - cfg: params.cfg, - prefsPath: params.prefsPath, - }); - if ("error" in setup) { - return { success: false, error: setup.error }; - } - - const { config, providers } = setup; - - const errors: string[] = []; - - for (const provider of providers) { - const providerStart = Date.now(); - try { - const resolvedProvider = resolveReadySpeechProvider({ - provider, - cfg: params.cfg, - config, - errors, - requireTelephony: true, - }); - if (!resolvedProvider?.synthesizeTelephony) { - continue; - } - const synthesis = await resolvedProvider.synthesizeTelephony({ - text: params.text, - cfg: params.cfg, - providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg), - timeoutMs: config.timeoutMs, - }); - - return { - success: true, - audioBuffer: synthesis.audioBuffer, - latencyMs: Date.now() - providerStart, - provider, - outputFormat: synthesis.outputFormat, - sampleRate: synthesis.sampleRate, - }; - } catch (err) { - errors.push(formatTtsProviderError(provider, err)); - } - } - - return buildTtsFailureResult(errors); -} - -export async function listSpeechVoices(params: { - provider: string; - cfg?: OpenClawConfig; - config?: ResolvedTtsConfig; - apiKey?: string; - baseUrl?: string; -}): Promise { - const provider = canonicalizeSpeechProviderId(params.provider, params.cfg); - if (!provider) { - throw new Error("speech provider id is required"); - } - const config = params.config ?? (params.cfg ? resolveTtsConfig(params.cfg) : undefined); - if (!config) { - throw new Error(`speech provider ${provider} requires cfg or resolved config`); - } - const resolvedProvider = getSpeechProvider(provider, params.cfg); - if (!resolvedProvider) { - throw new Error(`speech provider ${provider} is not registered`); - } - if (!resolvedProvider.listVoices) { - throw new Error(`speech provider ${provider} does not support voice listing`); - } - return await resolvedProvider.listVoices({ - cfg: params.cfg, - providerConfig: getResolvedSpeechProviderConfig(config, resolvedProvider.id, params.cfg), - apiKey: params.apiKey, - baseUrl: params.baseUrl, - }); -} - -export async function maybeApplyTtsToPayload(params: { - payload: ReplyPayload; - cfg: OpenClawConfig; - channel?: string; - kind?: "tool" | "block" | "final"; - inboundAudio?: boolean; - ttsAuto?: string; -}): Promise { - // Compaction notices are informational UI signals — never synthesise them as speech. - if (params.payload.isCompactionNotice) { - return params.payload; - } - const config = resolveTtsConfig(params.cfg); - const prefsPath = resolveTtsPrefsPath(config); - const autoMode = resolveTtsAutoMode({ - config, - prefsPath, - sessionAuto: params.ttsAuto, - }); - if (autoMode === "off") { - return params.payload; - } - - const reply = resolveSendableOutboundReplyParts(params.payload); - const text = reply.text; - const directives = parseTtsDirectives(text, config.modelOverrides, { - cfg: params.cfg, - providerConfigs: config.providerConfigs, - }); - if (directives.warnings.length > 0) { - logVerbose(`TTS: ignored directive overrides (${directives.warnings.join("; ")})`); - } - - const cleanedText = directives.cleanedText; - const trimmedCleaned = cleanedText.trim(); - const visibleText = trimmedCleaned.length > 0 ? trimmedCleaned : ""; - const ttsText = directives.ttsText?.trim() || visibleText; - - const nextPayload = - visibleText === text.trim() - ? params.payload - : { - ...params.payload, - text: visibleText.length > 0 ? visibleText : undefined, - }; - - if (autoMode === "tagged" && !directives.hasDirective) { - return nextPayload; - } - if (autoMode === "inbound" && params.inboundAudio !== true) { - return nextPayload; - } - - const mode = config.mode ?? "final"; - if (mode === "final" && params.kind && params.kind !== "final") { - return nextPayload; - } - - if (!ttsText.trim()) { - return nextPayload; - } - if (reply.hasMedia) { - return nextPayload; - } - if (text.includes("MEDIA:")) { - return nextPayload; - } - if (ttsText.trim().length < 10) { - return nextPayload; - } - - const maxLength = getTtsMaxLength(prefsPath); - let textForAudio = ttsText.trim(); - let wasSummarized = false; - - if (textForAudio.length > maxLength) { - if (!isSummarizationEnabled(prefsPath)) { - logVerbose( - `TTS: truncating long text (${textForAudio.length} > ${maxLength}), summarization disabled.`, - ); - textForAudio = `${textForAudio.slice(0, maxLength - 3)}...`; - } else { - try { - const summary = await summarizeText({ - text: textForAudio, - targetLength: maxLength, - cfg: params.cfg, - config, - timeoutMs: config.timeoutMs, - }); - textForAudio = summary.summary; - wasSummarized = true; - if (textForAudio.length > config.maxTextLength) { - logVerbose( - `TTS: summary exceeded hard limit (${textForAudio.length} > ${config.maxTextLength}); truncating.`, - ); - textForAudio = `${textForAudio.slice(0, config.maxTextLength - 3)}...`; - } - } catch (err) { - const error = err as Error; - logVerbose(`TTS: summarization failed, truncating instead: ${error.message}`); - textForAudio = `${textForAudio.slice(0, maxLength - 3)}...`; - } - } - } - - textForAudio = stripMarkdown(textForAudio).trim(); // strip markdown for TTS (### → "hashtag" etc.) - if (textForAudio.length < 10) { - return nextPayload; - } - - const ttsStart = Date.now(); - const result = await textToSpeech({ - text: textForAudio, - cfg: params.cfg, - prefsPath, - channel: params.channel, - overrides: directives.overrides, - }); - - if (result.success && result.audioPath) { - lastTtsAttempt = { - timestamp: Date.now(), - success: true, - textLength: text.length, - summarized: wasSummarized, - provider: result.provider, - latencyMs: result.latencyMs, - }; - - const channelId = resolveChannelId(params.channel); - const shouldVoice = - channelId !== null && OPUS_CHANNELS.has(channelId) && result.voiceCompatible === true; - const finalPayload = { - ...nextPayload, - mediaUrl: result.audioPath, - audioAsVoice: shouldVoice || params.payload.audioAsVoice, - }; - return finalPayload; - } - - lastTtsAttempt = { - timestamp: Date.now(), - success: false, - textLength: text.length, - summarized: wasSummarized, - error: result.error, - }; - - const latency = Date.now() - ttsStart; - logVerbose(`TTS: conversion failed after ${latency}ms (${result.error ?? "unknown"}).`); - return nextPayload; -} - -export const _test = { - parseTtsDirectives, - resolveModelOverridePolicy, - summarizeText, - getResolvedSpeechProviderConfig, -}; + TtsResult, + TtsSynthesisResult, + TtsTelephonyResult, +} from "../plugin-sdk/speech-runtime.js";