Files
moltbot/src/agents/tools/nodes-tool.ts
2026-02-26 03:41:31 +01:00

580 lines
23 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import crypto from "node:crypto";
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
import {
type CameraFacing,
cameraTempPath,
parseCameraClipPayload,
parseCameraSnapPayload,
writeCameraClipPayloadToFile,
writeBase64ToFile,
writeUrlToFile,
} from "../../cli/nodes-camera.js";
import { parseEnvPairs, parseTimeoutMs } from "../../cli/nodes-run.js";
import {
parseScreenRecordPayload,
screenRecordTempPath,
writeScreenRecordToFile,
} from "../../cli/nodes-screen.js";
import { parseDurationMs } from "../../cli/parse-duration.js";
import type { OpenClawConfig } from "../../config/config.js";
import { formatExecCommand } from "../../infra/system-run-command.js";
import { imageMimeFromFormat } from "../../media/mime.js";
import { resolveSessionAgentId } from "../agent-scope.js";
import { resolveImageSanitizationLimits } from "../image-sanitization.js";
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
import { sanitizeToolResultImages } from "../tool-images.js";
import { type AnyAgentTool, jsonResult, readStringParam } from "./common.js";
import { callGatewayTool, readGatewayCallOptions } from "./gateway.js";
import { listNodes, resolveNodeIdFromList, resolveNodeId } from "./nodes-utils.js";
const NODES_TOOL_ACTIONS = [
"status",
"describe",
"pending",
"approve",
"reject",
"notify",
"camera_snap",
"camera_list",
"camera_clip",
"screen_record",
"location_get",
"run",
"invoke",
] as const;
const NOTIFY_PRIORITIES = ["passive", "active", "timeSensitive"] as const;
const NOTIFY_DELIVERIES = ["system", "overlay", "auto"] as const;
const CAMERA_FACING = ["front", "back", "both"] as const;
const LOCATION_ACCURACY = ["coarse", "balanced", "precise"] as const;
function isPairingRequiredMessage(message: string): boolean {
const lower = message.toLowerCase();
return lower.includes("pairing required") || lower.includes("not_paired");
}
function extractPairingRequestId(message: string): string | null {
const match = message.match(/\(requestId:\s*([^)]+)\)/i);
if (!match) {
return null;
}
const value = (match[1] ?? "").trim();
return value.length > 0 ? value : null;
}
// Flattened schema: runtime validates per-action requirements.
const NodesToolSchema = Type.Object({
action: stringEnum(NODES_TOOL_ACTIONS),
gatewayUrl: Type.Optional(Type.String()),
gatewayToken: Type.Optional(Type.String()),
timeoutMs: Type.Optional(Type.Number()),
node: Type.Optional(Type.String()),
requestId: Type.Optional(Type.String()),
// notify
title: Type.Optional(Type.String()),
body: Type.Optional(Type.String()),
sound: Type.Optional(Type.String()),
priority: optionalStringEnum(NOTIFY_PRIORITIES),
delivery: optionalStringEnum(NOTIFY_DELIVERIES),
// camera_snap / camera_clip
facing: optionalStringEnum(CAMERA_FACING, {
description: "camera_snap: front/back/both; camera_clip: front/back only.",
}),
maxWidth: Type.Optional(Type.Number()),
quality: Type.Optional(Type.Number()),
delayMs: Type.Optional(Type.Number()),
deviceId: Type.Optional(Type.String()),
duration: Type.Optional(Type.String()),
durationMs: Type.Optional(Type.Number()),
includeAudio: Type.Optional(Type.Boolean()),
// screen_record
fps: Type.Optional(Type.Number()),
screenIndex: Type.Optional(Type.Number()),
outPath: Type.Optional(Type.String()),
// location_get
maxAgeMs: Type.Optional(Type.Number()),
locationTimeoutMs: Type.Optional(Type.Number()),
desiredAccuracy: optionalStringEnum(LOCATION_ACCURACY),
// run
command: Type.Optional(Type.Array(Type.String())),
cwd: Type.Optional(Type.String()),
env: Type.Optional(Type.Array(Type.String())),
commandTimeoutMs: Type.Optional(Type.Number()),
invokeTimeoutMs: Type.Optional(Type.Number()),
needsScreenRecording: Type.Optional(Type.Boolean()),
// invoke
invokeCommand: Type.Optional(Type.String()),
invokeParamsJson: Type.Optional(Type.String()),
});
export function createNodesTool(options?: {
agentSessionKey?: string;
config?: OpenClawConfig;
}): AnyAgentTool {
const sessionKey = options?.agentSessionKey?.trim() || undefined;
const agentId = resolveSessionAgentId({
sessionKey: options?.agentSessionKey,
config: options?.config,
});
const imageSanitization = resolveImageSanitizationLimits(options?.config);
return {
label: "Nodes",
name: "nodes",
description:
"Discover and control paired nodes (status/describe/pairing/notify/camera/screen/location/run/invoke).",
parameters: NodesToolSchema,
execute: async (_toolCallId, args) => {
const params = args as Record<string, unknown>;
const action = readStringParam(params, "action", { required: true });
const gatewayOpts = readGatewayCallOptions(params);
try {
switch (action) {
case "status":
return jsonResult(await callGatewayTool("node.list", gatewayOpts, {}));
case "describe": {
const node = readStringParam(params, "node", { required: true });
const nodeId = await resolveNodeId(gatewayOpts, node);
return jsonResult(await callGatewayTool("node.describe", gatewayOpts, { nodeId }));
}
case "pending":
return jsonResult(await callGatewayTool("node.pair.list", gatewayOpts, {}));
case "approve": {
const requestId = readStringParam(params, "requestId", {
required: true,
});
return jsonResult(
await callGatewayTool("node.pair.approve", gatewayOpts, {
requestId,
}),
);
}
case "reject": {
const requestId = readStringParam(params, "requestId", {
required: true,
});
return jsonResult(
await callGatewayTool("node.pair.reject", gatewayOpts, {
requestId,
}),
);
}
case "notify": {
const node = readStringParam(params, "node", { required: true });
const title = typeof params.title === "string" ? params.title : "";
const body = typeof params.body === "string" ? params.body : "";
if (!title.trim() && !body.trim()) {
throw new Error("title or body required");
}
const nodeId = await resolveNodeId(gatewayOpts, node);
await callGatewayTool("node.invoke", gatewayOpts, {
nodeId,
command: "system.notify",
params: {
title: title.trim() || undefined,
body: body.trim() || undefined,
sound: typeof params.sound === "string" ? params.sound : undefined,
priority: typeof params.priority === "string" ? params.priority : undefined,
delivery: typeof params.delivery === "string" ? params.delivery : undefined,
},
idempotencyKey: crypto.randomUUID(),
});
return jsonResult({ ok: true });
}
case "camera_snap": {
const node = readStringParam(params, "node", { required: true });
const nodeId = await resolveNodeId(gatewayOpts, node);
const facingRaw =
typeof params.facing === "string" ? params.facing.toLowerCase() : "both";
const facings: CameraFacing[] =
facingRaw === "both"
? ["front", "back"]
: facingRaw === "front" || facingRaw === "back"
? [facingRaw]
: (() => {
throw new Error("invalid facing (front|back|both)");
})();
const maxWidth =
typeof params.maxWidth === "number" && Number.isFinite(params.maxWidth)
? params.maxWidth
: undefined;
const quality =
typeof params.quality === "number" && Number.isFinite(params.quality)
? params.quality
: undefined;
const delayMs =
typeof params.delayMs === "number" && Number.isFinite(params.delayMs)
? params.delayMs
: undefined;
const deviceId =
typeof params.deviceId === "string" && params.deviceId.trim()
? params.deviceId.trim()
: undefined;
const content: AgentToolResult<unknown>["content"] = [];
const details: Array<Record<string, unknown>> = [];
for (const facing of facings) {
const raw = await callGatewayTool<{ payload: unknown }>("node.invoke", gatewayOpts, {
nodeId,
command: "camera.snap",
params: {
facing,
maxWidth,
quality,
format: "jpg",
delayMs,
deviceId,
},
idempotencyKey: crypto.randomUUID(),
});
const payload = parseCameraSnapPayload(raw?.payload);
const normalizedFormat = payload.format.toLowerCase();
if (
normalizedFormat !== "jpg" &&
normalizedFormat !== "jpeg" &&
normalizedFormat !== "png"
) {
throw new Error(`unsupported camera.snap format: ${payload.format}`);
}
const isJpeg = normalizedFormat === "jpg" || normalizedFormat === "jpeg";
const filePath = cameraTempPath({
kind: "snap",
facing,
ext: isJpeg ? "jpg" : "png",
});
if (payload.url) {
await writeUrlToFile(filePath, payload.url);
} else if (payload.base64) {
await writeBase64ToFile(filePath, payload.base64);
}
content.push({ type: "text", text: `MEDIA:${filePath}` });
if (payload.base64) {
content.push({
type: "image",
data: payload.base64,
mimeType:
imageMimeFromFormat(payload.format) ?? (isJpeg ? "image/jpeg" : "image/png"),
});
}
details.push({
facing,
path: filePath,
width: payload.width,
height: payload.height,
});
}
const result: AgentToolResult<unknown> = { content, details };
return await sanitizeToolResultImages(result, "nodes:camera_snap", imageSanitization);
}
case "camera_list": {
const node = readStringParam(params, "node", { required: true });
const nodeId = await resolveNodeId(gatewayOpts, node);
const raw = await callGatewayTool<{ payload: unknown }>("node.invoke", gatewayOpts, {
nodeId,
command: "camera.list",
params: {},
idempotencyKey: crypto.randomUUID(),
});
const payload =
raw && typeof raw.payload === "object" && raw.payload !== null ? raw.payload : {};
return jsonResult(payload);
}
case "camera_clip": {
const node = readStringParam(params, "node", { required: true });
const nodeId = await resolveNodeId(gatewayOpts, node);
const facing =
typeof params.facing === "string" ? params.facing.toLowerCase() : "front";
if (facing !== "front" && facing !== "back") {
throw new Error("invalid facing (front|back)");
}
const durationMs =
typeof params.durationMs === "number" && Number.isFinite(params.durationMs)
? params.durationMs
: typeof params.duration === "string"
? parseDurationMs(params.duration)
: 3000;
const includeAudio =
typeof params.includeAudio === "boolean" ? params.includeAudio : true;
const deviceId =
typeof params.deviceId === "string" && params.deviceId.trim()
? params.deviceId.trim()
: undefined;
const raw = await callGatewayTool<{ payload: unknown }>("node.invoke", gatewayOpts, {
nodeId,
command: "camera.clip",
params: {
facing,
durationMs,
includeAudio,
format: "mp4",
deviceId,
},
idempotencyKey: crypto.randomUUID(),
});
const payload = parseCameraClipPayload(raw?.payload);
const filePath = await writeCameraClipPayloadToFile({
payload,
facing,
});
return {
content: [{ type: "text", text: `FILE:${filePath}` }],
details: {
facing,
path: filePath,
durationMs: payload.durationMs,
hasAudio: payload.hasAudio,
},
};
}
case "screen_record": {
const node = readStringParam(params, "node", { required: true });
const nodeId = await resolveNodeId(gatewayOpts, node);
const durationMs =
typeof params.durationMs === "number" && Number.isFinite(params.durationMs)
? params.durationMs
: typeof params.duration === "string"
? parseDurationMs(params.duration)
: 10_000;
const fps =
typeof params.fps === "number" && Number.isFinite(params.fps) ? params.fps : 10;
const screenIndex =
typeof params.screenIndex === "number" && Number.isFinite(params.screenIndex)
? params.screenIndex
: 0;
const includeAudio =
typeof params.includeAudio === "boolean" ? params.includeAudio : true;
const raw = await callGatewayTool<{ payload: unknown }>("node.invoke", gatewayOpts, {
nodeId,
command: "screen.record",
params: {
durationMs,
screenIndex,
fps,
format: "mp4",
includeAudio,
},
idempotencyKey: crypto.randomUUID(),
});
const payload = parseScreenRecordPayload(raw?.payload);
const filePath =
typeof params.outPath === "string" && params.outPath.trim()
? params.outPath.trim()
: screenRecordTempPath({ ext: payload.format || "mp4" });
const written = await writeScreenRecordToFile(filePath, payload.base64);
return {
content: [{ type: "text", text: `FILE:${written.path}` }],
details: {
path: written.path,
durationMs: payload.durationMs,
fps: payload.fps,
screenIndex: payload.screenIndex,
hasAudio: payload.hasAudio,
},
};
}
case "location_get": {
const node = readStringParam(params, "node", { required: true });
const nodeId = await resolveNodeId(gatewayOpts, node);
const maxAgeMs =
typeof params.maxAgeMs === "number" && Number.isFinite(params.maxAgeMs)
? params.maxAgeMs
: undefined;
const desiredAccuracy =
params.desiredAccuracy === "coarse" ||
params.desiredAccuracy === "balanced" ||
params.desiredAccuracy === "precise"
? params.desiredAccuracy
: undefined;
const locationTimeoutMs =
typeof params.locationTimeoutMs === "number" &&
Number.isFinite(params.locationTimeoutMs)
? params.locationTimeoutMs
: undefined;
const raw = await callGatewayTool<{ payload: unknown }>("node.invoke", gatewayOpts, {
nodeId,
command: "location.get",
params: {
maxAgeMs,
desiredAccuracy,
timeoutMs: locationTimeoutMs,
},
idempotencyKey: crypto.randomUUID(),
});
return jsonResult(raw?.payload ?? {});
}
case "run": {
const node = readStringParam(params, "node", { required: true });
const nodes = await listNodes(gatewayOpts);
if (nodes.length === 0) {
throw new Error(
"system.run requires a paired companion app or node host (no nodes available).",
);
}
const nodeId = resolveNodeIdFromList(nodes, node);
const nodeInfo = nodes.find((entry) => entry.nodeId === nodeId);
const supportsSystemRun = Array.isArray(nodeInfo?.commands)
? nodeInfo?.commands?.includes("system.run")
: false;
if (!supportsSystemRun) {
throw new Error(
"system.run requires a companion app or node host; the selected node does not support system.run.",
);
}
const commandRaw = params.command;
if (!commandRaw) {
throw new Error("command required (argv array, e.g. ['echo', 'Hello'])");
}
if (!Array.isArray(commandRaw)) {
throw new Error("command must be an array of strings (argv), e.g. ['echo', 'Hello']");
}
const command = commandRaw.map((c) => String(c));
if (command.length === 0) {
throw new Error("command must not be empty");
}
const cwd =
typeof params.cwd === "string" && params.cwd.trim() ? params.cwd.trim() : undefined;
const env = parseEnvPairs(params.env);
const commandTimeoutMs = parseTimeoutMs(params.commandTimeoutMs);
const invokeTimeoutMs = parseTimeoutMs(params.invokeTimeoutMs);
const needsScreenRecording =
typeof params.needsScreenRecording === "boolean"
? params.needsScreenRecording
: undefined;
const runParams = {
command,
cwd,
env,
timeoutMs: commandTimeoutMs,
needsScreenRecording,
agentId,
sessionKey,
};
// First attempt without approval flags.
try {
const raw = await callGatewayTool<{ payload?: unknown }>("node.invoke", gatewayOpts, {
nodeId,
command: "system.run",
params: runParams,
timeoutMs: invokeTimeoutMs,
idempotencyKey: crypto.randomUUID(),
});
return jsonResult(raw?.payload ?? {});
} catch (firstErr) {
const msg = firstErr instanceof Error ? firstErr.message : String(firstErr);
if (!msg.includes("SYSTEM_RUN_DENIED: approval required")) {
throw firstErr;
}
}
// Node requires approval create a pending approval request on
// the gateway and wait for the user to approve/deny via the UI.
const APPROVAL_TIMEOUT_MS = 120_000;
const cmdText = formatExecCommand(command);
const approvalId = crypto.randomUUID();
const approvalResult = await callGatewayTool(
"exec.approval.request",
{ ...gatewayOpts, timeoutMs: APPROVAL_TIMEOUT_MS + 5_000 },
{
id: approvalId,
command: cmdText,
commandArgv: command,
cwd,
nodeId,
host: "node",
agentId,
sessionKey,
timeoutMs: APPROVAL_TIMEOUT_MS,
},
);
const decisionRaw =
approvalResult && typeof approvalResult === "object"
? (approvalResult as { decision?: unknown }).decision
: undefined;
const approvalDecision =
decisionRaw === "allow-once" || decisionRaw === "allow-always" ? decisionRaw : null;
if (!approvalDecision) {
if (decisionRaw === "deny") {
throw new Error("exec denied: user denied");
}
if (decisionRaw === undefined || decisionRaw === null) {
throw new Error("exec denied: approval timed out");
}
throw new Error("exec denied: invalid approval decision");
}
// Retry with the approval decision.
const raw = await callGatewayTool<{ payload?: unknown }>("node.invoke", gatewayOpts, {
nodeId,
command: "system.run",
params: {
...runParams,
runId: approvalId,
approved: true,
approvalDecision,
},
timeoutMs: invokeTimeoutMs,
idempotencyKey: crypto.randomUUID(),
});
return jsonResult(raw?.payload ?? {});
}
case "invoke": {
const node = readStringParam(params, "node", { required: true });
const nodeId = await resolveNodeId(gatewayOpts, node);
const invokeCommand = readStringParam(params, "invokeCommand", { required: true });
const invokeParamsJson =
typeof params.invokeParamsJson === "string" ? params.invokeParamsJson.trim() : "";
let invokeParams: unknown = {};
if (invokeParamsJson) {
try {
invokeParams = JSON.parse(invokeParamsJson);
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
throw new Error(`invokeParamsJson must be valid JSON: ${message}`, {
cause: err,
});
}
}
const invokeTimeoutMs = parseTimeoutMs(params.invokeTimeoutMs);
const raw = await callGatewayTool("node.invoke", gatewayOpts, {
nodeId,
command: invokeCommand,
params: invokeParams,
timeoutMs: invokeTimeoutMs,
idempotencyKey: crypto.randomUUID(),
});
return jsonResult(raw ?? {});
}
default:
throw new Error(`Unknown action: ${action}`);
}
} catch (err) {
const nodeLabel =
typeof params.node === "string" && params.node.trim() ? params.node.trim() : "auto";
const gatewayLabel =
gatewayOpts.gatewayUrl && gatewayOpts.gatewayUrl.trim()
? gatewayOpts.gatewayUrl.trim()
: "default";
const agentLabel = agentId ?? "unknown";
let message = err instanceof Error ? err.message : String(err);
if (action === "invoke" && isPairingRequiredMessage(message)) {
const requestId = extractPairingRequestId(message);
const approveHint = requestId
? `Approve pairing request ${requestId} and retry.`
: "Approve the pending pairing request and retry.";
message = `pairing required before node invoke. ${approveHint}`;
}
throw new Error(
`agent=${agentLabel} node=${nodeLabel} gateway=${gatewayLabel} action=${action}: ${message}`,
{ cause: err },
);
}
},
};
}