diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a7d5828fb6..b4c811d2324 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,12 +9,12 @@ Docs: https://docs.openclaw.ai - Channels/CLI: add per-account/channel `defaultTo` outbound routing fallback so `openclaw agent --deliver` can send without explicit `--reply-to` when a default target is configured. (#16985) Thanks @KirillShchetinin. - iOS/Chat: clean chat UI noise by stripping inbound untrusted metadata/timestamp prefixes, formatting tool outputs into concise summaries/errors, compacting the composer while typing, and supporting tap-to-dismiss keyboard in chat view. (#22122) thanks @mbelinky. - iOS/Watch: bridge mirrored watch prompt notification actions into iOS quick-reply handling, including queued action handoff until app model initialization. (#22123) thanks @mbelinky. -- iOS/Permissions: gate advertised iOS node capabilities/commands by live OS permission state (photos/contacts/calendar/reminders/motion), add Settings permission controls and disclosure, and refresh active gateway registration after permission-driven settings changes. (#22135) thanks @mbelinky. - iOS/Tests: cover IPv4-mapped IPv6 loopback in manual TLS policy tests for connect validation paths. (#22045) Thanks @mbelinky. - iOS/Gateway: stabilize background wake and reconnect behavior with background reconnect suppression/lease windows, BGAppRefresh wake fallback, location wake hook throttling, and APNs wake retry+nudge instrumentation. (#21226) thanks @mbelinky. - Auto-reply/UI: add model fallback lifecycle visibility in verbose logs, /status active-model context with fallback reason, and cohesive WebUI fallback indicators. (#20704) Thanks @joshavant. - Discord/Streaming: add stream preview mode for live draft replies with partial/block options and configurable chunking. Thanks @thewilloftheshadow. Inspiration @neoagentic-ship-it. 
- Discord/Telegram: add configurable lifecycle status reactions for queued/thinking/tool/done/error phases with a shared controller and emoji/timing overrides. Thanks @wolly-tundracube and @thewilloftheshadow. +- Discord/Voice: add voice channel join/leave/status via `/vc`, plus auto-join configuration for realtime voice conversations. Thanks @thewilloftheshadow. ### Fixes @@ -128,7 +128,6 @@ Docs: https://docs.openclaw.ai - Security/Voice Call: harden `voice-call` telephony TTS override merging by blocking unsafe deep-merge keys (`__proto__`, `prototype`, `constructor`) and add regression coverage for top-level and nested prototype-pollution payloads. - Security/Windows Daemon: harden Scheduled Task `gateway.cmd` generation by quoting cmd metacharacter arguments, escaping `%`/`!` expansions, and rejecting CR/LF in arguments, descriptions, and environment assignments (`set "KEY=VALUE"`), preventing command injection in Windows daemon startup scripts. This ships in the next npm release. Thanks @tdjackey for reporting. - Security/Gateway/Canvas: replace shared-IP fallback auth with node-scoped session capability URLs for `/__openclaw__/canvas/*` and `/__openclaw__/a2ui/*`, fail closed when trusted-proxy requests omit forwarded client headers, and add IPv6/proxy-header regression coverage. This ships in the next npm release. Thanks @aether-ai-agent for reporting. -- Security/Canvas: restrict A2UI JSONL file reads to allowed local roots and reject non-local `jsonlPath` schemes to prevent unintended file exposure. Thanks @thewilloftheshadow. - Security/Net: enforce strict dotted-decimal IPv4 literals in SSRF checks and fail closed on unsupported legacy forms (octal/hex/short/packed, for example `0177.0.0.1`, `127.1`, `2130706433`) before DNS lookup. - Security/Discord: enforce trusted-sender guild permission checks for moderation actions (`timeout`, `kick`, `ban`) and ignore untrusted `senderUserId` params to prevent privilege escalation in tool-driven flows. 
Thanks @aether-ai-agent for reporting. - Security/ACP+Exec: add `openclaw acp --token-file/--password-file` secret-file support (with inline secret flag warnings), redact ACP working-directory prefixes to `~` home-relative paths, constrain exec script preflight file inspection to the effective `workdir` boundary, and add security-audit warnings when `tools.exec.host="sandbox"` is configured while sandbox mode is off. diff --git a/apps/ios/Sources/Gateway/GatewayConnectionController.swift b/apps/ios/Sources/Gateway/GatewayConnectionController.swift index 812cb593bf7..acfb9aab358 100644 --- a/apps/ios/Sources/Gateway/GatewayConnectionController.swift +++ b/apps/ios/Sources/Gateway/GatewayConnectionController.swift @@ -1,12 +1,15 @@ import AVFoundation +import Contacts import CoreLocation import CoreMotion import CryptoKit +import EventKit import Foundation import Darwin import OpenClawKit import Network import Observation +import Photos import ReplayKit import Security import Speech @@ -701,7 +704,7 @@ final class GatewayConnectionController { var addr = in_addr() let parsed = host.withCString { inet_pton(AF_INET, $0, &addr) == 1 } guard parsed else { return false } - let value = UInt32(bigEndian: addr.s_addr) + let value = ntohl(addr.s_addr) let firstOctet = UInt8((value >> 24) & 0xFF) return firstOctet == 127 } @@ -780,7 +783,6 @@ final class GatewayConnectionController { } private func currentCaps() -> [String] { - let permissionSnapshot = IOSPermissionCenter.statusSnapshot() var caps = [OpenClawCapability.canvas.rawValue, OpenClawCapability.screen.rawValue] // Default-on: if the key doesn't exist yet, treat it as enabled. 
@@ -801,19 +803,11 @@ final class GatewayConnectionController { if WatchMessagingService.isSupportedOnDevice() { caps.append(OpenClawCapability.watch.rawValue) } - if permissionSnapshot.photosAllowed { - caps.append(OpenClawCapability.photos.rawValue) - } - if permissionSnapshot.contactsAllowed { - caps.append(OpenClawCapability.contacts.rawValue) - } - if permissionSnapshot.calendarReadAllowed || permissionSnapshot.calendarWriteAllowed { - caps.append(OpenClawCapability.calendar.rawValue) - } - if permissionSnapshot.remindersReadAllowed || permissionSnapshot.remindersWriteAllowed { - caps.append(OpenClawCapability.reminders.rawValue) - } - if Self.motionAvailable() && permissionSnapshot.motionAllowed { + caps.append(OpenClawCapability.photos.rawValue) + caps.append(OpenClawCapability.contacts.rawValue) + caps.append(OpenClawCapability.calendar.rawValue) + caps.append(OpenClawCapability.reminders.rawValue) + if Self.motionAvailable() { caps.append(OpenClawCapability.motion.rawValue) } @@ -821,7 +815,6 @@ final class GatewayConnectionController { } private func currentCommands() -> [String] { - let permissionSnapshot = IOSPermissionCenter.statusSnapshot() var commands: [String] = [ OpenClawCanvasCommand.present.rawValue, OpenClawCanvasCommand.hide.rawValue, @@ -865,20 +858,12 @@ final class GatewayConnectionController { commands.append(OpenClawContactsCommand.add.rawValue) } if caps.contains(OpenClawCapability.calendar.rawValue) { - if permissionSnapshot.calendarReadAllowed { - commands.append(OpenClawCalendarCommand.events.rawValue) - } - if permissionSnapshot.calendarWriteAllowed { - commands.append(OpenClawCalendarCommand.add.rawValue) - } + commands.append(OpenClawCalendarCommand.events.rawValue) + commands.append(OpenClawCalendarCommand.add.rawValue) } if caps.contains(OpenClawCapability.reminders.rawValue) { - if permissionSnapshot.remindersReadAllowed { - commands.append(OpenClawRemindersCommand.list.rawValue) - } - if permissionSnapshot.remindersWriteAllowed 
{ - commands.append(OpenClawRemindersCommand.add.rawValue) - } + commands.append(OpenClawRemindersCommand.list.rawValue) + commands.append(OpenClawRemindersCommand.add.rawValue) } if caps.contains(OpenClawCapability.motion.rawValue) { commands.append(OpenClawMotionCommand.activity.rawValue) @@ -889,7 +874,6 @@ final class GatewayConnectionController { } private func currentPermissions() -> [String: Bool] { - let permissionSnapshot = IOSPermissionCenter.statusSnapshot() var permissions: [String: Bool] = [:] permissions["camera"] = AVCaptureDevice.authorizationStatus(for: .video) == .authorized permissions["microphone"] = AVCaptureDevice.authorizationStatus(for: .audio) == .authorized @@ -899,23 +883,22 @@ final class GatewayConnectionController { && CLLocationManager.locationServicesEnabled() permissions["screenRecording"] = RPScreenRecorder.shared().isAvailable - permissions["photos"] = permissionSnapshot.photosAllowed - permissions["photosDenied"] = permissionSnapshot.photos.isDeniedOrRestricted - permissions["contacts"] = permissionSnapshot.contactsAllowed - permissions["contactsDenied"] = permissionSnapshot.contacts.isDeniedOrRestricted + let photoStatus = PHPhotoLibrary.authorizationStatus(for: .readWrite) + permissions["photos"] = photoStatus == .authorized || photoStatus == .limited + let contactsStatus = CNContactStore.authorizationStatus(for: .contacts) + permissions["contacts"] = contactsStatus == .authorized || contactsStatus == .limited - permissions["calendar"] = permissionSnapshot.calendarReadAllowed || permissionSnapshot.calendarWriteAllowed - permissions["calendarRead"] = permissionSnapshot.calendarReadAllowed - permissions["calendarWrite"] = permissionSnapshot.calendarWriteAllowed - permissions["calendarDenied"] = permissionSnapshot.calendar.isDeniedOrRestricted + let calendarStatus = EKEventStore.authorizationStatus(for: .event) + permissions["calendar"] = + calendarStatus == .authorized || calendarStatus == .fullAccess || calendarStatus == 
.writeOnly + let remindersStatus = EKEventStore.authorizationStatus(for: .reminder) + permissions["reminders"] = + remindersStatus == .authorized || remindersStatus == .fullAccess || remindersStatus == .writeOnly - permissions["reminders"] = permissionSnapshot.remindersReadAllowed || permissionSnapshot.remindersWriteAllowed - permissions["remindersRead"] = permissionSnapshot.remindersReadAllowed - permissions["remindersWrite"] = permissionSnapshot.remindersWriteAllowed - permissions["remindersDenied"] = permissionSnapshot.reminders.isDeniedOrRestricted - - permissions["motion"] = permissionSnapshot.motionAllowed - permissions["motionDenied"] = permissionSnapshot.motion.isDeniedOrRestricted + let motionStatus = CMMotionActivityManager.authorizationStatus() + let pedometerStatus = CMPedometer.authorizationStatus() + permissions["motion"] = + motionStatus == .authorized || pedometerStatus == .authorized let watchStatus = WatchMessagingService.currentStatusSnapshot() permissions["watchSupported"] = watchStatus.supported diff --git a/apps/ios/Sources/Info.plist b/apps/ios/Sources/Info.plist index 7ef344fc853..37ab15e4a85 100644 --- a/apps/ios/Sources/Info.plist +++ b/apps/ios/Sources/Info.plist @@ -44,22 +44,12 @@ NSCameraUsageDescription OpenClaw can capture photos or short video clips when requested via the gateway. - NSPhotoLibraryUsageDescription - OpenClaw can read your photo library when you ask it to share recent photos. - NSContactsUsageDescription - OpenClaw can read and create contacts when requested via the gateway. NSLocalNetworkUsageDescription OpenClaw discovers and connects to your OpenClaw gateway on the local network. NSLocationAlwaysAndWhenInUseUsageDescription OpenClaw can share your location in the background when you enable Always. NSLocationWhenInUseUsageDescription OpenClaw uses your location when you allow location sharing. - NSCalendarsFullAccessUsageDescription - OpenClaw can read and add calendar events when requested via the gateway. 
- NSRemindersFullAccessUsageDescription - OpenClaw can read and add reminders when requested via the gateway. - NSMotionUsageDescription - OpenClaw uses Motion & Fitness data for activity and pedometer commands. NSMicrophoneUsageDescription OpenClaw needs microphone access for voice wake. NSSpeechRecognitionUsageDescription diff --git a/apps/ios/Sources/Model/NodeAppModel+Permissions.swift b/apps/ios/Sources/Model/NodeAppModel+Permissions.swift deleted file mode 100644 index 9560b6035a8..00000000000 --- a/apps/ios/Sources/Model/NodeAppModel+Permissions.swift +++ /dev/null @@ -1,22 +0,0 @@ -import Foundation -import UIKit - -@MainActor -extension NodeAppModel { - func permissionSnapshot() -> IOSPermissionSnapshot { - IOSPermissionCenter.statusSnapshot() - } - - @discardableResult - func requestPermission(_ permission: IOSPermissionKind) async -> IOSPermissionSnapshot { - _ = await IOSPermissionCenter.request(permission) - return IOSPermissionCenter.statusSnapshot() - } - - func openSystemSettings() { - guard let url = URL(string: UIApplication.openSettingsURLString) else { - return - } - UIApplication.shared.open(url) - } -} diff --git a/apps/ios/Sources/Permissions/IOSPermissionCenter.swift b/apps/ios/Sources/Permissions/IOSPermissionCenter.swift deleted file mode 100644 index 03a9b942a34..00000000000 --- a/apps/ios/Sources/Permissions/IOSPermissionCenter.swift +++ /dev/null @@ -1,303 +0,0 @@ -import Contacts -import CoreMotion -import EventKit -import Foundation -import Photos - -enum IOSPermissionKind: String, CaseIterable, Identifiable, Sendable { - case photos - case contacts - case calendar - case reminders - case motion - - var id: String { self.rawValue } - - var title: String { - switch self { - case .photos: - "Photos" - case .contacts: - "Contacts" - case .calendar: - "Calendar" - case .reminders: - "Reminders" - case .motion: - "Motion & Fitness" - } - } -} - -enum IOSPermissionState: String, Equatable, Sendable { - case granted - case limited - case 
writeOnly - case denied - case restricted - case notDetermined - case unavailable - - var label: String { - switch self { - case .granted: - "Granted" - case .limited: - "Limited" - case .writeOnly: - "Write only" - case .denied: - "Denied" - case .restricted: - "Restricted" - case .notDetermined: - "Not requested" - case .unavailable: - "Unavailable" - } - } - - var isDeniedOrRestricted: Bool { - self == .denied || self == .restricted - } -} - -struct IOSPermissionSnapshot: Equatable, Sendable { - var photos: IOSPermissionState - var contacts: IOSPermissionState - var calendar: IOSPermissionState - var reminders: IOSPermissionState - var motion: IOSPermissionState - - static let initial = IOSPermissionSnapshot( - photos: .notDetermined, - contacts: .notDetermined, - calendar: .notDetermined, - reminders: .notDetermined, - motion: .notDetermined) - - func state(for kind: IOSPermissionKind) -> IOSPermissionState { - switch kind { - case .photos: - self.photos - case .contacts: - self.contacts - case .calendar: - self.calendar - case .reminders: - self.reminders - case .motion: - self.motion - } - } - - var photosAllowed: Bool { - self.photos == .granted || self.photos == .limited - } - - var contactsAllowed: Bool { - self.contacts == .granted || self.contacts == .limited - } - - var calendarReadAllowed: Bool { - self.calendar == .granted - } - - var calendarWriteAllowed: Bool { - self.calendar == .granted || self.calendar == .writeOnly - } - - var remindersReadAllowed: Bool { - self.reminders == .granted - } - - var remindersWriteAllowed: Bool { - self.reminders == .granted || self.reminders == .writeOnly - } - - var motionAllowed: Bool { - self.motion == .granted - } -} - -@MainActor -enum IOSPermissionCenter { - static func statusSnapshot() -> IOSPermissionSnapshot { - IOSPermissionSnapshot( - photos: self.mapPhotoStatus(PHPhotoLibrary.authorizationStatus(for: .readWrite)), - contacts: self.mapContactsStatus(CNContactStore.authorizationStatus(for: .contacts)), - 
calendar: self.mapEventKitStatus(EKEventStore.authorizationStatus(for: .event)), - reminders: self.mapEventKitStatus(EKEventStore.authorizationStatus(for: .reminder)), - motion: self.motionState()) - } - - static func request(_ kind: IOSPermissionKind) async -> IOSPermissionState { - switch kind { - case .photos: - await self.requestPhotosIfNeeded() - case .contacts: - await self.requestContactsIfNeeded() - case .calendar: - await self.requestCalendarIfNeeded() - case .reminders: - await self.requestRemindersIfNeeded() - case .motion: - await self.requestMotionIfNeeded() - } - return self.statusSnapshot().state(for: kind) - } - - private static func requestPhotosIfNeeded() async { - guard PHPhotoLibrary.authorizationStatus(for: .readWrite) == .notDetermined else { - return - } - _ = await withCheckedContinuation { (cont: CheckedContinuation) in - PHPhotoLibrary.requestAuthorization(for: .readWrite) { status in - cont.resume(returning: status) - } - } - } - - private static func requestContactsIfNeeded() async { - guard CNContactStore.authorizationStatus(for: .contacts) == .notDetermined else { - return - } - let store = CNContactStore() - _ = await withCheckedContinuation { (cont: CheckedContinuation) in - store.requestAccess(for: .contacts) { granted, _ in - cont.resume(returning: granted) - } - } - } - - private static func requestCalendarIfNeeded() async { - let status = EKEventStore.authorizationStatus(for: .event) - guard status == .notDetermined || status == .writeOnly else { - return - } - let store = EKEventStore() - _ = try? await store.requestFullAccessToEvents() - } - - private static func requestRemindersIfNeeded() async { - let status = EKEventStore.authorizationStatus(for: .reminder) - guard status == .notDetermined || status == .writeOnly else { - return - } - let store = EKEventStore() - _ = try? 
await store.requestFullAccessToReminders() - } - - private static func requestMotionIfNeeded() async { - guard self.motionState() == .notDetermined else { - return - } - - let activityManager = CMMotionActivityManager() - await self.runPermissionProbe { complete in - let end = Date() - activityManager.queryActivityStarting( - from: end.addingTimeInterval(-120), - to: end, - to: OperationQueue()) { _, _ in - complete() - } - } - - let pedometer = CMPedometer() - await self.runPermissionProbe { complete in - let end = Date() - pedometer.queryPedometerData( - from: end.addingTimeInterval(-120), - to: end) { _, _ in - complete() - } - } - } - - private static func runPermissionProbe(start: (@escaping () -> Void) -> Void) async { - await withCheckedContinuation { (cont: CheckedContinuation) in - let lock = NSLock() - var resumed = false - start { - lock.lock() - defer { lock.unlock() } - guard !resumed else { return } - resumed = true - cont.resume(returning: ()) - } - } - } - - private static func mapPhotoStatus(_ status: PHAuthorizationStatus) -> IOSPermissionState { - switch status { - case .authorized: - .granted - case .limited: - .limited - case .denied: - .denied - case .restricted: - .restricted - case .notDetermined: - .notDetermined - @unknown default: - .restricted - } - } - - private static func mapContactsStatus(_ status: CNAuthorizationStatus) -> IOSPermissionState { - switch status { - case .authorized: - .granted - case .limited: - .limited - case .denied: - .denied - case .restricted: - .restricted - case .notDetermined: - .notDetermined - @unknown default: - .restricted - } - } - - private static func mapEventKitStatus(_ status: EKAuthorizationStatus) -> IOSPermissionState { - switch status { - case .authorized, .fullAccess: - .granted - case .writeOnly: - .writeOnly - case .denied: - .denied - case .restricted: - .restricted - case .notDetermined: - .notDetermined - @unknown default: - .restricted - } - } - - private static func motionState() -> 
IOSPermissionState { - let available = CMMotionActivityManager.isActivityAvailable() || CMPedometer.isStepCountingAvailable() - guard available else { - return .unavailable - } - - let activity = CMMotionActivityManager.authorizationStatus() - let pedometer = CMPedometer.authorizationStatus() - - if activity == .authorized || pedometer == .authorized { - return .granted - } - if activity == .restricted || pedometer == .restricted { - return .restricted - } - if activity == .denied || pedometer == .denied { - return .denied - } - return .notDetermined - } -} diff --git a/apps/ios/Sources/Settings/PermissionsDisclosureSection.swift b/apps/ios/Sources/Settings/PermissionsDisclosureSection.swift deleted file mode 100644 index aa0cc8fb726..00000000000 --- a/apps/ios/Sources/Settings/PermissionsDisclosureSection.swift +++ /dev/null @@ -1,98 +0,0 @@ -import SwiftUI - -struct PermissionsDisclosureSection: View { - let snapshot: IOSPermissionSnapshot - let requestingPermission: IOSPermissionKind? 
- let onRequest: (IOSPermissionKind) -> Void - let onOpenSettings: () -> Void - let onInfo: (IOSPermissionKind) -> Void - - var body: some View { - DisclosureGroup("Permissions") { - self.permissionRow(.photos) - self.permissionRow(.contacts) - self.permissionRow(.calendar) - self.permissionRow(.reminders) - self.permissionRow(.motion) - - Button { - self.onOpenSettings() - } label: { - Label("Open iOS Settings", systemImage: "gear") - } - } - } - - @ViewBuilder - private func permissionRow(_ kind: IOSPermissionKind) -> some View { - let state = self.snapshot.state(for: kind) - HStack(spacing: 8) { - Text(kind.title) - Spacer() - Text(state.label) - .font(.footnote) - .foregroundStyle(self.permissionStatusColor(for: state)) - if self.requestingPermission == kind { - ProgressView() - .progressViewStyle(.circular) - } - if let action = self.permissionAction(for: state) { - Button(action.title) { - switch action { - case .request: - self.onRequest(kind) - case .openSettings: - self.onOpenSettings() - } - } - .disabled(self.requestingPermission != nil) - } - Button { - self.onInfo(kind) - } label: { - Image(systemName: "info.circle") - .foregroundStyle(.secondary) - } - .buttonStyle(.plain) - .accessibilityLabel("\(kind.title) permission info") - } - } - - private enum PermissionAction { - case request - case openSettings - - var title: String { - switch self { - case .request: - "Request" - case .openSettings: - "Settings" - } - } - } - - private func permissionAction(for state: IOSPermissionState) -> PermissionAction? 
{ - switch state { - case .notDetermined, .writeOnly: - .request - case .denied, .restricted: - .openSettings - case .granted, .limited, .unavailable: - nil - } - } - - private func permissionStatusColor(for state: IOSPermissionState) -> Color { - switch state { - case .granted, .limited: - .green - case .writeOnly: - .orange - case .denied, .restricted: - .red - case .notDetermined, .unavailable: - .secondary - } - } -} diff --git a/apps/ios/Sources/Settings/SettingsTab.swift b/apps/ios/Sources/Settings/SettingsTab.swift index 0acf0530ffc..a74f2fed952 100644 --- a/apps/ios/Sources/Settings/SettingsTab.swift +++ b/apps/ios/Sources/Settings/SettingsTab.swift @@ -16,7 +16,6 @@ struct SettingsTab: View { @Environment(VoiceWakeManager.self) private var voiceWake: VoiceWakeManager @Environment(GatewayConnectionController.self) private var gatewayController: GatewayConnectionController @Environment(\.dismiss) private var dismiss - @Environment(\.scenePhase) private var scenePhase @AppStorage("node.displayName") private var displayName: String = "iOS Node" @AppStorage("node.instanceId") private var instanceId: String = UUID().uuidString @AppStorage("voiceWake.enabled") private var voiceWakeEnabled: Bool = false @@ -43,6 +42,7 @@ struct SettingsTab: View { @AppStorage("gateway.hasConnectedOnce") private var hasConnectedOnce: Bool = false @State private var connectingGatewayID: String? + @State private var lastLocationModeRaw: String = OpenClawLocationMode.off.rawValue @State private var gatewayToken: String = "" @State private var gatewayPassword: String = "" @State private var defaultShareInstruction: String = "" @@ -51,8 +51,6 @@ struct SettingsTab: View { @State private var manualGatewayPortText: String = "" @State private var gatewayExpanded: Bool = true @State private var selectedAgentPickerId: String = "" - @State private var permissionSnapshot: IOSPermissionSnapshot = .initial - @State private var requestingPermission: IOSPermissionKind? 
@State private var showResetOnboardingAlert: Bool = false @State private var activeFeatureHelp: FeatureHelp? @@ -61,23 +59,317 @@ struct SettingsTab: View { private let gatewayLogger = Logger(subsystem: "ai.openclaw.ios", category: "GatewaySettings") var body: some View { - self.settingsScreen - .gatewayTrustPromptAlert() - } + NavigationStack { + Form { + Section { + DisclosureGroup(isExpanded: self.$gatewayExpanded) { + if !self.isGatewayConnected { + Text( + "1. Open Telegram and message your bot: /pair\n" + + "2. Copy the setup code it returns\n" + + "3. Paste here and tap Connect\n" + + "4. Back in Telegram, run /pair approve") + .font(.footnote) + .foregroundStyle(.secondary) - @ViewBuilder - private var settingsScreen: some View { - let base = NavigationStack { - self.settingsForm - } - self.lifecycleObservedSettingsScreen(self.presentedSettingsScreen(base)) - } + if let warning = self.tailnetWarningText { + Text(warning) + .font(.footnote.weight(.semibold)) + .foregroundStyle(.orange) + } - private func presentedSettingsScreen(_ content: Content) -> some View { - content + TextField("Paste setup code", text: self.$setupCode) + .textInputAutocapitalization(.never) + .autocorrectionDisabled() + + Button { + Task { await self.applySetupCodeAndConnect() } + } label: { + if self.connectingGatewayID == "manual" { + HStack(spacing: 8) { + ProgressView() + .progressViewStyle(.circular) + Text("Connecting…") + } + } else { + Text("Connect with setup code") + } + } + .disabled(self.connectingGatewayID != nil + || self.setupCode.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty) + + if let status = self.setupStatusLine { + Text(status) + .font(.footnote) + .foregroundStyle(.secondary) + } + } + + if self.isGatewayConnected { + Picker("Bot", selection: self.$selectedAgentPickerId) { + Text("Default").tag("") + let defaultId = (self.appModel.gatewayDefaultAgentId ?? 
"") + .trimmingCharacters(in: .whitespacesAndNewlines) + ForEach(self.appModel.gatewayAgents.filter { $0.id != defaultId }, id: \.id) { agent in + let name = (agent.name ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + Text(name.isEmpty ? agent.id : name).tag(agent.id) + } + } + Text("Controls which bot Chat and Talk speak to.") + .font(.footnote) + .foregroundStyle(.secondary) + } + + if self.appModel.gatewayServerName == nil { + LabeledContent("Discovery", value: self.gatewayController.discoveryStatusText) + } + LabeledContent("Status", value: self.appModel.gatewayStatusText) + Toggle("Auto-connect on launch", isOn: self.$gatewayAutoConnect) + + if let serverName = self.appModel.gatewayServerName { + LabeledContent("Server", value: serverName) + if let addr = self.appModel.gatewayRemoteAddress { + let parts = Self.parseHostPort(from: addr) + let urlString = Self.httpURLString(host: parts?.host, port: parts?.port, fallback: addr) + LabeledContent("Address") { + Text(urlString) + } + .contextMenu { + Button { + UIPasteboard.general.string = urlString + } label: { + Label("Copy URL", systemImage: "doc.on.doc") + } + + if let parts { + Button { + UIPasteboard.general.string = parts.host + } label: { + Label("Copy Host", systemImage: "doc.on.doc") + } + + Button { + UIPasteboard.general.string = "\(parts.port)" + } label: { + Label("Copy Port", systemImage: "doc.on.doc") + } + } + } + } + + Button("Disconnect", role: .destructive) { + self.appModel.disconnectGateway() + } + } else { + self.gatewayList(showing: .all) + } + + DisclosureGroup("Advanced") { + Toggle("Use Manual Gateway", isOn: self.$manualGatewayEnabled) + + TextField("Host", text: self.$manualGatewayHost) + .textInputAutocapitalization(.never) + .autocorrectionDisabled() + + TextField("Port (optional)", text: self.manualPortBinding) + .keyboardType(.numberPad) + + Toggle("Use TLS", isOn: self.$manualGatewayTLS) + + Button { + Task { await self.connectManual() } + } label: { + if 
self.connectingGatewayID == "manual" { + HStack(spacing: 8) { + ProgressView() + .progressViewStyle(.circular) + Text("Connecting…") + } + } else { + Text("Connect (Manual)") + } + } + .disabled(self.connectingGatewayID != nil || self.manualGatewayHost + .trimmingCharacters(in: .whitespacesAndNewlines) + .isEmpty || !self.manualPortIsValid) + + Text( + "Use this when mDNS/Bonjour discovery is blocked. " + + "Leave port empty for 443 on tailnet DNS (TLS) or 18789 otherwise.") + .font(.footnote) + .foregroundStyle(.secondary) + + Toggle("Discovery Debug Logs", isOn: self.$discoveryDebugLogsEnabled) + .onChange(of: self.discoveryDebugLogsEnabled) { _, newValue in + self.gatewayController.setDiscoveryDebugLoggingEnabled(newValue) + } + + NavigationLink("Discovery Logs") { + GatewayDiscoveryDebugLogView() + } + + Toggle("Debug Canvas Status", isOn: self.$canvasDebugStatusEnabled) + + TextField("Gateway Auth Token", text: self.$gatewayToken) + .textInputAutocapitalization(.never) + .autocorrectionDisabled() + + SecureField("Gateway Password", text: self.$gatewayPassword) + + Button("Reset Onboarding", role: .destructive) { + self.showResetOnboardingAlert = true + } + + VStack(alignment: .leading, spacing: 6) { + Text("Debug") + .font(.footnote.weight(.semibold)) + .foregroundStyle(.secondary) + Text(self.gatewayDebugText()) + .font(.system(size: 12, weight: .regular, design: .monospaced)) + .foregroundStyle(.secondary) + .frame(maxWidth: .infinity, alignment: .leading) + .padding(10) + .background(.thinMaterial, in: RoundedRectangle(cornerRadius: 10, style: .continuous)) + } + } + } label: { + HStack(spacing: 10) { + Circle() + .fill(self.isGatewayConnected ? 
Color.green : Color.secondary.opacity(0.35)) + .frame(width: 10, height: 10) + Text("Gateway") + Spacer() + Text(self.gatewaySummaryText) + .font(.footnote) + .foregroundStyle(.secondary) + } + } + } + + Section("Device") { + DisclosureGroup("Features") { + self.featureToggle( + "Voice Wake", + isOn: self.$voiceWakeEnabled, + help: "Enables wake-word activation to start a hands-free session.") { newValue in + self.appModel.setVoiceWakeEnabled(newValue) + } + self.featureToggle( + "Talk Mode", + isOn: self.$talkEnabled, + help: "Enables voice conversation mode with your connected OpenClaw agent.") { newValue in + self.appModel.setTalkEnabled(newValue) + } + self.featureToggle( + "Background Listening", + isOn: self.$talkBackgroundEnabled, + help: "Keeps listening while the app is backgrounded. Uses more battery.") + + NavigationLink { + VoiceWakeWordsSettingsView() + } label: { + LabeledContent( + "Wake Words", + value: VoiceWakePreferences.displayString(for: self.voiceWake.triggerWords)) + } + + self.featureToggle( + "Allow Camera", + isOn: self.$cameraEnabled, + help: "Allows the gateway to request photos or short video clips while OpenClaw is foregrounded.") + + HStack(spacing: 8) { + Text("Location Access") + Spacer() + Button { + self.activeFeatureHelp = FeatureHelp( + title: "Location Access", + message: "Controls location permissions for OpenClaw. 
Off disables location tools, While Using enables foreground location, and Always enables background location.") + } label: { + Image(systemName: "info.circle") + .foregroundStyle(.secondary) + } + .buttonStyle(.plain) + .accessibilityLabel("Location Access info") + } + Picker("Location Access", selection: self.$locationEnabledModeRaw) { + Text("Off").tag(OpenClawLocationMode.off.rawValue) + Text("While Using").tag(OpenClawLocationMode.whileUsing.rawValue) + Text("Always").tag(OpenClawLocationMode.always.rawValue) + } + .labelsHidden() + .pickerStyle(.segmented) + + self.featureToggle( + "Prevent Sleep", + isOn: self.$preventSleep, + help: "Keeps the screen awake while OpenClaw is open.") + + DisclosureGroup("Advanced") { + self.featureToggle( + "Voice Directive Hint", + isOn: self.$talkVoiceDirectiveHintEnabled, + help: "Adds voice-switching instructions to Talk prompts. Disable to reduce prompt size.") + self.featureToggle( + "Show Talk Button", + isOn: self.$talkButtonEnabled, + help: "Shows the floating Talk button in the main interface.") + TextField("Default Share Instruction", text: self.$defaultShareInstruction, axis: .vertical) + .lineLimit(2 ... 
6) + .textInputAutocapitalization(.sentences) + HStack(spacing: 8) { + Text("Default Share Instruction") + .font(.footnote) + .foregroundStyle(.secondary) + Spacer() + Button { + self.activeFeatureHelp = FeatureHelp( + title: "Default Share Instruction", + message: "Appends this instruction when sharing content into OpenClaw from iOS.") + } label: { + Image(systemName: "info.circle") + .foregroundStyle(.secondary) + } + .buttonStyle(.plain) + .accessibilityLabel("Default Share Instruction info") + } + + VStack(alignment: .leading, spacing: 8) { + Button { + Task { await self.appModel.runSharePipelineSelfTest() } + } label: { + Label("Run Share Self-Test", systemImage: "checkmark.seal") + } + Text(self.appModel.lastShareEventText) + .font(.footnote) + .foregroundStyle(.secondary) + } + } + } + + DisclosureGroup("Device Info") { + TextField("Name", text: self.$displayName) + Text(self.instanceId) + .font(.footnote) + .foregroundStyle(.secondary) + .lineLimit(1) + .truncationMode(.middle) + LabeledContent("Device", value: self.deviceFamily()) + LabeledContent("Platform", value: self.platformString()) + LabeledContent("OpenClaw", value: self.openClawVersionString()) + } + } + } .navigationTitle("Settings") .toolbar { - self.closeToolbar + ToolbarItem(placement: .topBarTrailing) { + Button { + self.dismiss() + } label: { + Image(systemName: "xmark") + } + .accessibilityLabel("Close") + } } .alert("Reset Onboarding?", isPresented: self.$showResetOnboardingAlert) { Button("Reset", role: .destructive) { @@ -94,42 +386,47 @@ struct SettingsTab: View { message: Text(help.message), dismissButton: .default(Text("OK"))) } - } - - @ToolbarContentBuilder - private var closeToolbar: some ToolbarContent { - ToolbarItem(placement: .topBarTrailing) { - Button { - self.dismiss() - } label: { - Image(systemName: "xmark") - } - .accessibilityLabel("Close") - } - } - - private func lifecycleObservedSettingsScreen(_ content: Content) -> some View { - content .onAppear { - 
self.handleOnAppear() - } - .onChange(of: self.scenePhase) { _, newValue in - self.handleScenePhaseChange(newValue) + self.lastLocationModeRaw = self.locationEnabledModeRaw + self.syncManualPortText() + let trimmedInstanceId = self.instanceId.trimmingCharacters(in: .whitespacesAndNewlines) + if !trimmedInstanceId.isEmpty { + self.gatewayToken = GatewaySettingsStore.loadGatewayToken(instanceId: trimmedInstanceId) ?? "" + self.gatewayPassword = GatewaySettingsStore.loadGatewayPassword(instanceId: trimmedInstanceId) ?? "" + } + self.defaultShareInstruction = ShareToAgentSettings.loadDefaultInstruction() + self.appModel.refreshLastShareEventFromRelay() + // Keep setup front-and-center when disconnected; keep things compact once connected. + self.gatewayExpanded = !self.isGatewayConnected + self.selectedAgentPickerId = self.appModel.selectedAgentId ?? "" } .onChange(of: self.selectedAgentPickerId) { _, newValue in - self.handleSelectedAgentPickerChange(newValue) + let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) + self.appModel.setSelectedAgentId(trimmed.isEmpty ? nil : trimmed) } .onChange(of: self.appModel.selectedAgentId ?? 
"") { _, newValue in - self.handleAppSelectedAgentIdChange(newValue) + if newValue != self.selectedAgentPickerId { + self.selectedAgentPickerId = newValue + } } .onChange(of: self.preferredGatewayStableID) { _, newValue in - self.handlePreferredGatewayStableIdChange(newValue) + let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return } + GatewaySettingsStore.savePreferredGatewayStableID(trimmed) } .onChange(of: self.gatewayToken) { _, newValue in - self.handleGatewayTokenChange(newValue) + guard !self.suppressCredentialPersist else { return } + let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) + let instanceId = self.instanceId.trimmingCharacters(in: .whitespacesAndNewlines) + guard !instanceId.isEmpty else { return } + GatewaySettingsStore.saveGatewayToken(trimmed, instanceId: instanceId) } .onChange(of: self.gatewayPassword) { _, newValue in - self.handleGatewayPasswordChange(newValue) + guard !self.suppressCredentialPersist else { return } + let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) + let instanceId = self.instanceId.trimmingCharacters(in: .whitespacesAndNewlines) + guard !instanceId.isEmpty else { return } + GatewaySettingsStore.saveGatewayPassword(trimmed, instanceId: instanceId) } .onChange(of: self.defaultShareInstruction) { _, newValue in ShareToAgentSettings.saveDefaultInstruction(newValue) @@ -138,430 +435,41 @@ struct SettingsTab: View { self.syncManualPortText() } .onChange(of: self.appModel.gatewayServerName) { _, newValue in - self.handleGatewayServerNameChange(newValue) + if newValue != nil { + self.setupCode = "" + self.setupStatusText = nil + return + } + if self.manualGatewayEnabled { + self.setupStatusText = self.appModel.gatewayStatusText + } } .onChange(of: self.appModel.gatewayStatusText) { _, newValue in - self.handleGatewayStatusTextChange(newValue) + guard self.manualGatewayEnabled || self.connectingGatewayID == "manual" else { return } + 
let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return } + self.setupStatusText = trimmed } - .onChange(of: self.locationEnabledModeRaw) { oldValue, newValue in - self.handleLocationModeChange(from: oldValue, to: newValue) - } - } - - private func handleOnAppear() { - self.syncManualPortText() - let trimmedInstanceId = self.instanceId.trimmingCharacters(in: .whitespacesAndNewlines) - if !trimmedInstanceId.isEmpty { - self.gatewayToken = GatewaySettingsStore.loadGatewayToken(instanceId: trimmedInstanceId) ?? "" - self.gatewayPassword = GatewaySettingsStore.loadGatewayPassword(instanceId: trimmedInstanceId) ?? "" - } - self.defaultShareInstruction = ShareToAgentSettings.loadDefaultInstruction() - self.appModel.refreshLastShareEventFromRelay() - // Keep setup front-and-center when disconnected; keep things compact once connected. - self.gatewayExpanded = !self.isGatewayConnected - self.selectedAgentPickerId = self.appModel.selectedAgentId ?? "" - self.refreshPermissionSnapshot() - } - - private func handleScenePhaseChange(_ newValue: ScenePhase) { - guard newValue == .active else { return } - self.refreshPermissionSnapshot() - self.gatewayController.refreshActiveGatewayRegistrationFromSettings() - } - - private func handleSelectedAgentPickerChange(_ newValue: String) { - let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) - self.appModel.setSelectedAgentId(trimmed.isEmpty ? 
nil : trimmed) - } - - private func handleAppSelectedAgentIdChange(_ newValue: String) { - if newValue != self.selectedAgentPickerId { - self.selectedAgentPickerId = newValue - } - } - - private func handlePreferredGatewayStableIdChange(_ newValue: String) { - let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) - guard !trimmed.isEmpty else { return } - GatewaySettingsStore.savePreferredGatewayStableID(trimmed) - } - - private func handleGatewayTokenChange(_ newValue: String) { - guard !self.suppressCredentialPersist else { return } - let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) - let instanceId = self.instanceId.trimmingCharacters(in: .whitespacesAndNewlines) - guard !instanceId.isEmpty else { return } - GatewaySettingsStore.saveGatewayToken(trimmed, instanceId: instanceId) - } - - private func handleGatewayPasswordChange(_ newValue: String) { - guard !self.suppressCredentialPersist else { return } - let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) - let instanceId = self.instanceId.trimmingCharacters(in: .whitespacesAndNewlines) - guard !instanceId.isEmpty else { return } - GatewaySettingsStore.saveGatewayPassword(trimmed, instanceId: instanceId) - } - - private func handleGatewayServerNameChange(_ newValue: String?) 
{ - if newValue != nil { - self.setupCode = "" - self.setupStatusText = nil - return - } - if self.manualGatewayEnabled { - self.setupStatusText = self.appModel.gatewayStatusText - } - } - - private func handleGatewayStatusTextChange(_ newValue: String) { - guard self.manualGatewayEnabled || self.connectingGatewayID == "manual" else { return } - let trimmed = newValue.trimmingCharacters(in: .whitespacesAndNewlines) - guard !trimmed.isEmpty else { return } - self.setupStatusText = trimmed - } - - private func handleLocationModeChange(from oldValue: String, to newValue: String) { - guard let mode = OpenClawLocationMode(rawValue: newValue) else { return } - Task { - let granted = await self.appModel.requestLocationPermissions(mode: mode) - if !granted { - await MainActor.run { - self.locationEnabledModeRaw = oldValue - } - return - } - await MainActor.run { - self.gatewayController.refreshActiveGatewayRegistrationFromSettings() - } - } - } - - @ViewBuilder - private var settingsForm: some View { - Form { - self.gatewaySection - self.deviceSection - } - } - - @ViewBuilder - private var gatewaySection: some View { - Section { - DisclosureGroup(isExpanded: self.$gatewayExpanded) { - if !self.isGatewayConnected { - Text( - "1. Open Telegram and message your bot: /pair\n" - + "2. Copy the setup code it returns\n" - + "3. Paste here and tap Connect\n" - + "4. 
Back in Telegram, run /pair approve") - .font(.footnote) - .foregroundStyle(.secondary) - - if let warning = self.tailnetWarningText { - Text(warning) - .font(.footnote.weight(.semibold)) - .foregroundStyle(.orange) - } - - TextField("Paste setup code", text: self.$setupCode) - .textInputAutocapitalization(.never) - .autocorrectionDisabled() - - Button { - Task { await self.applySetupCodeAndConnect() } - } label: { - if self.connectingGatewayID == "manual" { - HStack(spacing: 8) { - ProgressView() - .progressViewStyle(.circular) - Text("Connecting…") - } - } else { - Text("Connect with setup code") + .onChange(of: self.locationEnabledModeRaw) { _, newValue in + let previous = self.lastLocationModeRaw + self.lastLocationModeRaw = newValue + guard let mode = OpenClawLocationMode(rawValue: newValue) else { return } + Task { + let granted = await self.appModel.requestLocationPermissions(mode: mode) + if !granted { + await MainActor.run { + self.locationEnabledModeRaw = previous + self.lastLocationModeRaw = previous } + return } - .disabled(self.connectingGatewayID != nil - || self.setupCode.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty) - - if let status = self.setupStatusLine { - Text(status) - .font(.footnote) - .foregroundStyle(.secondary) + await MainActor.run { + self.gatewayController.refreshActiveGatewayRegistrationFromSettings() } } - - if self.isGatewayConnected { - Picker("Bot", selection: self.$selectedAgentPickerId) { - Text("Default").tag("") - let defaultId = (self.appModel.gatewayDefaultAgentId ?? "") - .trimmingCharacters(in: .whitespacesAndNewlines) - ForEach(self.appModel.gatewayAgents.filter { $0.id != defaultId }, id: \.id) { agent in - let name = (agent.name ?? "").trimmingCharacters(in: .whitespacesAndNewlines) - Text(name.isEmpty ? 
agent.id : name).tag(agent.id) - } - } - Text("Controls which bot Chat and Talk speak to.") - .font(.footnote) - .foregroundStyle(.secondary) - } - - if self.appModel.gatewayServerName == nil { - LabeledContent("Discovery", value: self.gatewayController.discoveryStatusText) - } - LabeledContent("Status", value: self.appModel.gatewayStatusText) - Toggle("Auto-connect on launch", isOn: self.$gatewayAutoConnect) - - if let serverName = self.appModel.gatewayServerName { - LabeledContent("Server", value: serverName) - if let addr = self.appModel.gatewayRemoteAddress { - let parts = Self.parseHostPort(from: addr) - let urlString = Self.httpURLString(host: parts?.host, port: parts?.port, fallback: addr) - LabeledContent("Address") { - Text(urlString) - } - .contextMenu { - Button { - UIPasteboard.general.string = urlString - } label: { - Label("Copy URL", systemImage: "doc.on.doc") - } - - if let parts { - Button { - UIPasteboard.general.string = parts.host - } label: { - Label("Copy Host", systemImage: "doc.on.doc") - } - - Button { - UIPasteboard.general.string = "\(parts.port)" - } label: { - Label("Copy Port", systemImage: "doc.on.doc") - } - } - } - } - - Button("Disconnect", role: .destructive) { - self.appModel.disconnectGateway() - } - } else { - self.gatewayList(showing: .all) - } - - DisclosureGroup("Advanced") { - Toggle("Use Manual Gateway", isOn: self.$manualGatewayEnabled) - - TextField("Host", text: self.$manualGatewayHost) - .textInputAutocapitalization(.never) - .autocorrectionDisabled() - - TextField("Port (optional)", text: self.manualPortBinding) - .keyboardType(.numberPad) - - Toggle("Use TLS", isOn: self.$manualGatewayTLS) - - Button { - Task { await self.connectManual() } - } label: { - if self.connectingGatewayID == "manual" { - HStack(spacing: 8) { - ProgressView() - .progressViewStyle(.circular) - Text("Connecting…") - } - } else { - Text("Connect (Manual)") - } - } - .disabled(self.connectingGatewayID != nil || self.manualGatewayHost - 
.trimmingCharacters(in: .whitespacesAndNewlines) - .isEmpty || !self.manualPortIsValid) - - Text( - "Use this when mDNS/Bonjour discovery is blocked. " - + "Leave port empty for 443 on tailnet DNS (TLS) or 18789 otherwise.") - .font(.footnote) - .foregroundStyle(.secondary) - - Toggle("Discovery Debug Logs", isOn: self.$discoveryDebugLogsEnabled) - .onChange(of: self.discoveryDebugLogsEnabled) { _, newValue in - self.gatewayController.setDiscoveryDebugLoggingEnabled(newValue) - } - - NavigationLink("Discovery Logs") { - GatewayDiscoveryDebugLogView() - } - - Toggle("Debug Canvas Status", isOn: self.$canvasDebugStatusEnabled) - - TextField("Gateway Auth Token", text: self.$gatewayToken) - .textInputAutocapitalization(.never) - .autocorrectionDisabled() - - SecureField("Gateway Password", text: self.$gatewayPassword) - - Button("Reset Onboarding", role: .destructive) { - self.showResetOnboardingAlert = true - } - - VStack(alignment: .leading, spacing: 6) { - Text("Debug") - .font(.footnote.weight(.semibold)) - .foregroundStyle(.secondary) - Text(self.gatewayDebugText()) - .font(.system(size: 12, weight: .regular, design: .monospaced)) - .foregroundStyle(.secondary) - .frame(maxWidth: .infinity, alignment: .leading) - .padding(10) - .background(.thinMaterial, in: RoundedRectangle(cornerRadius: 10, style: .continuous)) - } - } - } label: { - HStack(spacing: 10) { - Circle() - .fill(self.isGatewayConnected ? 
Color.green : Color.secondary.opacity(0.35)) - .frame(width: 10, height: 10) - Text("Gateway") - Spacer() - Text(self.gatewaySummaryText) - .font(.footnote) - .foregroundStyle(.secondary) - } } } - } - - @ViewBuilder - private var deviceSection: some View { - Section("Device") { - DisclosureGroup("Features") { - self.featureToggle( - "Voice Wake", - isOn: self.$voiceWakeEnabled, - help: "Enables wake-word activation to start a hands-free session.") { newValue in - self.appModel.setVoiceWakeEnabled(newValue) - } - self.featureToggle( - "Talk Mode", - isOn: self.$talkEnabled, - help: "Enables voice conversation mode with your connected OpenClaw agent.") { newValue in - self.appModel.setTalkEnabled(newValue) - } - self.featureToggle( - "Background Listening", - isOn: self.$talkBackgroundEnabled, - help: "Keeps listening while the app is backgrounded. Uses more battery.") - - NavigationLink { - VoiceWakeWordsSettingsView() - } label: { - LabeledContent( - "Wake Words", - value: VoiceWakePreferences.displayString(for: self.voiceWake.triggerWords)) - } - - self.featureToggle( - "Allow Camera", - isOn: self.$cameraEnabled, - help: "Allows the gateway to request photos or short video clips while OpenClaw is foregrounded.") - - HStack(spacing: 8) { - Text("Location Access") - Spacer() - Button { - self.activeFeatureHelp = FeatureHelp( - title: "Location Access", - message: "Controls location permissions for OpenClaw. 
Off disables location tools, While Using enables foreground location, and Always enables background location.") - } label: { - Image(systemName: "info.circle") - .foregroundStyle(.secondary) - } - .buttonStyle(.plain) - .accessibilityLabel("Location Access info") - } - Picker("Location Access", selection: self.$locationEnabledModeRaw) { - Text("Off").tag(OpenClawLocationMode.off.rawValue) - Text("While Using").tag(OpenClawLocationMode.whileUsing.rawValue) - Text("Always").tag(OpenClawLocationMode.always.rawValue) - } - .labelsHidden() - .pickerStyle(.segmented) - - self.featureToggle( - "Prevent Sleep", - isOn: self.$preventSleep, - help: "Keeps the screen awake while OpenClaw is open.") - - DisclosureGroup("Advanced") { - self.featureToggle( - "Voice Directive Hint", - isOn: self.$talkVoiceDirectiveHintEnabled, - help: "Adds voice-switching instructions to Talk prompts. Disable to reduce prompt size.") - self.featureToggle( - "Show Talk Button", - isOn: self.$talkButtonEnabled, - help: "Shows the floating Talk button in the main interface.") - TextField("Default Share Instruction", text: self.$defaultShareInstruction, axis: .vertical) - .lineLimit(2 ... 
6) - .textInputAutocapitalization(.sentences) - HStack(spacing: 8) { - Text("Default Share Instruction") - .font(.footnote) - .foregroundStyle(.secondary) - Spacer() - Button { - self.activeFeatureHelp = FeatureHelp( - title: "Default Share Instruction", - message: "Appends this instruction when sharing content into OpenClaw from iOS.") - } label: { - Image(systemName: "info.circle") - .foregroundStyle(.secondary) - } - .buttonStyle(.plain) - .accessibilityLabel("Default Share Instruction info") - } - - VStack(alignment: .leading, spacing: 8) { - Button { - Task { await self.appModel.runSharePipelineSelfTest() } - } label: { - Label("Run Share Self-Test", systemImage: "checkmark.seal") - } - Text(self.appModel.lastShareEventText) - .font(.footnote) - .foregroundStyle(.secondary) - } - } - } - - DisclosureGroup("Device Info") { - TextField("Name", text: self.$displayName) - Text(self.instanceId) - .font(.footnote) - .foregroundStyle(.secondary) - .lineLimit(1) - .truncationMode(.middle) - LabeledContent("Device", value: self.deviceFamily()) - LabeledContent("Platform", value: self.platformString()) - LabeledContent("OpenClaw", value: self.openClawVersionString()) - } - - PermissionsDisclosureSection( - snapshot: self.permissionSnapshot, - requestingPermission: self.requestingPermission, - onRequest: { kind in - Task { await self.requestPermission(kind) } - }, - onOpenSettings: { - self.appModel.openSystemSettings() - }, - onInfo: { kind in - self.activeFeatureHelp = FeatureHelp( - title: kind.title, - message: self.permissionHelp(for: kind)) - }) - } + .gatewayTrustPromptAlert() } @ViewBuilder @@ -701,33 +609,6 @@ struct SettingsTab: View { } } - private func permissionHelp(for kind: IOSPermissionKind) -> String { - switch kind { - case .photos: - "Required for photos.latest tool access." - case .contacts: - "Required for contacts.search and contacts.add." - case .calendar: - "Full access enables calendar.events and calendar.add." 
- case .reminders: - "Full access enables reminders.list and reminders.add." - case .motion: - "Required for motion.activity and motion.pedometer." - } - } - - private func refreshPermissionSnapshot() { - self.permissionSnapshot = self.appModel.permissionSnapshot() - } - - private func requestPermission(_ kind: IOSPermissionKind) async { - self.requestingPermission = kind - _ = await self.appModel.requestPermission(kind) - self.refreshPermissionSnapshot() - self.gatewayController.refreshActiveGatewayRegistrationFromSettings() - self.requestingPermission = nil - } - private func connect(_ gateway: GatewayDiscoveryModel.DiscoveredGateway) async { self.connectingGatewayID = gateway.id self.manualGatewayEnabled = false diff --git a/apps/ios/project.yml b/apps/ios/project.yml index 8268cbd29a6..9b43db118ef 100644 --- a/apps/ios/project.yml +++ b/apps/ios/project.yml @@ -108,13 +108,8 @@ targets: NSBonjourServices: - _openclaw-gw._tcp NSCameraUsageDescription: OpenClaw can capture photos or short video clips when requested via the gateway. - NSPhotoLibraryUsageDescription: OpenClaw can read your photo library when you ask it to share recent photos. - NSContactsUsageDescription: OpenClaw can read and create contacts when requested via the gateway. NSLocationWhenInUseUsageDescription: OpenClaw uses your location when you allow location sharing. NSLocationAlwaysAndWhenInUseUsageDescription: OpenClaw can share your location in the background when you enable Always. - NSCalendarsFullAccessUsageDescription: OpenClaw can read and add calendar events when requested via the gateway. - NSRemindersFullAccessUsageDescription: OpenClaw can read and add reminders when requested via the gateway. - NSMotionUsageDescription: OpenClaw uses Motion & Fitness data for activity and pedometer commands. NSMicrophoneUsageDescription: OpenClaw needs microphone access for voice wake. NSSpeechRecognitionUsageDescription: OpenClaw uses on-device speech recognition for voice wake. 
UISupportedInterfaceOrientations: diff --git a/docs/channels/discord.md b/docs/channels/discord.md index 464dc430db4..3fbf4a119bc 100644 --- a/docs/channels/discord.md +++ b/docs/channels/discord.md @@ -817,6 +817,47 @@ Example: } ``` +## Voice channels + +OpenClaw can join Discord voice channels for realtime, continuous conversations. This is separate from voice message attachments. + +Requirements: + +- Enable native commands (`commands.native` or `channels.discord.commands.native`). +- Configure `channels.discord.voice`. +- The bot needs Connect + Speak permissions in the target voice channel. + +Use the Discord-only native command `/vc join|leave|status` to control sessions. The command uses the account default agent and follows the same allowlist and group policy rules as other Discord commands. + +Auto-join example: + +```json5 +{ + channels: { + discord: { + voice: { + enabled: true, + autoJoin: [ + { + guildId: "123456789012345678", + channelId: "234567890123456789", + }, + ], + tts: { + provider: "openai", + openai: { voice: "alloy" }, + }, + }, + }, + }, +} +``` + +Notes: + +- `voice.tts` overrides `messages.tts` for voice playback only. +- Voice is enabled by default; set `channels.discord.voice.enabled=false` to disable it. + ## Voice messages Discord voice messages show a waveform preview and require OGG/Opus audio plus metadata. OpenClaw generates the waveform automatically, but it needs `ffmpeg` and `ffprobe` available on the gateway host to inspect and convert audio files. diff --git a/docs/gateway/configuration-reference.md b/docs/gateway/configuration-reference.md index 54de076ba9e..0ad82b64062 100644 --- a/docs/gateway/configuration-reference.md +++ b/docs/gateway/configuration-reference.md @@ -216,6 +216,19 @@ WhatsApp runs through the gateway's web channel (Baileys Web). 
It starts automat accentColor: "#5865F2", }, }, + voice: { + enabled: true, + autoJoin: [ + { + guildId: "123456789012345678", + channelId: "234567890123456789", + }, + ], + tts: { + provider: "openai", + openai: { voice: "alloy" }, + }, + }, retry: { attempts: 3, minDelayMs: 500, @@ -233,6 +246,7 @@ WhatsApp runs through the gateway's web channel (Baileys Web). It starts automat - Bot-authored messages are ignored by default. `allowBots: true` enables them (own messages still filtered). - `maxLinesPerMessage` (default 17) splits tall messages even when under 2000 chars. - `channels.discord.ui.components.accentColor` sets the accent color for Discord components v2 containers. +- `channels.discord.voice` enables Discord voice channel conversations and optional auto-join + TTS overrides. **Reaction notification modes:** `off` (none), `own` (bot's messages, default), `all` (all messages), `allowlist` (from `guilds..users` on all messages). diff --git a/docs/tools/slash-commands.md b/docs/tools/slash-commands.md index b8735d7e248..38f80b53b04 100644 --- a/docs/tools/slash-commands.md +++ b/docs/tools/slash-commands.md @@ -119,6 +119,7 @@ Notes: - `/allowlist add|remove` requires `commands.config=true` and honors channel `configWrites`. - `/usage` controls the per-response usage footer; `/usage cost` prints a local cost summary from OpenClaw session logs. - `/restart` is enabled by default; set `commands.restart: false` to disable it. +- Discord-only native command: `/vc join|leave|status` controls voice channels (requires `channels.discord.voice` and native commands; not available as text). - `/verbose` is meant for debugging and extra visibility; keep it **off** in normal use. - `/reasoning` (and `/verbose`) are risky in group settings: they may reveal internal reasoning or tool output you did not intend to expose. Prefer leaving them off, especially in group chats. 
- **Fast path:** command-only messages from allowlisted senders are handled immediately (bypass queue + model). diff --git a/package.json b/package.json index 790b3ffe6b9..311d18b5eda 100644 --- a/package.json +++ b/package.json @@ -130,8 +130,10 @@ "dependencies": { "@agentclientprotocol/sdk": "0.14.1", "@aws-sdk/client-bedrock": "^3.993.0", - "@buape/carbon": "0.14.0", + "@buape/carbon": "0.0.0-beta-20260216184201", "@clack/prompts": "^1.0.1", + "@discordjs/opus": "^0.9.0", + "@discordjs/voice": "^0.19.0", "@grammyjs/runner": "^2.0.3", "@grammyjs/transformer-throttler": "^1.2.1", "@homebridge/ciao": "^1.3.5", @@ -146,6 +148,7 @@ "@sinclair/typebox": "0.34.48", "@slack/bolt": "^4.6.0", "@slack/web-api": "^7.14.1", + "@snazzah/davey": "^0.1.9", "@whiskeysockets/baileys": "7.0.0-rc.9", "ajv": "^8.18.0", "chalk": "^5.6.2", @@ -166,6 +169,7 @@ "long": "^5.3.2", "markdown-it": "^14.1.1", "node-edge-tts": "^1.2.10", + "opusscript": "^0.0.8", "osc-progress": "^0.3.0", "pdfjs-dist": "^5.4.624", "playwright-core": "1.58.2", @@ -233,6 +237,7 @@ "@whiskeysockets/baileys", "authenticate-pam", "esbuild", + "koffi", "node-llama-cpp", "protobufjs", "sharp" diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 810503c0d83..93aa440e278 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -27,11 +27,17 @@ importers: specifier: ^3.993.0 version: 3.993.0 '@buape/carbon': - specifier: 0.14.0 - version: 0.14.0(hono@4.11.10) + specifier: 0.0.0-beta-20260216184201 + version: 0.0.0-beta-20260216184201(@discordjs/opus@0.9.0)(hono@4.11.10)(opusscript@0.0.8) '@clack/prompts': specifier: ^1.0.1 version: 1.0.1 + '@discordjs/opus': + specifier: ^0.9.0 + version: 0.9.0 + '@discordjs/voice': + specifier: ^0.19.0 + version: 0.19.0(@discordjs/opus@0.9.0)(opusscript@0.0.8) '@grammyjs/runner': specifier: ^2.0.3 version: 2.0.3(grammy@1.40.0) @@ -77,6 +83,9 @@ importers: '@slack/web-api': specifier: ^7.14.1 version: 7.14.1 + '@snazzah/davey': + specifier: ^0.1.9 + version: 0.1.9 '@whiskeysockets/baileys': 
specifier: 7.0.0-rc.9 version: 7.0.0-rc.9(audio-decode@2.2.3)(sharp@0.34.5) @@ -140,6 +149,9 @@ importers: node-llama-cpp: specifier: 3.15.1 version: 3.15.1(typescript@5.9.3) + opusscript: + specifier: ^0.0.8 + version: 0.0.8 osc-progress: specifier: ^0.3.0 version: 0.3.0 @@ -909,8 +921,8 @@ packages: '@borewit/text-codec@0.2.1': resolution: {integrity: sha512-k7vvKPbf7J2fZ5klGRD9AeKfUvojuZIQ3BT5u7Jfv+puwXkUBUT5PVyMDfJZpy30CBDXGMgw7fguK/lpOMBvgw==} - '@buape/carbon@0.14.0': - resolution: {integrity: sha512-mavllPK2iVpRNRtC4C8JOUdJ1hdV0+LDelFW+pjpJaM31MBLMfIJ+f/LlYTIK5QrEcQsXOC+6lU2e0gmgjWhIQ==} + '@buape/carbon@0.0.0-beta-20260216184201': + resolution: {integrity: sha512-u5mgYcigfPVqT7D9gVTGd+3YSflTreQmrWog7ORbb0z5w9eT8ft4rJOdw9fGwr75zMu9kXpSBaAcY2eZoJFSdA==} '@cacheable/memory@2.0.7': resolution: {integrity: sha512-RbxnxAMf89Tp1dLhXMS7ceft/PGsDl1Ip7T20z5nZ+pwIAsQ1p2izPjVG69oCLv/jfQ7HDPHTWK0c9rcAWXN3A==} @@ -974,6 +986,14 @@ packages: '@d-fischer/typed-event-emitter@3.3.3': resolution: {integrity: sha512-OvSEOa8icfdWDqcRtjSEZtgJTFOFNgTjje7zaL0+nAtu2/kZtRCSK5wUMrI/aXtCH8o0Qz2vA8UqkhWUTARFQQ==} + '@discordjs/node-pre-gyp@0.4.5': + resolution: {integrity: sha512-YJOVVZ545x24mHzANfYoy0BJX5PDyeZlpiJjDkUBM/V/Ao7TFX9lcUvCN4nr0tbr5ubeaXxtEBILUrHtTphVeQ==} + hasBin: true + + '@discordjs/opus@0.9.0': + resolution: {integrity: sha512-NEE76A96FtQ5YuoAVlOlB3ryMPrkXbUCTQICHGKb8ShtjXyubGicjRMouHtP1RpuDdm16cDa+oI3aAMo1zQRUQ==} + engines: {node: '>=12.0.0'} + '@discordjs/voice@0.19.0': resolution: {integrity: sha512-UyX6rGEXzVyPzb1yvjHtPfTlnLvB5jX/stAMdiytHhfoydX+98hfympdOwsnTktzr+IRvphxTbdErgYDJkEsvw==} engines: {node: '>=22.12.0'} @@ -1193,7 +1213,7 @@ packages: resolution: {integrity: sha512-vHL6w3ecZsky+8P5MD+eFfaGTyCeOHUIFYMGpQGbrBTSmNNoxv0if69rEZ5giu36weC5saFuznL411gRX7bJDw==} engines: {node: '>=18.14.1'} peerDependencies: - hono: ^4 + hono: 4.11.10 '@huggingface/jinja@0.5.5': resolution: {integrity: 
sha512-xRlzazC+QZwr6z4ixEqYHo9fgwhTZ3xNSdljlKfUFGZSdlvt166DljRELFUfFytlYOYvo3vTisA/AFOuOAzFQQ==} @@ -2984,6 +3004,93 @@ packages: resolution: {integrity: sha512-4aUIteuyxtBUhVdiQqcDhKFitwfd9hqoSDYY2KRXiWtgoWJ9Bmise+KfEPDiVHWeJepvF8xJO9/9+WDIciMFFw==} engines: {node: '>=18.0.0'} + '@snazzah/davey-android-arm-eabi@0.1.9': + resolution: {integrity: sha512-Dq0WyeVGBw+uQbisV/6PeCQV2ndJozfhZqiNIfQxu6ehIdXB7iHILv+oY+AQN2n+qxiFmLh/MOX9RF+pIWdPbA==} + engines: {node: '>= 10'} + cpu: [arm] + os: [android] + + '@snazzah/davey-android-arm64@0.1.9': + resolution: {integrity: sha512-OE16OZjv7F/JrD7Mzw5eL2gY2vXRPC8S7ZrmkcMyz/sHHJsGHlT+L7X5s56Bec1YDTVmzAsH4UBuvVBoXuIWEQ==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [android] + + '@snazzah/davey-darwin-arm64@0.1.9': + resolution: {integrity: sha512-z7oORvAPExikFkH6tvHhbUdZd77MYZp9VqbCpKEiI+sisWFVXgHde7F7iH3G4Bz6gUYJfgvKhWXiDRc+0SC4dg==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [darwin] + + '@snazzah/davey-darwin-x64@0.1.9': + resolution: {integrity: sha512-f1LzGyRGlM414KpXml3OgWVSd7CgylcdYaFj/zDBb8bvWjxyvsI9iMeuPfe/cduloxRj8dELde/yCDZtFR6PdQ==} + engines: {node: '>= 10'} + cpu: [x64] + os: [darwin] + + '@snazzah/davey-freebsd-x64@0.1.9': + resolution: {integrity: sha512-k6p3JY2b8rD6j0V9Ql7kBUMR4eJdcpriNwiHltLzmtGuz/nK5RGQdkEP68gTLc+Uj3xs5Cy0jRKmv2xJQBR4sA==} + engines: {node: '>= 10'} + cpu: [x64] + os: [freebsd] + + '@snazzah/davey-linux-arm-gnueabihf@0.1.9': + resolution: {integrity: sha512-xDaAFUC/1+n/YayNwKsqKOBMuW0KI6F0SjgWU+krYTQTVmAKNjOM80IjemrVoqTpBOxBsT80zEtct2wj11CE3Q==} + engines: {node: '>= 10'} + cpu: [arm] + os: [linux] + + '@snazzah/davey-linux-arm64-gnu@0.1.9': + resolution: {integrity: sha512-t1VxFBzWExPNpsNY/9oStdAAuHqFvwZvIO2YPYyVNstxfi2KmAbHMweHUW7xb2ppXuhVQZ4VGmmeXiXcXqhPBw==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [linux] + + '@snazzah/davey-linux-arm64-musl@0.1.9': + resolution: {integrity: sha512-Xvlr+nBPzuFV4PXHufddlt08JsEyu0p8mX2DpqdPxdpysYIH4I8V86yJiS4tk04a6pLBDd8IxTbBwvXJKqd/LQ==} + 
engines: {node: '>= 10'} + cpu: [arm64] + os: [linux] + + '@snazzah/davey-linux-x64-gnu@0.1.9': + resolution: {integrity: sha512-6Uunc/NxiEkg1reroAKZAGfOtjl1CGa7hfTTVClb2f+DiA8ZRQWBh+3lgkq/0IeL262B4F14X8QRv5Bsv128qw==} + engines: {node: '>= 10'} + cpu: [x64] + os: [linux] + + '@snazzah/davey-linux-x64-musl@0.1.9': + resolution: {integrity: sha512-fFQ/n3aWt1lXhxSdy+Ge3gi5bR3VETMVsWhH0gwBALUKrbo3ZzgSktm4lNrXE9i0ncMz/CDpZ5i0wt/N3XphEQ==} + engines: {node: '>= 10'} + cpu: [x64] + os: [linux] + + '@snazzah/davey-wasm32-wasi@0.1.9': + resolution: {integrity: sha512-xWvzej8YCVlUvzlpmqJMIf0XmLlHqulKZ2e7WNe2TxQmsK+o0zTZqiQYs2MwaEbrNXBhYlHDkdpuwoXkJdscNQ==} + engines: {node: '>=14.0.0'} + cpu: [wasm32] + + '@snazzah/davey-win32-arm64-msvc@0.1.9': + resolution: {integrity: sha512-sTqry/DfltX2OdW1CTLKa3dFYN5FloAEb2yhGsY1i5+Bms6OhwByXfALvyMHYVo61Th2+sD+9BJpQffHFKDA3w==} + engines: {node: '>= 10'} + cpu: [arm64] + os: [win32] + + '@snazzah/davey-win32-ia32-msvc@0.1.9': + resolution: {integrity: sha512-twD3LwlkGnSwphsCtpGb5ztpBIWEvGdc0iujoVkdzZ6nJiq5p8iaLjJMO4hBm9h3s28fc+1Qd7AMVnagiOasnA==} + engines: {node: '>= 10'} + cpu: [ia32] + os: [win32] + + '@snazzah/davey-win32-x64-msvc@0.1.9': + resolution: {integrity: sha512-eMnXbv4GoTngWYY538i/qHz2BS+RgSXFsvKltPzKqnqzPzhQZIY7TemEJn3D5yWGfW4qHve9u23rz93FQqnQMA==} + engines: {node: '>= 10'} + cpu: [x64] + os: [win32] + + '@snazzah/davey@0.1.9': + resolution: {integrity: sha512-vNZk5y+IsxjwzTAXikvzz5pqMLb35YytC64nVF2MAFVhjpXu9ITOKUriZ0JG/llwzCAi56jb5x0cXDRIyE2A2A==} + engines: {node: '>= 10'} + '@standard-schema/spec@1.1.0': resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==} @@ -3040,8 +3147,8 @@ packages: '@types/body-parser@1.19.6': resolution: {integrity: sha512-HLFeCYgz89uk22N5Qg3dvGvsv46B8GLvKKo1zKG4NybA8U2DiEO3w9lqGg29t/tfLRJpJ6iQxnVw4OnB7MoM9g==} - '@types/bun@1.3.6': - resolution: {integrity: 
sha512-uWCv6FO/8LcpREhenN1d1b6fcspAB+cefwD7uti8C8VffIv0Um08TKMn98FynpTiU38+y2dUO55T11NgDt8VAA==} + '@types/bun@1.3.9': + resolution: {integrity: sha512-KQ571yULOdWJiMH+RIWIOZ7B2RXQGpL1YQrBtLIV3FqDcCu6FsbFUBwhdKUlCKUpS3PJDsHlJ1QKlpxoVR+xtw==} '@types/caseless@0.12.5': resolution: {integrity: sha512-hWtVTC2q7hc7xZ/RLbxapMvDMgUnDvKvMOpKal4DrMyfGBUfB1oKaZlIRr6mJL+If3bAP6sV/QneGzF6tJjZDg==} @@ -3292,6 +3399,9 @@ packages: resolution: {tarball: https://codeload.github.com/whiskeysockets/libsignal-node/tar.gz/1c30d7d7e76a3b0aa120b04dc6a26f5a12dccf67} version: 2.0.1 + abbrev@1.1.1: + resolution: {integrity: sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==} + abort-controller@3.0.0: resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==} engines: {node: '>=6.5'} @@ -3314,6 +3424,10 @@ packages: engines: {node: '>=0.4.0'} hasBin: true + agent-base@6.0.2: + resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} + engines: {node: '>= 6.0.0'} + agent-base@7.1.4: resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} engines: {node: '>= 14'} @@ -3366,6 +3480,11 @@ packages: aproba@2.1.0: resolution: {integrity: sha512-tLIEcj5GuR2RSTnxNKdkK0dJ/GrC7P38sUkiDmDuHfsHmbagTFAxDVIBltoklXEVIQ/f14IL8IMJ5pn9Hez1Ew==} + are-we-there-yet@2.0.0: + resolution: {integrity: sha512-Ci/qENmwHnsYo9xKIcUJN5LeDKdJ6R1Z1j9V/J5wyq8nh/mYPEpIKJbBZXtZjG04HiK7zV/p6Vs9952MrMeUIw==} + engines: {node: '>=10'} + deprecated: This package is no longer supported. 
+ are-we-there-yet@3.0.1: resolution: {integrity: sha512-QZW4EDmGwlYur0Yyf/b2uGucHQMa8aFUP7eu9ddR73vvhFyt4V0Vl3QHPcTNJ8l6qYOBdxgXdnBXQrHilfRQBg==} engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} @@ -3499,8 +3618,8 @@ packages: buffer-from@1.1.2: resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==} - bun-types@1.3.6: - resolution: {integrity: sha512-OlFwHcnNV99r//9v5IIOgQ9Uk37gZqrNMCcqEaExdkVq3Avwqok1bJFmvGMCkCE0FqzdY8VMOZpfpR3lwI+CsQ==} + bun-types@1.3.9: + resolution: {integrity: sha512-+UBWWOakIP4Tswh0Bt0QD0alpTY8cb5hvgiYeWCMet9YukHbzuruIEeXC2D7nMJPB12kbh8C7XJykSexEqGKJg==} bytes@3.1.2: resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==} @@ -4000,6 +4119,9 @@ packages: resolution: {integrity: sha512-VWSRii4t0AFm6ixFFmLLx1t7wS1gh+ckoa84aOeapGum0h+EZd1EhEumSB+ZdDLnEPuucsVB9oB7cxJHap6Afg==} engines: {node: '>=14.14'} + fs.realpath@1.0.0: + resolution: {integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==} + fsevents@2.3.2: resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==} engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} @@ -4013,6 +4135,11 @@ packages: function-bind@1.1.2: resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==} + gauge@3.0.2: + resolution: {integrity: sha512-+5J6MS/5XksCuXq++uFRsnUd7Ovu1XenbeuIuNRJxYWjgQbPuFhT14lAvsWfqfAmnwluf1OwMjz39HjfLPci0Q==} + engines: {node: '>=10'} + deprecated: This package is no longer supported. 
+ gauge@4.0.4: resolution: {integrity: sha512-f9m+BEN5jkg6a0fZjleidjN51VE1X+mPFQ2DJ0uv1V39oCLCbsGe6yjbBnp7eK7z/+GAon99a3nHuqbuuthyPg==} engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} @@ -4064,6 +4191,10 @@ packages: resolution: {integrity: sha512-BzXxZg24Ibra1pbQ/zE7Kys4Ua1ks7Bn6pKLkVPZ9FZe4JQS6/Q7ef3LG1H+k7lUf5l4T3PLSyYyYJVYUvfgTw==} engines: {node: 20 || >=22} + glob@7.2.3: + resolution: {integrity: sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==} + deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me + google-auth-library@10.5.0: resolution: {integrity: sha512-7ABviyMOlX5hIVD60YOfHw4/CxOfBhyduaYB+wbFWCWoni4N7SLcV46hrVRktuBbZjFC9ONyqamZITN7q3n32w==} engines: {node: '>=18'} @@ -4165,6 +4296,10 @@ packages: resolution: {integrity: sha512-G5akfn7eKbpDN+8nPS/cb57YeA1jLTVxjpCj7tmm3QKPdyDy7T+qSC40e9ptydSWvkwjSXw1VbkpyEm39ukeAg==} engines: {node: '>=0.10'} + https-proxy-agent@5.0.1: + resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} + engines: {node: '>= 6'} + https-proxy-agent@7.0.6: resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} engines: {node: '>= 14'} @@ -4194,6 +4329,10 @@ packages: resolution: {integrity: sha512-B6Lc2s6yApwnD2/pMzFh/d5AVjdsDXjgkeJ766FmFuJELIGHNycKRj+l3A39yZPM4CchqNCB4RITEAYB1KUM6A==} engines: {node: '>=20.19.0'} + inflight@1.0.6: + resolution: {integrity: sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==} + deprecated: This module is not supported, and leaks memory. Do not use it. 
Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful. + inherits@2.0.4: resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} @@ -4563,6 +4702,10 @@ packages: magicast@0.5.2: resolution: {integrity: sha512-E3ZJh4J3S9KfwdjZhe2afj6R9lGIN5Pher1pF39UGrXRqq/VDaGVIGN13BjHd2u8B61hArAGOnso7nBOouW3TQ==} + make-dir@3.1.0: + resolution: {integrity: sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==} + engines: {node: '>=8'} + make-dir@4.0.0: resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==} engines: {node: '>=10'} @@ -4707,6 +4850,9 @@ packages: resolution: {integrity: sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==} engines: {node: '>= 0.4.0'} + node-addon-api@5.1.0: + resolution: {integrity: sha512-eh0GgfEkpnoWDq+VY8OyvYhFEzBk6jIYbRKdIlyTiAXIVJ8PyBaKb0rp7oDtoddbdoHWhq8wwr+XZ81F1rpNdA==} + node-addon-api@8.5.0: resolution: {integrity: sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A==} engines: {node: ^18 || ^20 || >= 21} @@ -4758,6 +4904,11 @@ packages: resolution: {integrity: sha512-M6Rm/bbG6De/gKGxOpeOobx/dnGuP0dz40adqx38boqHhlWssBJZgLCPBNtb9NkrmnKYiV04xELq+R6PFOnoLA==} engines: {node: '>=4.4.0'} + nopt@5.0.0: + resolution: {integrity: sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ==} + engines: {node: '>=6'} + hasBin: true + nostr-tools@2.23.1: resolution: {integrity: sha512-Q5SJ1omrseBFXtLwqDhufpFLA6vX3rS/IuBCc974qaYX6YKGwEPxa/ZsyxruUOr+b+5EpWL2hFmCB5AueYrfBw==} peerDependencies: @@ -4769,6 +4920,10 @@ packages: nostr-wasm@0.1.0: resolution: {integrity: sha512-78BTryCLcLYv96ONU8Ws3Q1JzjlAt+43pWQhIl86xZmWeegYCNLPml7yQ+gG3vR6V5h4XGj+TxO+SS5dsThQIA==} + npmlog@5.0.1: 
+ resolution: {integrity: sha512-AqZtDUWOMKs1G/8lwylVjrdYgqA4d9nu8hc+0gzRxlDb1I10+FHBGMXs6aiQHFdCUUlqH99MUMuLfzWDNDtfxw==} + deprecated: This package is no longer supported. + npmlog@6.0.2: resolution: {integrity: sha512-/vBvz5Jfr9dT/aFWd0FIRf+T/Q2WBsLENygUaFUqstqsycmZAP/t5BvFJTK0viFmSUxiUKTUplWy5vt+rvKIxg==} engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} @@ -4852,6 +5007,9 @@ packages: opus-decoder@0.7.11: resolution: {integrity: sha512-+e+Jz3vGQLxRTBHs8YJQPRPc1Tr+/aC6coV/DlZylriA29BdHQAYXhvNRKtjftof17OFng0+P4wsFIqQu3a48A==} + opusscript@0.0.8: + resolution: {integrity: sha512-VSTi1aWFuCkRCVq+tx/BQ5q9fMnQ9pVZ3JU4UHKqTkf0ED3fKEPdr+gKAAl3IA2hj9rrP6iyq3hlcJq3HELtNQ==} + ora@8.2.0: resolution: {integrity: sha512-weP+BZ8MVNnlCm8c0Qdc1WSWq4Qn7I+9CJGm7Qali6g44e/PUzbjNqJX5NJ9ljlNMosfJvg1fKEGILklK9cwnw==} engines: {node: '>=18'} @@ -4951,6 +5109,10 @@ packages: partial-json@0.1.7: resolution: {integrity: sha512-Njv/59hHaokb/hRUjce3Hdv12wd60MtM9Z5Olmn+nehe0QDAsRtRbJPvJ0Z91TusF0SuZRIvnM+S4l6EIP8leA==} + path-is-absolute@1.0.1: + resolution: {integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==} + engines: {node: '>=0.10.0'} + path-key@3.1.1: resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} engines: {node: '>=8'} @@ -5195,6 +5357,11 @@ packages: resolution: {integrity: sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==} engines: {node: '>= 4'} + rimraf@3.0.2: + resolution: {integrity: sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==} + deprecated: Rimraf versions prior to v4 are no longer supported + hasBin: true + rimraf@5.0.10: resolution: {integrity: sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==} hasBin: true @@ -5256,6 +5423,10 @@ packages: selderee@0.11.0: resolution: {integrity: 
sha512-5TF+l7p4+OsnP8BCCvSyZiSPc4x4//p5uPwK8TCnVPJYRmU2aYKMpOXvw8zM5a5JvuuCGN1jmsMwuU2W02ukfA==} + semver@6.3.1: + resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==} + hasBin: true + semver@7.7.4: resolution: {integrity: sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==} engines: {node: '>=10'} @@ -6749,15 +6920,15 @@ snapshots: '@borewit/text-codec@0.2.1': {} - '@buape/carbon@0.14.0(hono@4.11.10)': + '@buape/carbon@0.0.0-beta-20260216184201(@discordjs/opus@0.9.0)(hono@4.11.10)(opusscript@0.0.8)': dependencies: '@types/node': 25.3.0 discord-api-types: 0.38.37 optionalDependencies: '@cloudflare/workers-types': 4.20260120.0 - '@discordjs/voice': 0.19.0 + '@discordjs/voice': 0.19.0(@discordjs/opus@0.9.0)(opusscript@0.0.8) '@hono/node-server': 1.19.9(hono@4.11.10) - '@types/bun': 1.3.6 + '@types/bun': 1.3.9 '@types/ws': 8.18.1 ws: 8.19.0 transitivePeerDependencies: @@ -6880,11 +7051,34 @@ snapshots: dependencies: tslib: 2.8.1 - '@discordjs/voice@0.19.0': + '@discordjs/node-pre-gyp@0.4.5': + dependencies: + detect-libc: 2.1.2 + https-proxy-agent: 5.0.1 + make-dir: 3.1.0 + node-fetch: 2.7.0 + nopt: 5.0.0 + npmlog: 5.0.1 + rimraf: 3.0.2 + semver: 7.7.4 + tar: 7.5.9 + transitivePeerDependencies: + - encoding + - supports-color + + '@discordjs/opus@0.9.0': + dependencies: + '@discordjs/node-pre-gyp': 0.4.5 + node-addon-api: 5.1.0 + transitivePeerDependencies: + - encoding + - supports-color + + '@discordjs/voice@0.19.0(@discordjs/opus@0.9.0)(opusscript@0.0.8)': dependencies: '@types/ws': 8.18.1 discord-api-types: 0.38.39 - prism-media: 1.3.5 + prism-media: 1.3.5(@discordjs/opus@0.9.0)(opusscript@0.0.8) tslib: 2.8.1 ws: 8.19.0 transitivePeerDependencies: @@ -6894,7 +7088,6 @@ snapshots: - node-opus - opusscript - utf-8-validate - optional: true '@emnapi/core@1.8.1': dependencies: @@ -8757,6 +8950,67 @@ snapshots: dependencies: tslib: 2.8.1 + 
'@snazzah/davey-android-arm-eabi@0.1.9': + optional: true + + '@snazzah/davey-android-arm64@0.1.9': + optional: true + + '@snazzah/davey-darwin-arm64@0.1.9': + optional: true + + '@snazzah/davey-darwin-x64@0.1.9': + optional: true + + '@snazzah/davey-freebsd-x64@0.1.9': + optional: true + + '@snazzah/davey-linux-arm-gnueabihf@0.1.9': + optional: true + + '@snazzah/davey-linux-arm64-gnu@0.1.9': + optional: true + + '@snazzah/davey-linux-arm64-musl@0.1.9': + optional: true + + '@snazzah/davey-linux-x64-gnu@0.1.9': + optional: true + + '@snazzah/davey-linux-x64-musl@0.1.9': + optional: true + + '@snazzah/davey-wasm32-wasi@0.1.9': + dependencies: + '@napi-rs/wasm-runtime': 1.1.1 + optional: true + + '@snazzah/davey-win32-arm64-msvc@0.1.9': + optional: true + + '@snazzah/davey-win32-ia32-msvc@0.1.9': + optional: true + + '@snazzah/davey-win32-x64-msvc@0.1.9': + optional: true + + '@snazzah/davey@0.1.9': + optionalDependencies: + '@snazzah/davey-android-arm-eabi': 0.1.9 + '@snazzah/davey-android-arm64': 0.1.9 + '@snazzah/davey-darwin-arm64': 0.1.9 + '@snazzah/davey-darwin-x64': 0.1.9 + '@snazzah/davey-freebsd-x64': 0.1.9 + '@snazzah/davey-linux-arm-gnueabihf': 0.1.9 + '@snazzah/davey-linux-arm64-gnu': 0.1.9 + '@snazzah/davey-linux-arm64-musl': 0.1.9 + '@snazzah/davey-linux-x64-gnu': 0.1.9 + '@snazzah/davey-linux-x64-musl': 0.1.9 + '@snazzah/davey-wasm32-wasi': 0.1.9 + '@snazzah/davey-win32-arm64-msvc': 0.1.9 + '@snazzah/davey-win32-ia32-msvc': 0.1.9 + '@snazzah/davey-win32-x64-msvc': 0.1.9 + '@standard-schema/spec@1.1.0': {} '@swc/helpers@0.5.18': @@ -8847,9 +9101,9 @@ snapshots: '@types/connect': 3.4.38 '@types/node': 25.3.0 - '@types/bun@1.3.6': + '@types/bun@1.3.9': dependencies: - bun-types: 1.3.6 + bun-types: 1.3.9 optional: true '@types/caseless@0.12.5': {} @@ -9186,6 +9440,8 @@ snapshots: curve25519-js: 0.0.4 protobufjs: 6.8.8 + abbrev@1.1.1: {} + abort-controller@3.0.0: dependencies: event-target-shim: 5.0.1 @@ -9206,6 +9462,12 @@ snapshots: acorn@8.15.0: {} + 
agent-base@6.0.2: + dependencies: + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + agent-base@7.1.4: {} ajv-formats@3.0.1(ajv@8.18.0): @@ -9251,6 +9513,11 @@ snapshots: aproba@2.1.0: {} + are-we-there-yet@2.0.0: + dependencies: + delegates: 1.0.0 + readable-stream: 3.6.2 + are-we-there-yet@3.0.1: dependencies: delegates: 1.0.0 @@ -9409,7 +9676,7 @@ snapshots: buffer-from@1.1.2: {} - bun-types@1.3.6: + bun-types@1.3.9: dependencies: '@types/node': 25.3.0 optional: true @@ -9941,6 +10208,8 @@ snapshots: jsonfile: 6.2.0 universalify: 2.0.1 + fs.realpath@1.0.0: {} + fsevents@2.3.2: optional: true @@ -9949,6 +10218,18 @@ snapshots: function-bind@1.1.2: {} + gauge@3.0.2: + dependencies: + aproba: 2.1.0 + color-support: 1.1.3 + console-control-strings: 1.1.0 + has-unicode: 2.0.1 + object-assign: 4.1.1 + signal-exit: 3.0.7 + string-width: 4.2.3 + strip-ansi: 6.0.1 + wide-align: 1.1.5 + gauge@4.0.4: dependencies: aproba: 2.1.0 @@ -10032,6 +10313,15 @@ snapshots: minipass: 7.1.2 path-scurry: 2.0.1 + glob@7.2.3: + dependencies: + fs.realpath: 1.0.0 + inflight: 1.0.6 + inherits: 2.0.4 + minimatch: 10.2.1 + once: 1.4.0 + path-is-absolute: 1.0.1 + google-auth-library@10.5.0: dependencies: base64-js: 1.5.1 @@ -10154,6 +10444,13 @@ snapshots: jsprim: 2.0.2 sshpk: 1.18.0 + https-proxy-agent@5.0.1: + dependencies: + agent-base: 6.0.2 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + https-proxy-agent@7.0.6: dependencies: agent-base: 7.1.4 @@ -10184,6 +10481,11 @@ snapshots: import-without-cache@0.2.5: {} + inflight@1.0.6: + dependencies: + once: 1.4.0 + wrappy: 1.0.2 + inherits@2.0.4: {} ini@1.3.8: {} @@ -10546,6 +10848,10 @@ snapshots: '@babel/types': 7.29.0 source-map-js: 1.2.1 + make-dir@3.1.0: + dependencies: + semver: 6.3.1 + make-dir@4.0.0: dependencies: semver: 7.7.4 @@ -10667,6 +10973,8 @@ snapshots: netmask@2.0.2: {} + node-addon-api@5.1.0: {} + node-addon-api@8.5.0: {} node-api-headers@1.8.0: {} @@ -10750,6 +11058,10 @@ snapshots: 
node-wav@0.0.2: optional: true + nopt@5.0.0: + dependencies: + abbrev: 1.1.1 + nostr-tools@2.23.1(typescript@5.9.3): dependencies: '@noble/ciphers': 2.1.1 @@ -10764,6 +11076,13 @@ snapshots: nostr-wasm@0.1.0: {} + npmlog@5.0.1: + dependencies: + are-we-there-yet: 2.0.0 + console-control-strings: 1.1.0 + gauge: 3.0.2 + set-blocking: 2.0.0 + npmlog@6.0.2: dependencies: are-we-there-yet: 3.0.1 @@ -10844,6 +11163,8 @@ snapshots: '@wasm-audio-decoders/common': 9.0.7 optional: true + opusscript@0.0.8: {} + ora@8.2.0: dependencies: chalk: 5.6.2 @@ -10986,6 +11307,8 @@ snapshots: partial-json@0.1.7: {} + path-is-absolute@1.0.1: {} + path-key@3.1.1: {} path-scurry@1.11.1: @@ -11071,8 +11394,10 @@ snapshots: dependencies: parse-ms: 4.0.0 - prism-media@1.3.5: - optional: true + prism-media@1.3.5(@discordjs/opus@0.9.0)(opusscript@0.0.8): + optionalDependencies: + '@discordjs/opus': 0.9.0 + opusscript: 0.0.8 process-nextick-args@2.0.1: {} @@ -11253,6 +11578,10 @@ snapshots: retry@0.13.1: {} + rimraf@3.0.2: + dependencies: + glob: 7.2.3 + rimraf@5.0.10: dependencies: glob: 10.5.0 @@ -11375,6 +11704,8 @@ snapshots: dependencies: parseley: 0.12.1 + semver@6.3.1: {} + semver@7.7.4: {} send@0.19.2: diff --git a/src/agents/openclaw-tools.ts b/src/agents/openclaw-tools.ts index 0169ca47172..41f059fb6a7 100644 --- a/src/agents/openclaw-tools.ts +++ b/src/agents/openclaw-tools.ts @@ -107,10 +107,7 @@ export function createOpenClawTools(options?: { sandboxBridgeUrl: options?.sandboxBrowserBridgeUrl, allowHostControl: options?.allowHostBrowserControl, }), - createCanvasTool({ - config: options?.config, - agentSessionKey: options?.agentSessionKey, - }), + createCanvasTool({ config: options?.config }), createNodesTool({ agentSessionKey: options?.agentSessionKey, config: options?.config, diff --git a/src/agents/tools/canvas-tool.ts b/src/agents/tools/canvas-tool.ts index ee452f83a87..7db4aad515a 100644 --- a/src/agents/tools/canvas-tool.ts +++ b/src/agents/tools/canvas-tool.ts @@ -1,15 +1,14 
@@ import crypto from "node:crypto"; +import fs from "node:fs/promises"; import path from "node:path"; -import { fileURLToPath } from "node:url"; import { Type } from "@sinclair/typebox"; import { writeBase64ToFile } from "../../cli/nodes-camera.js"; import { canvasSnapshotTempPath, parseCanvasSnapshotPayload } from "../../cli/nodes-canvas.js"; import type { OpenClawConfig } from "../../config/config.js"; -import { openFileWithinRoot, SafeOpenError } from "../../infra/fs-safe.js"; -import { getAgentScopedMediaLocalRoots } from "../../media/local-roots.js"; +import { logVerbose, shouldLogVerbose } from "../../globals.js"; +import { isInboundPathAllowed } from "../../media/inbound-path-policy.js"; +import { getDefaultMediaLocalRoots } from "../../media/local-roots.js"; import { imageMimeFromFormat } from "../../media/mime.js"; -import { resolveUserPath } from "../../utils.js"; -import { resolveSessionAgentId } from "../agent-scope.js"; import { resolveImageSanitizationLimits } from "../image-sanitization.js"; import { optionalStringEnum, stringEnum } from "../schema/typebox.js"; import { type AnyAgentTool, imageResult, jsonResult, readStringParam } from "./common.js"; @@ -28,77 +27,27 @@ const CANVAS_ACTIONS = [ const CANVAS_SNAPSHOT_FORMATS = ["png", "jpg", "jpeg"] as const; -const PATH_SCHEME_RE = /^[a-z][a-z0-9+.-]*:/i; -const WINDOWS_DRIVE_RE = /^[a-zA-Z]:[\\/]/; - -function resolveJsonlLocalPath(rawPath: string): string { - const trimmed = rawPath.trim(); +async function readJsonlFromPath(jsonlPath: string): Promise { + const trimmed = jsonlPath.trim(); if (!trimmed) { - return trimmed; + return ""; } - if (trimmed.startsWith("file://")) { - try { - return fileURLToPath(trimmed); - } catch (err) { - throw new Error(`Invalid jsonlPath file URL: ${rawPath}`, { cause: err }); + const resolved = path.resolve(trimmed); + const roots = getDefaultMediaLocalRoots(); + if (!isInboundPathAllowed({ filePath: resolved, roots })) { + if (shouldLogVerbose()) { + 
logVerbose(`Blocked canvas jsonlPath outside allowed roots: ${resolved}`); } + throw new Error("jsonlPath outside allowed roots"); } - if (PATH_SCHEME_RE.test(trimmed) && !WINDOWS_DRIVE_RE.test(trimmed)) { - throw new Error("jsonlPath must be a local file path."); - } - if (trimmed.startsWith("~")) { - return resolveUserPath(trimmed); - } - return path.resolve(trimmed); -} - -function resolveLocalRoot(filePath: string, roots: readonly string[]): string | null { - const resolvedPath = path.resolve(filePath); - for (const root of roots) { - const resolvedRoot = path.resolve(root); - const rel = path.relative(resolvedRoot, resolvedPath); - if (!rel || (!rel.startsWith("..") && !path.isAbsolute(rel))) { - return resolvedRoot; + const canonical = await fs.realpath(resolved).catch(() => resolved); + if (!isInboundPathAllowed({ filePath: canonical, roots })) { + if (shouldLogVerbose()) { + logVerbose(`Blocked canvas jsonlPath outside allowed roots: ${canonical}`); } + throw new Error("jsonlPath outside allowed roots"); } - return null; -} - -async function readJsonlFromPath(params: { - jsonlPath: string; - localRoots: readonly string[]; -}): Promise { - const resolvedPath = resolveJsonlLocalPath(params.jsonlPath); - const resolvedRoot = resolveLocalRoot(resolvedPath, params.localRoots); - if (!resolvedRoot) { - throw new Error("jsonlPath must be under an allowed directory."); - } - const relativePath = path.relative(resolvedRoot, resolvedPath); - try { - const opened = await openFileWithinRoot({ - rootDir: resolvedRoot, - relativePath, - }); - try { - const buffer = await opened.handle.readFile(); - return buffer.toString("utf8"); - } finally { - await opened.handle.close().catch(() => {}); - } - } catch (err) { - if (err instanceof SafeOpenError) { - if (err.code === "not-found") { - throw new Error("jsonlPath file not found.", { cause: err }); - } - if (err.code === "not-file") { - throw new Error("jsonlPath must be a regular file.", { cause: err }); - } - throw new 
Error("jsonlPath must be a regular file within an allowed directory.", { - cause: err, - }); - } - throw err; - } + return await fs.readFile(canonical, "utf8"); } // Flattened schema: runtime validates per-action requirements. @@ -128,15 +77,8 @@ const CanvasToolSchema = Type.Object({ jsonlPath: Type.Optional(Type.String()), }); -export function createCanvasTool(options?: { - config?: OpenClawConfig; - agentSessionKey?: string; -}): AnyAgentTool { +export function createCanvasTool(options?: { config?: OpenClawConfig }): AnyAgentTool { const imageSanitization = resolveImageSanitizationLimits(options?.config); - const agentId = options?.agentSessionKey - ? resolveSessionAgentId({ sessionKey: options.agentSessionKey, config: options?.config }) - : undefined; - const localRoots = getAgentScopedMediaLocalRoots(options?.config ?? {}, agentId); return { label: "Canvas", name: "canvas", @@ -254,10 +196,7 @@ export function createCanvasTool(options?: { typeof params.jsonl === "string" && params.jsonl.trim() ? params.jsonl : typeof params.jsonlPath === "string" && params.jsonlPath.trim() - ? await readJsonlFromPath({ - jsonlPath: params.jsonlPath, - localRoots, - }) + ? await readJsonlFromPath(params.jsonlPath) : ""; if (!jsonl.trim()) { throw new Error("jsonl or jsonlPath required"); diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 79f8bc05e3c..b47390b8eae 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -434,6 +434,12 @@ export const FIELD_HELP: Record = { "channels.discord.maxLinesPerMessage": "Soft max line count per Discord message (default: 17).", "channels.discord.ui.components.accentColor": "Accent color for Discord component containers (hex). Set per account via channels.discord.accounts..ui.components.accentColor.", + "channels.discord.voice.enabled": + "Enable Discord voice channel conversations (default: true). 
Omit channels.discord.voice to keep voice support disabled for the account.", + "channels.discord.voice.autoJoin": + "Voice channels to auto-join on startup (list of guildId/channelId entries).", + "channels.discord.voice.tts": + "Optional TTS overrides for Discord voice playback (merged with messages.tts).", "channels.discord.intents.presence": "Enable the Guild Presences privileged intent. Must also be enabled in the Discord Developer Portal. Allows tracking user activities (e.g. Spotify). Default: false.", "channels.discord.intents.guildMembers": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 8822d361839..640656d48a8 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -291,6 +291,8 @@ export const FIELD_LABELS: Record = { "channels.discord.ui.components.accentColor": "Discord Component Accent Color", "channels.discord.intents.presence": "Discord Presence Intent", "channels.discord.intents.guildMembers": "Discord Guild Members Intent", + "channels.discord.voice.enabled": "Discord Voice Enabled", + "channels.discord.voice.autoJoin": "Discord Voice Auto-Join", "channels.discord.pluralkit.enabled": "Discord PluralKit Enabled", "channels.discord.pluralkit.token": "Discord PluralKit Token", "channels.discord.activity": "Discord Presence Activity", diff --git a/src/config/types.discord.ts b/src/config/types.discord.ts index 1bce558c16c..95ce774da16 100644 --- a/src/config/types.discord.ts +++ b/src/config/types.discord.ts @@ -11,6 +11,7 @@ import type { import type { ChannelHeartbeatVisibilityConfig } from "./types.channels.js"; import type { DmConfig, ProviderCommandsConfig } from "./types.messages.js"; import type { GroupToolPolicyBySenderConfig, GroupToolPolicyConfig } from "./types.tools.js"; +import type { TtsConfig } from "./types.tts.js"; export type DiscordStreamMode = "partial" | "block" | "off"; @@ -94,6 +95,22 @@ export type DiscordIntentsConfig = { guildMembers?: boolean; }; +export type 
DiscordVoiceAutoJoinConfig = { + /** Guild ID that owns the voice channel. */ + guildId: string; + /** Voice channel ID to join. */ + channelId: string; +}; + +export type DiscordVoiceConfig = { + /** Enable Discord voice channel conversations (default: true). */ + enabled?: boolean; + /** Voice channels to auto-join on startup. */ + autoJoin?: DiscordVoiceAutoJoinConfig[]; + /** Optional TTS overrides for Discord voice output. */ + tts?: TtsConfig; +}; + export type DiscordExecApprovalConfig = { /** Enable exec approval forwarding to Discord DMs. Default: false. */ enabled?: boolean; @@ -211,6 +228,8 @@ export type DiscordAccountConfig = { ui?: DiscordUiConfig; /** Privileged Gateway Intents (must also be enabled in Discord Developer Portal). */ intents?: DiscordIntentsConfig; + /** Voice channel conversation settings. */ + voice?: DiscordVoiceConfig; /** PluralKit identity resolution for proxied messages. */ pluralkit?: DiscordPluralKitConfig; /** Outbound response prefix override for this channel/account. 
*/ diff --git a/src/config/zod-schema.providers-core.ts b/src/config/zod-schema.providers-core.ts index 8f9be6c4056..0ac13ae8d5b 100644 --- a/src/config/zod-schema.providers-core.ts +++ b/src/config/zod-schema.providers-core.ts @@ -21,6 +21,7 @@ import { ProviderCommandsSchema, ReplyToModeSchema, RetryConfigSchema, + TtsConfigSchema, requireOpenAllowFrom, } from "./zod-schema.core.js"; import { sensitive } from "./zod-schema.sensitive.js"; @@ -271,6 +272,22 @@ const DiscordUiSchema = z .strict() .optional(); +const DiscordVoiceAutoJoinSchema = z + .object({ + guildId: z.string().min(1), + channelId: z.string().min(1), + }) + .strict(); + +const DiscordVoiceSchema = z + .object({ + enabled: z.boolean().optional(), + autoJoin: z.array(DiscordVoiceAutoJoinSchema).optional(), + tts: TtsConfigSchema.optional(), + }) + .strict() + .optional(); + export const DiscordAccountSchema = z .object({ name: z.string().optional(), @@ -347,6 +364,7 @@ export const DiscordAccountSchema = z }) .strict() .optional(), + voice: DiscordVoiceSchema, pluralkit: z .object({ enabled: z.boolean().optional(), diff --git a/src/discord/monitor/gateway-plugin.ts b/src/discord/monitor/gateway-plugin.ts index be754890eab..74e1aad8630 100644 --- a/src/discord/monitor/gateway-plugin.ts +++ b/src/discord/monitor/gateway-plugin.ts @@ -14,7 +14,8 @@ export function resolveDiscordGatewayIntents( GatewayIntents.MessageContent | GatewayIntents.DirectMessages | GatewayIntents.GuildMessageReactions | - GatewayIntents.DirectMessageReactions; + GatewayIntents.DirectMessageReactions | + GatewayIntents.GuildVoiceStates; if (intentsConfig?.presence) { intents |= GatewayIntents.GuildPresences; } diff --git a/src/discord/monitor/provider.ts b/src/discord/monitor/provider.ts index 7dbe4195ff7..e0cf1d95617 100644 --- a/src/discord/monitor/provider.ts +++ b/src/discord/monitor/provider.ts @@ -6,6 +6,7 @@ import { type Modal, } from "@buape/carbon"; import { GatewayCloseCodes, type GatewayPlugin } from 
"@buape/carbon/gateway"; +import { VoicePlugin } from "@buape/carbon/voice"; import { Routes } from "discord-api-types/v10"; import { resolveTextChunkLimit } from "../../auto-reply/chunk.js"; import { listNativeCommandSpecsForConfig } from "../../auto-reply/commands-registry.js"; @@ -38,6 +39,8 @@ import { fetchDiscordApplicationId } from "../probe.js"; import { resolveDiscordChannelAllowlist } from "../resolve-channels.js"; import { resolveDiscordUserAllowlist } from "../resolve-users.js"; import { normalizeDiscordToken } from "../token.js"; +import { createDiscordVoiceCommand } from "../voice/command.js"; +import { DiscordVoiceManager, DiscordVoiceReadyListener } from "../voice/manager.js"; import { createAgentComponentButton, createAgentSelectMenu, @@ -241,6 +244,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { const useAccessGroups = cfg.commands?.useAccessGroups !== false; const sessionPrefix = "discord:slash"; const ephemeralDefault = true; + const voiceEnabled = discordCfg.voice?.enabled !== false; if (token) { if (guildEntries && Object.keys(guildEntries).length > 0) { @@ -428,6 +432,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { ), ); } + const voiceManagerRef: { current: DiscordVoiceManager | null } = { current: null }; const commands = commandSpecs.map((spec) => createDiscordNativeCommand({ command: spec, @@ -438,6 +443,19 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { ephemeralDefault, }), ); + if (nativeEnabled && voiceEnabled) { + commands.push( + createDiscordVoiceCommand({ + cfg, + discordConfig: discordCfg, + accountId: account.accountId, + groupPolicy, + useAccessGroups, + getManager: () => voiceManagerRef.current, + ephemeralDefault, + }), + ); + } // Initialize exec approvals handler if enabled const execApprovalsConfig = discordCfg.execApprovals ?? 
{}; @@ -506,6 +524,10 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { } } + const clientPlugins = [createDiscordGatewayPlugin({ discordConfig: discordCfg, runtime })]; + if (voiceEnabled) { + clientPlugins.push(new VoicePlugin()); + } const client = new Client( { baseUrl: "http://localhost", @@ -521,7 +543,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { components, modals, }, - [createDiscordGatewayPlugin({ discordConfig: discordCfg, runtime })], + clientPlugins, ); await deployDiscordCommands({ client, runtime, enabled: nativeEnabled }); @@ -529,6 +551,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { const logger = createSubsystemLogger("discord/monitor"); const guildHistories = new Map(); let botUserId: string | undefined; + let voiceManager: DiscordVoiceManager | null = null; if (nativeDisabledExplicit) { await clearDiscordNativeCommands({ @@ -545,6 +568,19 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { runtime.error?.(danger(`discord: failed to fetch bot identity: ${String(err)}`)); } + if (voiceEnabled) { + voiceManager = new DiscordVoiceManager({ + client, + cfg, + discordConfig: discordCfg, + accountId: account.accountId, + runtime, + botUserId, + }); + voiceManagerRef.current = voiceManager; + registerDiscordListener(client.listeners, new DiscordVoiceReadyListener(voiceManager)); + } + const messageHandler = createDiscordMessageHandler({ cfg, discordConfig: discordCfg, @@ -697,6 +733,10 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { } gatewayEmitter?.removeListener("debug", onGatewayDebug); abortSignal?.removeEventListener("abort", onAbort); + if (voiceManager) { + await voiceManager.destroy(); + voiceManagerRef.current = null; + } if (execApprovalsHandler) { await execApprovalsHandler.stop(); } diff --git a/src/discord/voice/command.ts b/src/discord/voice/command.ts new file mode 100644 
index 00000000000..d62151ccb0f
--- /dev/null
+++ b/src/discord/voice/command.ts
@@ -0,0 +1,347 @@
+import {
+  ChannelType as CarbonChannelType,
+  Command,
+  CommandWithSubcommands,
+  type CommandInteraction,
+} from "@buape/carbon";
+import {
+  ApplicationCommandOptionType,
+  ChannelType as DiscordChannelType,
+} from "discord-api-types/v10";
+import { resolveCommandAuthorizedFromAuthorizers } from "../../channels/command-gating.js";
+import type { OpenClawConfig } from "../../config/config.js";
+import type { DiscordAccountConfig } from "../../config/types.js";
+import {
+  allowListMatches,
+  isDiscordGroupAllowedByPolicy,
+  normalizeDiscordAllowList,
+  normalizeDiscordSlug,
+  resolveDiscordChannelConfigWithFallback,
+  resolveDiscordGuildEntry,
+  resolveDiscordMemberAccessState,
+} from "../monitor/allow-list.js";
+import { resolveDiscordChannelInfo } from "../monitor/message-utils.js";
+import { resolveDiscordSenderIdentity } from "../monitor/sender-identity.js";
+import { resolveDiscordThreadParentInfo } from "../monitor/threading.js";
+import type { DiscordVoiceManager } from "./manager.js";
+
+/** Channel types offered by the `channel` option of `/vc join`. */
+const VOICE_CHANNEL_TYPES: DiscordChannelType[] = [
+  DiscordChannelType.GuildVoice,
+  DiscordChannelType.GuildStageVoice,
+];
+
+/** Configuration and collaborators shared by every `/vc` subcommand. */
+type VoiceCommandContext = {
+  cfg: OpenClawConfig;
+  discordConfig: DiscordAccountConfig;
+  accountId: string;
+  groupPolicy: "open" | "disabled" | "allowlist";
+  useAccessGroups: boolean;
+  getManager: () => DiscordVoiceManager | null;
+  ephemeralDefault: boolean;
+};
+
+/** Channel to authorize against instead of the channel the command was invoked in. */
+type VoiceCommandChannelOverride = {
+  id: string;
+  name?: string;
+  parentId?: string;
+};
+
+/** Replies to the invoking user with an ephemeral message. */
+async function replyEphemeral(interaction: CommandInteraction, content: string): Promise<void> {
+  await interaction.reply({ content, ephemeral: true });
+}
+
+/**
+ * Decides whether the invoking user may run a voice command.
+ *
+ * Checks, in order: guild and user presence, the channel's `enabled` flag, the group
+ * policy/channel allowlist, and finally the owner/member authorizers from `params`.
+ *
+ * @returns `{ ok: true, guildId }` when allowed; otherwise `{ ok: false, message }` with the
+ *   user-facing reason. `options.channelOverride` replaces the invoking channel in all checks.
+ */
+async function authorizeVoiceCommand(
+  interaction: CommandInteraction,
+  params: VoiceCommandContext,
+  options?: { channelOverride?: VoiceCommandChannelOverride },
+): Promise<{ ok: boolean; message?: string; guildId?: string }> {
+  const channelOverride = options?.channelOverride;
+  const channel = channelOverride ? undefined : interaction.channel;
+  if (!interaction.guild) {
+    return { ok: false, message: "Voice commands are only available in guilds." };
+  }
+  const user = interaction.user;
+  if (!user) {
+    return { ok: false, message: "Unable to resolve command user." };
+  }
+
+  const channelId = channelOverride?.id ?? channel?.id ?? "";
+  const rawChannelName =
+    channelOverride?.name ?? (channel && "name" in channel ? (channel.name as string) : undefined);
+  const rawParentId =
+    channelOverride?.parentId ??
+    ("parentId" in (channel ?? {})
+      ? ((channel as { parentId?: string }).parentId ?? undefined)
+      : undefined);
+  const channelInfo = channelId
+    ? await resolveDiscordChannelInfo(interaction.client, channelId)
+    : null;
+  const channelName = rawChannelName ?? channelInfo?.name;
+  const channelSlug = channelName ? normalizeDiscordSlug(channelName) : "";
+  const isThreadChannel =
+    channelInfo?.type === CarbonChannelType.PublicThread ||
+    channelInfo?.type === CarbonChannelType.PrivateThread ||
+    channelInfo?.type === CarbonChannelType.AnnouncementThread;
+  let parentId: string | undefined;
+  let parentName: string | undefined;
+  let parentSlug: string | undefined;
+  if (isThreadChannel && channelId) {
+    // Thread scope: resolve the parent so its id/name/slug reach the channel-config lookup.
+    const parentInfo = await resolveDiscordThreadParentInfo({
+      client: interaction.client,
+      threadChannel: {
+        id: channelId,
+        name: channelName,
+        parentId: rawParentId ?? channelInfo?.parentId,
+        parent: undefined,
+      },
+      channelInfo,
+    });
+    parentId = parentInfo.id;
+    parentName = parentInfo.name;
+    parentSlug = parentName ? normalizeDiscordSlug(parentName) : undefined;
+  }
+
+  const guildInfo = resolveDiscordGuildEntry({
+    guild: interaction.guild ?? undefined,
+    guildEntries: params.discordConfig.guilds,
+  });
+
+  const channelConfig = channelId
+    ? resolveDiscordChannelConfigWithFallback({
+        guildInfo,
+        channelId,
+        channelName,
+        channelSlug,
+        parentId,
+        parentName,
+        parentSlug,
+        scope: isThreadChannel ? "thread" : "channel",
+      })
+    : null;
+
+  if (channelConfig?.enabled === false) {
+    return { ok: false, message: "This channel is disabled." };
+  }
+
+  const channelAllowlistConfigured =
+    Boolean(guildInfo?.channels) && Object.keys(guildInfo?.channels ?? {}).length > 0;
+  const channelAllowed = channelConfig?.allowed !== false;
+  const allowedByPolicy = isDiscordGroupAllowedByPolicy({
+    groupPolicy: params.groupPolicy,
+    guildAllowlisted: Boolean(guildInfo),
+    channelAllowlistConfigured,
+    channelAllowed,
+  });
+  // An explicit `allowed: false` on the channel rejects the command even if the policy passes.
+  if (!allowedByPolicy || !channelAllowed) {
+    // `channelId` is "" when no channel could be resolved; use a generic label in that case.
+    const channelLabel = channelId ? `<#${channelId}>` : "This channel";
+    return {
+      ok: false,
+      message: `${channelLabel} is not allowlisted for voice commands.`,
+    };
+  }
+
+  const memberRoleIds = Array.isArray(interaction.rawData.member?.roles)
+    ? interaction.rawData.member.roles.map((roleId: string) => String(roleId))
+    : [];
+  const sender = resolveDiscordSenderIdentity({ author: user, member: interaction.rawData.member });
+
+  const { hasAccessRestrictions, memberAllowed } = resolveDiscordMemberAccessState({
+    channelConfig,
+    guildInfo,
+    memberRoleIds,
+    sender,
+  });
+
+  const ownerAllowList = normalizeDiscordAllowList(
+    params.discordConfig.allowFrom ?? params.discordConfig.dm?.allowFrom ?? [],
+    ["discord:", "user:", "pk:"],
+  );
+  const ownerOk = ownerAllowList
+    ? allowListMatches(ownerAllowList, {
+        id: sender.id,
+        name: sender.name,
+        tag: sender.tag,
+      })
+    : false;
+
+  const authorizers = params.useAccessGroups
+    ? [
+        { configured: ownerAllowList != null, allowed: ownerOk },
+        { configured: hasAccessRestrictions, allowed: memberAllowed },
+      ]
+    : [{ configured: hasAccessRestrictions, allowed: memberAllowed }];
+
+  const commandAuthorized = resolveCommandAuthorizedFromAuthorizers({
+    useAccessGroups: params.useAccessGroups,
+    authorizers,
+    modeWhenAccessGroupsOff: "configured",
+  });
+
+  if (!commandAuthorized) {
+    return { ok: false, message: "You are not authorized to use this command." };
+  }
+
+  return { ok: true, guildId: interaction.guild.id };
+}
+
+/**
+ * Builds the `/vc` command with its `join`, `leave` and `status` subcommands.
+ *
+ * Every subcommand runs `authorizeVoiceCommand` before acting and obtains the voice manager
+ * lazily through `params.getManager()`, replying with a notice when it returns null.
+ */
+export function createDiscordVoiceCommand(params: VoiceCommandContext): CommandWithSubcommands {
+  const resolveSessionChannelId = (manager: DiscordVoiceManager, guildId: string) =>
+    manager.status().find((entry) => entry.guildId === guildId)?.channelId;
+
+  class JoinCommand extends Command {
+    name = "join";
+    description = "Join a voice channel";
+    defer = true;
+    ephemeral = params.ephemeralDefault;
+    options = [
+      {
+        name: "channel",
+        description: "Voice channel to join",
+        type: ApplicationCommandOptionType.Channel,
+        required: true,
+        channel_types: VOICE_CHANNEL_TYPES,
+      },
+    ];
+
+    async run(interaction: CommandInteraction) {
+      const channel = await interaction.options.getChannel("channel", true);
+      if (!channel || !("id" in channel)) {
+        await replyEphemeral(interaction, "Voice channel not found.");
+        return;
+      }
+
+      // Authorize against the channel being joined, not the channel the command was typed in.
+      const access = await authorizeVoiceCommand(interaction, params, {
+        channelOverride: {
+          id: channel.id,
+          name: "name" in channel ? (channel.name as string) : undefined,
+          parentId:
+            "parentId" in channel
+              ? ((channel as { parentId?: string }).parentId ?? undefined)
+              : undefined,
+        },
+      });
+      if (!access.ok) {
+        await replyEphemeral(interaction, access.message ?? "Not authorized.");
+        return;
+      }
+      if (!isVoiceChannelType(channel.type)) {
+        await replyEphemeral(interaction, "That is not a voice channel.");
+        return;
+      }
+      const guildId = access.guildId ?? ("guildId" in channel ? channel.guildId : undefined);
+      if (!guildId) {
+        await replyEphemeral(interaction, "Unable to resolve guild for this voice channel.");
+        return;
+      }
+
+      const manager = params.getManager();
+      if (!manager) {
+        await replyEphemeral(interaction, "Voice manager is not available yet.");
+        return;
+      }
+
+      const result = await manager.join({ guildId, channelId: channel.id });
+      await replyEphemeral(interaction, result.message);
+    }
+  }
+
+  class LeaveCommand extends Command {
+    name = "leave";
+    description = "Leave the current voice channel";
+    defer = true;
+    ephemeral = params.ephemeralDefault;
+
+    async run(interaction: CommandInteraction) {
+      const guildId = interaction.guild?.id;
+      if (!guildId) {
+        await replyEphemeral(interaction, "Unable to resolve guild for this command.");
+        return;
+      }
+      const manager = params.getManager();
+      if (!manager) {
+        await replyEphemeral(interaction, "Voice manager is not available yet.");
+        return;
+      }
+      // Authorize against the active voice channel when there is one.
+      const sessionChannelId = resolveSessionChannelId(manager, guildId);
+      const access = await authorizeVoiceCommand(interaction, params, {
+        channelOverride: sessionChannelId ? { id: sessionChannelId } : undefined,
+      });
+      if (!access.ok) {
+        await replyEphemeral(interaction, access.message ?? "Not authorized.");
+        return;
+      }
+      const result = await manager.leave({ guildId });
+      await replyEphemeral(interaction, result.message);
+    }
+  }
+
+  class StatusCommand extends Command {
+    name = "status";
+    description = "Show active voice sessions";
+    defer = true;
+    ephemeral = params.ephemeralDefault;
+
+    async run(interaction: CommandInteraction) {
+      const guildId = interaction.guild?.id;
+      if (!guildId) {
+        await replyEphemeral(interaction, "Unable to resolve guild for this command.");
+        return;
+      }
+      const manager = params.getManager();
+      if (!manager) {
+        await replyEphemeral(interaction, "Voice manager is not available yet.");
+        return;
+      }
+      const sessions = manager.status().filter((entry) => entry.guildId === guildId);
+      const sessionChannelId = sessions[0]?.channelId;
+      const access = await authorizeVoiceCommand(interaction, params, {
+        channelOverride: sessionChannelId ? { id: sessionChannelId } : undefined,
+      });
+      if (!access.ok) {
+        await replyEphemeral(interaction, access.message ?? "Not authorized.");
+        return;
+      }
+      if (sessions.length === 0) {
+        await replyEphemeral(interaction, "No active voice sessions.");
+        return;
+      }
+      const lines = sessions.map((entry) => `• <#${entry.channelId}> (guild ${entry.guildId})`);
+      await replyEphemeral(interaction, lines.join("\n"));
+    }
+  }
+
+  return new (class extends CommandWithSubcommands {
+    name = "vc";
+    description = "Voice channel controls";
+    subcommands = [new JoinCommand(), new LeaveCommand(), new StatusCommand()];
+  })();
+}
+
+/** Returns true for guild voice and stage channels. */
+function isVoiceChannelType(type: CarbonChannelType) {
+  return type === CarbonChannelType.GuildVoice || type === CarbonChannelType.GuildStageVoice;
+}
diff --git a/src/discord/voice/manager.ts b/src/discord/voice/manager.ts
new file mode 100644
index 00000000000..02849c40bc6
--- /dev/null
+++ b/src/discord/voice/manager.ts
@@ -0,0 +1,670 @@
+import { randomUUID } from "node:crypto";
+import fs from "node:fs/promises";
+import { createRequire } from "node:module";
+import path from "node:path";
+import type { Readable } from "node:stream";
+import { ChannelType, type Client, ReadyListener } from "@buape/carbon";
+import type { VoicePlugin } from "@buape/carbon/voice";
+import {
+  AudioPlayerStatus,
+  EndBehaviorType,
+  VoiceConnectionStatus,
+  createAudioPlayer,
+  createAudioResource,
+  entersState,
+  joinVoiceChannel,
+  type AudioPlayer,
+  type VoiceConnection,
+} from "@discordjs/voice";
+import { resolveAgentDir } from "../../agents/agent-scope.js";
+import type { MsgContext } from "../../auto-reply/templating.js";
+import { agentCommand } from "../../commands/agent.js";
+import type { OpenClawConfig } from "../../config/config.js";
+import type { DiscordAccountConfig, TtsConfig } from "../../config/types.js";
+import { logVerbose, shouldLogVerbose } from "../../globals.js";
+import { formatErrorMessage } from "../../infra/errors.js";
+import { resolvePreferredOpenClawTmpDir } from
"../../infra/tmp-openclaw-dir.js";
+import { createSubsystemLogger } from "../../logging/subsystem.js";
+import {
+  buildProviderRegistry,
+  createMediaAttachmentCache,
+  normalizeMediaAttachments,
+  runCapability,
+} from "../../media-understanding/runner.js";
+import { resolveAgentRoute } from "../../routing/resolve-route.js";
+import type { RuntimeEnv } from "../../runtime.js";
+import { parseTtsDirectives } from "../../tts/tts-core.js";
+import { resolveTtsConfig, textToSpeech, type ResolvedTtsConfig } from "../../tts/tts.js";
+
+const require = createRequire(import.meta.url);
+const OpusScript = require("opusscript") as typeof import("opusscript");
+
+// PCM format used for decoded audio and for the WAV files built from it.
+const SAMPLE_RATE = 48_000;
+const CHANNELS = 2;
+const BIT_DEPTH = 16;
+// Captured segments shorter than this are dropped before transcription.
+const MIN_SEGMENT_SECONDS = 0.35;
+// Silence after which a speaker's receive stream ends.
+const SILENCE_DURATION_MS = 1_000;
+// Timeout for the connection to become Ready and for the player to start Playing.
+const PLAYBACK_READY_TIMEOUT_MS = 15_000;
+// Timeout for the player to return to Idle once a reply has been started.
+const SPEAKING_READY_TIMEOUT_MS = 60_000;
+
+const logger = createSubsystemLogger("discord/voice");
+
+const logVoiceVerbose = (message: string) => {
+  logVerbose(`discord voice: ${message}`);
+};
+
+/** Outcome of a join/leave request; also the shape of each `status()` entry. */
+type VoiceOperationResult = {
+  ok: boolean;
+  message: string;
+  channelId?: string;
+  guildId?: string;
+};
+
+/** State kept for the single active voice session of a guild. */
+type VoiceSessionEntry = {
+  guildId: string;
+  channelId: string;
+  sessionChannelId: string;
+  route: ReturnType<typeof resolveAgentRoute>;
+  connection: VoiceConnection;
+  player: AudioPlayer;
+  // Promise chains that run playback and segment-processing tasks one after another.
+  playbackQueue: Promise<void>;
+  processingQueue: Promise<void>;
+  // Users whose audio is currently being captured.
+  activeSpeakers: Set<string>;
+  stop: () => void;
+};
+
+/**
+ * Overlays `override` on `base`. The nested `modelOverrides`, `elevenlabs` (including
+ * `voiceSettings`), `openai` and `edge` objects are merged key by key instead of replaced.
+ */
+function mergeTtsConfig(base: TtsConfig, override?: TtsConfig): TtsConfig {
+  if (!override) {
+    return base;
+  }
+  return {
+    ...base,
+    ...override,
+    modelOverrides: {
+      ...base.modelOverrides,
+      ...override.modelOverrides,
+    },
+    elevenlabs: {
+      ...base.elevenlabs,
+      ...override.elevenlabs,
+      voiceSettings: {
+        ...base.elevenlabs?.voiceSettings,
+        ...override.elevenlabs?.voiceSettings,
+      },
+    },
+    openai: {
+      ...base.openai,
+      ...override.openai,
+    },
+    edge: {
+      ...base.edge,
+      ...override.edge,
+    },
+  };
+}
+
+/**
+ * Resolves the TTS configuration for voice replies.
+ *
+ * Without an override the global config is used as-is; otherwise a copy of `cfg` whose
+ * `messages.tts` is the merged config is returned alongside its resolved form.
+ */
+function resolveVoiceTtsConfig(params: { cfg: OpenClawConfig; override?: TtsConfig }): {
+  cfg: OpenClawConfig;
+  resolved: ResolvedTtsConfig;
+} {
+  if (!params.override) {
+    return { cfg: params.cfg, resolved: resolveTtsConfig(params.cfg) };
+  }
+  const base = params.cfg.messages?.tts ?? {};
+  const merged = mergeTtsConfig(base, params.override);
+  const messages = params.cfg.messages ?? {};
+  const cfg = {
+    ...params.cfg,
+    messages: {
+      ...messages,
+      tts: merged,
+    },
+  };
+  return { cfg, resolved: resolveTtsConfig(cfg) };
+}
+
+/** Prepends a 44-byte RIFF/WAVE PCM header (format tag 1) to raw PCM samples. */
+function buildWavBuffer(pcm: Buffer): Buffer {
+  const blockAlign = (CHANNELS * BIT_DEPTH) / 8;
+  const byteRate = SAMPLE_RATE * blockAlign;
+  const header = Buffer.alloc(44);
+  header.write("RIFF", 0);
+  header.writeUInt32LE(36 + pcm.length, 4);
+  header.write("WAVE", 8);
+  header.write("fmt ", 12);
+  header.writeUInt32LE(16, 16);
+  header.writeUInt16LE(1, 20);
+  header.writeUInt16LE(CHANNELS, 22);
+  header.writeUInt32LE(SAMPLE_RATE, 24);
+  header.writeUInt32LE(byteRate, 28);
+  header.writeUInt16LE(blockAlign, 32);
+  header.writeUInt16LE(BIT_DEPTH, 34);
+  header.write("data", 36);
+  header.writeUInt32LE(pcm.length, 40);
+  return Buffer.concat([header, pcm]);
+}
+
+type OpusDecoder = {
+  decode: (buffer: Buffer) => Buffer;
+  // opusscript instances hold Emscripten memory that is only released by delete();
+  // @discordjs/opus encoders do not expose this method.
+  delete?: () => void;
+};
+
+/** Creates an Opus decoder, preferring opusscript and falling back to @discordjs/opus. */
+function createOpusDecoder(): { decoder: OpusDecoder; name: string } | null {
+  try {
+    const decoder = new OpusScript(SAMPLE_RATE, CHANNELS, OpusScript.Application.AUDIO);
+    return { decoder, name: "opusscript" };
+  } catch (err) {
+    logger.warn(`discord voice: opusscript init failed: ${formatErrorMessage(err)}`);
+  }
+  try {
+    const { OpusEncoder } = require("@discordjs/opus") as typeof import("@discordjs/opus");
+    const decoder = new OpusEncoder(SAMPLE_RATE, CHANNELS);
+    return { decoder, name: "@discordjs/opus" };
+  } catch (err) {
+    logger.warn(`discord voice: opus decoder init failed: ${formatErrorMessage(err)}`);
+  }
+  return null;
+}
+
+/**
+ * Decodes every Opus packet read from `stream` into one PCM buffer.
+ *
+ * Returns an empty buffer when no decoder is available or nothing could be decoded. Stream and
+ * decode errors end the capture and are only logged in verbose mode; audio decoded up to that
+ * point is still returned.
+ */
+async function decodeOpusStream(stream: Readable): Promise<Buffer> {
+  const selected = createOpusDecoder();
+  if (!selected) {
+    return Buffer.alloc(0);
+  }
+  logVoiceVerbose(`opus decoder: ${selected.name}`);
+  const chunks: Buffer[] = [];
+  try {
+    for await (const chunk of stream) {
+      if (!chunk || !(chunk instanceof Buffer) || chunk.length === 0) {
+        continue;
+      }
+      const decoded = selected.decoder.decode(chunk);
+      if (decoded && decoded.length > 0) {
+        // Copy so the result does not alias memory owned by the decoder.
+        chunks.push(Buffer.from(decoded));
+      }
+    }
+  } catch (err) {
+    if (shouldLogVerbose()) {
+      logVerbose(`discord voice: opus decode failed: ${formatErrorMessage(err)}`);
+    }
+  } finally {
+    // A decoder is created per captured segment, so free it here to avoid leaking one each time.
+    try {
+      selected.decoder.delete?.();
+    } catch (err) {
+      logVoiceVerbose(`opus decoder cleanup failed: ${formatErrorMessage(err)}`);
+    }
+  }
+  return chunks.length > 0 ? Buffer.concat(chunks) : Buffer.alloc(0);
+}
+
+/** Duration in seconds of PCM data in the module's sample format. */
+function estimateDurationSeconds(pcm: Buffer): number {
+  const bytesPerSample = (BIT_DEPTH / 8) * CHANNELS;
+  if (bytesPerSample <= 0) {
+    return 0;
+  }
+  return pcm.length / (bytesPerSample * SAMPLE_RATE);
+}
+
+/**
+ * Writes `pcm` as a WAV file into a fresh temp directory and schedules that directory for
+ * deletion. Returns the file path and the estimated duration of the audio.
+ */
+async function writeWavFile(pcm: Buffer): Promise<{ path: string; durationSeconds: number }> {
+  const tempDir = await fs.mkdtemp(path.join(resolvePreferredOpenClawTmpDir(), "discord-voice-"));
+  const filePath = path.join(tempDir, `segment-${randomUUID()}.wav`);
+  const wav = buildWavBuffer(pcm);
+  await fs.writeFile(filePath, wav);
+  scheduleTempCleanup(tempDir);
+  return { path: filePath, durationSeconds: estimateDurationSeconds(pcm) };
+}
+
+/** Removes `tempDir` after `delayMs` (30 minutes by default); the timer does not keep the process alive. */
+function scheduleTempCleanup(tempDir: string, delayMs: number = 30 * 60 * 1000): void {
+  const timer = setTimeout(() => {
+    fs.rm(tempDir, { recursive: true, force: true }).catch((err) => {
+      if (shouldLogVerbose()) {
+        logVerbose(`discord voice: temp cleanup failed for ${tempDir}: ${formatErrorMessage(err)}`);
+      }
+    });
+  }, delayMs);
+  timer.unref();
+}
+
+/** Destroys `connection` unless it is already destroyed (`destroy()` throws in that state). */
+function destroyConnection(connection: VoiceConnection): void {
+  if (connection.state.status !== VoiceConnectionStatus.Destroyed) {
+    connection.destroy();
+  }
+}
+
+/**
+ * Transcribes the WAV file at `filePath` through the "audio" media capability.
+ *
+ * @returns The trimmed transcript, or `undefined` when there is no attachment or no text.
+ */
+async function transcribeAudio(params: {
+  cfg: OpenClawConfig;
+  agentId: string;
+  filePath: string;
+}): Promise<string | undefined> {
+  const ctx: MsgContext = {
+    MediaPath: params.filePath,
+    MediaType: "audio/wav",
+  };
+  const attachments = normalizeMediaAttachments(ctx);
+  if (attachments.length === 0) {
+    return undefined;
+  }
+  const cache = createMediaAttachmentCache(attachments);
+  const providerRegistry = buildProviderRegistry();
+  try {
+    const result = await runCapability({
+      capability: "audio",
+      cfg: params.cfg,
+      ctx,
+      attachments: cache,
+      media: attachments,
+      agentDir: resolveAgentDir(params.cfg, params.agentId),
+      providerRegistry,
+      config: params.cfg.tools?.media?.audio,
+    });
+    const output = result.outputs.find((entry) => entry.kind === "audio.transcription");
+    const text = output?.text?.trim();
+    return text || undefined;
+  } finally {
+    await cache.cleanup();
+  }
+}
+
+/**
+ * Owns the Discord voice sessions of one account (at most one per guild).
+ *
+ * For each session it captures speech per user, transcribes it, sends the transcript to the
+ * agent and plays the agent's reply back through text-to-speech.
+ */
+export class DiscordVoiceManager {
+  private sessions = new Map<string, VoiceSessionEntry>();
+  private botUserId?: string;
+  private readonly voiceEnabled: boolean;
+  private autoJoinTask: Promise<void> | null = null;
+
+  constructor(
+    private params: {
+      client: Client;
+      cfg: OpenClawConfig;
+      discordConfig: DiscordAccountConfig;
+      accountId: string;
+      runtime: RuntimeEnv;
+      botUserId?: string;
+    },
+  ) {
+    this.botUserId = params.botUserId;
+    this.voiceEnabled = params.discordConfig.voice?.enabled !== false;
+  }
+
+  /** Updates the bot's own user id; empty values are ignored. */
+  setBotUserId(id?: string) {
+    if (id) {
+      this.botUserId = id;
+    }
+  }
+
+  isEnabled() {
+    return this.voiceEnabled;
+  }
+
+  /**
+   * Joins every channel listed in `voice.autoJoin`, one guild at a time.
+   *
+   * Concurrent calls share the same in-flight run. Duplicate guild entries are skipped, and a
+   * failing entry is logged without preventing the remaining entries from being joined.
+   */
+  async autoJoin(): Promise<void> {
+    if (!this.voiceEnabled) {
+      return;
+    }
+    if (this.autoJoinTask) {
+      return this.autoJoinTask;
+    }
+    this.autoJoinTask = (async () => {
+      const entries = this.params.discordConfig.voice?.autoJoin ?? [];
+      logVoiceVerbose(`autoJoin: ${entries.length} entries`);
+      const seenGuilds = new Set<string>();
+      for (const entry of entries) {
+        const guildId = entry.guildId.trim();
+        if (!guildId) {
+          continue;
+        }
+        if (seenGuilds.has(guildId)) {
+          logger.warn(
+            `discord voice: autoJoin has multiple entries for guild ${guildId}; skipping`,
+          );
+          continue;
+        }
+        seenGuilds.add(guildId);
+        logVoiceVerbose(`autoJoin: joining guild ${guildId} channel ${entry.channelId}`);
+        try {
+          const result = await this.join({ guildId, channelId: entry.channelId });
+          if (!result.ok) {
+            logger.warn(`discord voice: autoJoin failed for guild ${guildId}: ${result.message}`);
+          }
+        } catch (err) {
+          logger.warn(
+            `discord voice: autoJoin failed for guild ${guildId}: ${formatErrorMessage(err)}`,
+          );
+        }
+      }
+    })().finally(() => {
+      this.autoJoinTask = null;
+    });
+    return this.autoJoinTask;
+  }
+
+  /** Lists the active sessions as one successful result per guild. */
+  status(): VoiceOperationResult[] {
+    return Array.from(this.sessions.values()).map((session) => ({
+      ok: true,
+      message: `connected: guild ${session.guildId} channel ${session.channelId}`,
+      guildId: session.guildId,
+      channelId: session.channelId,
+    }));
+  }
+
+  /**
+   * Connects to a voice channel and starts listening for speakers.
+   *
+   * An existing session for the same guild is left first unless it is already on `channelId`.
+   * Failures are reported through the returned result rather than thrown.
+   */
+  async join(params: { guildId: string; channelId: string }): Promise<VoiceOperationResult> {
+    if (!this.voiceEnabled) {
+      return {
+        ok: false,
+        message: "Discord voice is disabled (channels.discord.voice.enabled).",
+      };
+    }
+    const guildId = params.guildId.trim();
+    const channelId = params.channelId.trim();
+    if (!guildId || !channelId) {
+      return { ok: false, message: "Missing guildId or channelId." };
+    }
+    logVoiceVerbose(`join requested: guild ${guildId} channel ${channelId}`);
+
+    const existing = this.sessions.get(guildId);
+    if (existing && existing.channelId === channelId) {
+      logVoiceVerbose(`join: already connected to guild ${guildId} channel ${channelId}`);
+      return { ok: true, message: `Already connected to <#${channelId}>.`, guildId, channelId };
+    }
+    if (existing) {
+      logVoiceVerbose(`join: replacing existing session for guild ${guildId}`);
+      await this.leave({ guildId });
+    }
+
+    const channelInfo = await this.params.client.fetchChannel(channelId).catch(() => null);
+    if (!channelInfo || ("type" in channelInfo && !isVoiceChannel(channelInfo.type))) {
+      return { ok: false, message: `Channel ${channelId} is not a voice channel.` };
+    }
+    const channelGuildId = "guildId" in channelInfo ? channelInfo.guildId : undefined;
+    if (channelGuildId && channelGuildId !== guildId) {
+      return { ok: false, message: "Voice channel is not in this guild." };
+    }
+
+    const voicePlugin = this.params.client.getPlugin<VoicePlugin>("voice");
+    if (!voicePlugin) {
+      return { ok: false, message: "Discord voice plugin is not available." };
+    }
+
+    const adapterCreator = voicePlugin.getGatewayAdapterCreator(guildId);
+    const connection = joinVoiceChannel({
+      channelId,
+      guildId,
+      adapterCreator,
+      selfDeaf: false,
+      selfMute: false,
+    });
+
+    try {
+      await entersState(connection, VoiceConnectionStatus.Ready, PLAYBACK_READY_TIMEOUT_MS);
+      logVoiceVerbose(`join: connected to guild ${guildId} channel ${channelId}`);
+    } catch (err) {
+      destroyConnection(connection);
+      return { ok: false, message: `Failed to join voice channel: ${formatErrorMessage(err)}` };
+    }
+
+    const sessionChannelId = channelInfo?.id ?? channelId;
+    // Use the voice channel id as the session channel so text chat in the voice channel
+    // shares the same session as spoken audio.
+    if (sessionChannelId !== channelId) {
+      logVoiceVerbose(
+        `join: using session channel ${sessionChannelId} for voice channel ${channelId}`,
+      );
+    }
+    const route = resolveAgentRoute({
+      cfg: this.params.cfg,
+      channel: "discord",
+      accountId: this.params.accountId,
+      guildId,
+      peer: { kind: "channel", id: sessionChannelId },
+    });
+
+    const player = createAudioPlayer();
+    connection.subscribe(player);
+
+    const entry: VoiceSessionEntry = {
+      guildId,
+      channelId,
+      sessionChannelId,
+      route,
+      connection,
+      player,
+      playbackQueue: Promise.resolve(),
+      processingQueue: Promise.resolve(),
+      activeSpeakers: new Set<string>(),
+      stop: () => {
+        player.stop();
+        destroyConnection(connection);
+      },
+    };
+
+    const speakingHandler = (userId: string) => {
+      void this.handleSpeakingStart(entry, userId).catch((err) => {
+        logger.warn(`discord voice: capture failed: ${formatErrorMessage(err)}`);
+      });
+    };
+
+    connection.receiver.speaking.on("start", speakingHandler);
+    connection.on(VoiceConnectionStatus.Disconnected, async () => {
+      try {
+        // Give the connection 5s to start reconnecting on its own before tearing it down.
+        await Promise.race([
+          entersState(connection, VoiceConnectionStatus.Signalling, 5_000),
+          entersState(connection, VoiceConnectionStatus.Connecting, 5_000),
+        ]);
+      } catch {
+        // The session may have been replaced or left while waiting: only drop our own entry and
+        // never destroy a connection twice.
+        this.forgetSession(entry);
+        destroyConnection(connection);
+      }
+    });
+    connection.on(VoiceConnectionStatus.Destroyed, () => {
+      this.forgetSession(entry);
+    });
+
+    player.on("error", (err) => {
+      logger.warn(`discord voice: playback error: ${formatErrorMessage(err)}`);
+    });
+
+    this.sessions.set(guildId, entry);
+    return {
+      ok: true,
+      message: `Joined <#${channelId}>.`,
+      guildId,
+      channelId,
+    };
+  }
+
+  /**
+   * Leaves the guild's voice session. When `channelId` is given it must match the connected
+   * channel, otherwise nothing is changed and a failure result is returned.
+   */
+  async leave(params: { guildId: string; channelId?: string }): Promise<VoiceOperationResult> {
+    const guildId = params.guildId.trim();
+    logVoiceVerbose(`leave requested: guild ${guildId} channel ${params.channelId ?? "current"}`);
+    const entry = this.sessions.get(guildId);
+    if (!entry) {
+      return { ok: false, message: "Not connected to a voice channel." };
+    }
+    if (params.channelId && params.channelId !== entry.channelId) {
+      return { ok: false, message: "Not connected to that voice channel." };
+    }
+    entry.stop();
+    this.sessions.delete(guildId);
+    logVoiceVerbose(`leave: disconnected from guild ${guildId} channel ${entry.channelId}`);
+    return {
+      ok: true,
+      message: `Left <#${entry.channelId}>.`,
+      guildId,
+      channelId: entry.channelId,
+    };
+  }
+
+  /** Stops every session and forgets them all. */
+  async destroy(): Promise<void> {
+    for (const entry of this.sessions.values()) {
+      entry.stop();
+    }
+    this.sessions.clear();
+  }
+
+  /** Removes `entry` from the session map only if it is still the registered session. */
+  private forgetSession(entry: VoiceSessionEntry) {
+    if (this.sessions.get(entry.guildId) === entry) {
+      this.sessions.delete(entry.guildId);
+    }
+  }
+
+  /** Appends `task` to the session's processing chain; failures are logged, not propagated. */
+  private enqueueProcessing(entry: VoiceSessionEntry, task: () => Promise<void>) {
+    entry.processingQueue = entry.processingQueue
+      .then(task)
+      .catch((err) => logger.warn(`discord voice: processing failed: ${formatErrorMessage(err)}`));
+  }
+
+  /** Appends `task` to the session's playback chain; failures are logged, not propagated. */
+  private enqueuePlayback(entry: VoiceSessionEntry, task: () => Promise<void>) {
+    entry.playbackQueue = entry.playbackQueue
+      .then(task)
+      .catch((err) => logger.warn(`discord voice: playback failed: ${formatErrorMessage(err)}`));
+  }
+
+  /**
+   * Captures one utterance from `userId` and queues it for processing.
+   *
+   * The bot's own audio and users already being captured are ignored. Current playback is
+   * stopped as soon as a user starts speaking.
+   */
+  private async handleSpeakingStart(entry: VoiceSessionEntry, userId: string) {
+    if (!userId || entry.activeSpeakers.has(userId)) {
+      return;
+    }
+    if (this.botUserId && userId === this.botUserId) {
+      return;
+    }
+
+    entry.activeSpeakers.add(userId);
+    try {
+      logVoiceVerbose(
+        `capture start: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
+      );
+      if (entry.player.state.status === AudioPlayerStatus.Playing) {
+        entry.player.stop(true);
+      }
+
+      const stream = entry.connection.receiver.subscribe(userId, {
+        end: {
+          behavior: EndBehaviorType.AfterSilence,
+          duration: SILENCE_DURATION_MS,
+        },
+      });
+      stream.on("error", (err) => {
+        logger.warn(`discord voice: receive error: ${formatErrorMessage(err)}`);
+      });
+
+      const pcm = await decodeOpusStream(stream);
+      if (pcm.length === 0) {
+        logVoiceVerbose(
+          `capture empty: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
+        );
+        return;
+      }
+      // Check the length before touching the disk so short blips never create temp files.
+      const estimatedSeconds = estimateDurationSeconds(pcm);
+      if (estimatedSeconds < MIN_SEGMENT_SECONDS) {
+        logVoiceVerbose(
+          `capture too short (${estimatedSeconds.toFixed(2)}s): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
+        );
+        return;
+      }
+      const { path: wavPath, durationSeconds } = await writeWavFile(pcm);
+      logVoiceVerbose(
+        `capture ready (${durationSeconds.toFixed(2)}s): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
+      );
+      this.enqueueProcessing(entry, async () => {
+        await this.processSegment({ entry, wavPath, userId, durationSeconds });
+      });
+    } finally {
+      // Always release the speaker slot, including when subscribing or decoding throws.
+      entry.activeSpeakers.delete(userId);
+    }
+  }
+
+  /**
+   * Runs one captured segment through transcription, the agent and text-to-speech, then queues
+   * the resulting audio for playback. Each stage returns early when it yields nothing.
+   */
+  private async processSegment(params: {
+    entry: VoiceSessionEntry;
+    wavPath: string;
+    userId: string;
+    durationSeconds: number;
+  }) {
+    const { entry, wavPath, userId, durationSeconds } = params;
+    logVoiceVerbose(
+      `segment processing (${durationSeconds.toFixed(2)}s): guild ${entry.guildId} channel ${entry.channelId}`,
+    );
+    const transcript = await transcribeAudio({
+      cfg: this.params.cfg,
+      agentId: entry.route.agentId,
+      filePath: wavPath,
+    });
+    if (!transcript) {
+      logVoiceVerbose(
+        `transcription empty: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
+      );
+      return;
+    }
+    logVoiceVerbose(
+      `transcription ok (${transcript.length} chars): guild ${entry.guildId} channel ${entry.channelId}`,
+    );
+
+    // Prefix the transcript with the speaker's display name.
+    const speakerLabel = await this.resolveSpeakerLabel(entry.guildId, userId);
+    const prompt = speakerLabel ? `${speakerLabel}: ${transcript}` : transcript;
+
+    const result = await agentCommand(
+      {
+        message: prompt,
+        sessionKey: entry.route.sessionKey,
+        agentId: entry.route.agentId,
+        messageChannel: "discord",
+        deliver: false,
+      },
+      this.params.runtime,
+    );
+
+    const replyText = (result.payloads ?? [])
+      .map((payload) => payload.text)
+      .filter((text) => typeof text === "string" && text.trim())
+      .join("\n")
+      .trim();
+
+    if (!replyText) {
+      logVoiceVerbose(
+        `reply empty: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
+      );
+      return;
+    }
+    logVoiceVerbose(
+      `reply ok (${replyText.length} chars): guild ${entry.guildId} channel ${entry.channelId}`,
+    );
+
+    const { cfg: ttsCfg, resolved: ttsConfig } = resolveVoiceTtsConfig({
+      cfg: this.params.cfg,
+      override: this.params.discordConfig.voice?.tts,
+    });
+    const directive = parseTtsDirectives(replyText, ttsConfig.modelOverrides);
+    const speakText = directive.overrides.ttsText ?? directive.cleanedText.trim();
+    if (!speakText) {
+      logVoiceVerbose(
+        `tts skipped (empty): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`,
+      );
+      return;
+    }
+
+    const ttsResult = await textToSpeech({
+      text: speakText,
+      cfg: ttsCfg,
+      channel: "discord",
+      overrides: directive.overrides,
+    });
+    if (!ttsResult.success || !ttsResult.audioPath) {
+      logger.warn(`discord voice: TTS failed: ${ttsResult.error ?? "unknown error"}`);
+      return;
+    }
+    // Capture the checked path so the closure below does not depend on property narrowing.
+    const audioPath = ttsResult.audioPath;
+    logVoiceVerbose(
+      `tts ok (${speakText.length} chars): guild ${entry.guildId} channel ${entry.channelId}`,
+    );
+
+    this.enqueuePlayback(entry, async () => {
+      logVoiceVerbose(
+        `playback start: guild ${entry.guildId} channel ${entry.channelId} file ${path.basename(audioPath)}`,
+      );
+      const resource = createAudioResource(audioPath);
+      entry.player.play(resource);
+      // Timeouts while waiting for Playing/Idle are ignored so the queue keeps moving.
+      await entersState(entry.player, AudioPlayerStatus.Playing, PLAYBACK_READY_TIMEOUT_MS).catch(
+        () => undefined,
+      );
+      await entersState(entry.player, AudioPlayerStatus.Idle, SPEAKING_READY_TIMEOUT_MS).catch(
+        () => undefined,
+      );
+      logVoiceVerbose(`playback done: guild ${entry.guildId} channel ${entry.channelId}`);
+    });
+  }
+
+  /**
+   * Resolves a display name for `userId`: guild nickname, then global name, then username.
+   * Falls back to the user-level lookup and finally to the raw id when lookups fail.
+   */
+  private async resolveSpeakerLabel(guildId: string, userId: string): Promise<string> {
+    try {
+      const member = await this.params.client.fetchMember(guildId, userId);
+      return member.nickname ?? member.user?.globalName ?? member.user?.username ?? userId;
+    } catch {
+      try {
+        const user = await this.params.client.fetchUser(userId);
+        return user.globalName ?? user.username ?? userId;
+      } catch {
+        return userId;
+      }
+    }
+  }
+}
+
+/** Ready listener that triggers the manager's configured auto-joins. */
+export class DiscordVoiceReadyListener extends ReadyListener {
+  constructor(private manager: DiscordVoiceManager) {
+    super();
+  }
+
+  async handle() {
+    await this.manager.autoJoin();
+  }
+}
+
+/** Returns true for guild voice and stage channels. */
+function isVoiceChannel(type: ChannelType) {
+  return type === ChannelType.GuildVoice || type === ChannelType.GuildStageVoice;
+}