diff --git a/CHANGELOG.md b/CHANGELOG.md index 662af8051b6..c1770977455 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## 2.0.0-beta5 — Unreleased +### Features +- Talk mode: continuous speech conversations (macOS/iOS/Android) with ElevenLabs TTS, reply directives, and optional interrupt-on-speech. + ### Fixes - macOS: Voice Wake now fully tears down the Speech pipeline when disabled (cancel pending restarts, drop stale callbacks) to avoid high CPU in the background. - iOS/Android nodes: enable scrolling for loaded web pages in the Canvas WebView (default scaffold stays touch-first). @@ -10,7 +13,7 @@ - iOS node: fix ReplayKit screen recording crash caused by queue isolation assertions during capture. - iOS/Android nodes: bridge auto-connect refreshes stale tokens and settings now show richer bridge/device details. - iOS/Android nodes: status pill now surfaces camera activity instead of overlay toasts. -- iOS/Android nodes: camera snaps recompress to keep base64 payloads under 5 MB. +- iOS/Android/macOS nodes: camera snaps recompress to keep base64 payloads under 5 MB. - CLI: avoid spurious gateway close errors after successful request/response cycles. - Agent runtime: clamp tool-result images to the 5MB Anthropic limit to avoid hard request rejections. - Tests: add Swift Testing coverage for camera errors and Kotest coverage for Android bridge endpoints. 
diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/node/CameraCaptureManager.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/node/CameraCaptureManager.kt index b25b95ea477..41669076618 100644 --- a/apps/android/app/src/main/java/com/steipete/clawdis/node/node/CameraCaptureManager.kt +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/node/CameraCaptureManager.kt @@ -101,6 +101,7 @@ class CameraCaptureManager(private val context: Context) { } val maxPayloadBytes = 5 * 1024 * 1024 + // Base64 inflates payloads by ~4/3; cap encoded bytes so the payload stays under 5MB (API limit). val maxEncodedBytes = (maxPayloadBytes / 4) * 3 val result = JpegSizeLimiter.compressToLimit( diff --git a/apps/android/app/src/main/java/com/steipete/clawdis/node/ui/RootScreen.kt b/apps/android/app/src/main/java/com/steipete/clawdis/node/ui/RootScreen.kt index f3cfb4b67d1..2594449b80c 100644 --- a/apps/android/app/src/main/java/com/steipete/clawdis/node/ui/RootScreen.kt +++ b/apps/android/app/src/main/java/com/steipete/clawdis/node/ui/RootScreen.kt @@ -67,6 +67,7 @@ fun RootScreen(viewModel: MainViewModel) { val cameraFlashToken by viewModel.cameraFlashToken.collectAsState() val activity = remember(cameraHud) { + // Status pill owns transient capture state so it doesn't overlap the connection indicator. cameraHud?.let { hud -> when (hud.kind) { CameraHudKind.Photo -> diff --git a/apps/ios/Sources/Camera/CameraController.swift b/apps/ios/Sources/Camera/CameraController.swift index cf8c6ce50f9..00d633bd925 100644 --- a/apps/ios/Sources/Camera/CameraController.swift +++ b/apps/ios/Sources/Camera/CameraController.swift @@ -85,7 +85,7 @@ actor CameraController { withExtendedLifetime(delegate) {} let maxPayloadBytes = 5 * 1024 * 1024 - // Base64 inflates payloads by ~4/3, so cap encoded bytes to keep payload <= 5MB. + // Base64 inflates payloads by ~4/3; cap encoded bytes so the payload stays under 5MB (API limit). 
let maxEncodedBytes = (maxPayloadBytes / 4) * 3 let res = try JPEGTranscoder.transcodeToJPEG( imageData: rawData, diff --git a/apps/ios/Sources/RootCanvas.swift b/apps/ios/Sources/RootCanvas.swift index 4d552618eb4..c02eceb6932 100644 --- a/apps/ios/Sources/RootCanvas.swift +++ b/apps/ios/Sources/RootCanvas.swift @@ -173,6 +173,7 @@ private struct CanvasContent: View { } private var statusActivity: StatusPill.Activity? { + // Status pill owns transient capture state so it doesn't overlap the connection indicator. guard let cameraHUDText, !cameraHUDText.isEmpty, let cameraHUDKind else { return nil } let systemImage: String let tint: Color? diff --git a/apps/macos/Sources/Clawdis/CameraCaptureService.swift b/apps/macos/Sources/Clawdis/CameraCaptureService.swift index c087c8fd3e4..3c9d9c3570f 100644 --- a/apps/macos/Sources/Clawdis/CameraCaptureService.swift +++ b/apps/macos/Sources/Clawdis/CameraCaptureService.swift @@ -79,7 +79,14 @@ actor CameraCaptureService { } withExtendedLifetime(delegate) {} - let res = try JPEGTranscoder.transcodeToJPEG(imageData: rawData, maxWidthPx: maxWidth, quality: quality) + let maxPayloadBytes = 5 * 1024 * 1024 + // Base64 inflates payloads by ~4/3; cap encoded bytes so the payload stays under 5MB (API limit). + let maxEncodedBytes = (maxPayloadBytes / 4) * 3 + let res = try JPEGTranscoder.transcodeToJPEG( + imageData: rawData, + maxWidthPx: maxWidth, + quality: quality, + maxBytes: maxEncodedBytes) return (data: res.data, size: CGSize(width: res.widthPx, height: res.heightPx)) } diff --git a/docs/camera.md b/docs/camera.md index 0353d1567ef..aba3b52680e 100644 --- a/docs/camera.md +++ b/docs/camera.md @@ -121,6 +121,7 @@ clawdis nodes camera clip --node --no-audio Notes: - `clawdis nodes camera snap` defaults to `maxWidth=1600` unless overridden. +- Photo payloads are recompressed so the base64-encoded payload stays under 5 MB. ## Safety + practical limits