feat(android): add camera list and device selection

This commit is contained in:
Ayaan Zaidi
2026-02-27 09:24:57 +05:30
committed by Ayaan Zaidi
parent c838a4dde0
commit 1f7b3c613d
7 changed files with 126 additions and 7 deletions

View File

@@ -1,13 +1,16 @@
package ai.openclaw.android.node package ai.openclaw.android.node
import android.Manifest import android.Manifest
import android.content.Context
import android.annotation.SuppressLint import android.annotation.SuppressLint
import android.content.Context
import android.graphics.Bitmap import android.graphics.Bitmap
import android.graphics.BitmapFactory import android.graphics.BitmapFactory
import android.graphics.Matrix import android.graphics.Matrix
import android.util.Base64
import android.content.pm.PackageManager import android.content.pm.PackageManager
import android.hardware.camera2.CameraCharacteristics
import android.util.Base64
import androidx.camera.camera2.interop.Camera2CameraInfo
import androidx.camera.core.CameraInfo
import androidx.exifinterface.media.ExifInterface import androidx.exifinterface.media.ExifInterface
import androidx.lifecycle.LifecycleOwner import androidx.lifecycle.LifecycleOwner
import androidx.camera.core.CameraSelector import androidx.camera.core.CameraSelector
@@ -44,6 +47,12 @@ import kotlin.coroutines.resumeWithException
class CameraCaptureManager(private val context: Context) { class CameraCaptureManager(private val context: Context) {
data class Payload(val payloadJson: String) data class Payload(val payloadJson: String)
data class FilePayload(val file: File, val durationMs: Long, val hasAudio: Boolean) data class FilePayload(val file: File, val durationMs: Long, val hasAudio: Boolean)
data class CameraDeviceInfo(
val id: String,
val name: String,
val position: String,
val deviceType: String,
)
@Volatile private var lifecycleOwner: LifecycleOwner? = null @Volatile private var lifecycleOwner: LifecycleOwner? = null
@Volatile private var permissionRequester: PermissionRequester? = null @Volatile private var permissionRequester: PermissionRequester? = null
@@ -56,6 +65,14 @@ class CameraCaptureManager(private val context: Context) {
permissionRequester = requester permissionRequester = requester
} }
suspend fun listDevices(): List<CameraDeviceInfo> =
withContext(Dispatchers.Main) {
val provider = context.cameraProvider()
provider.availableCameraInfos
.mapNotNull { info -> cameraDeviceInfoOrNull(info) }
.sortedBy { it.id }
}
private suspend fun ensureCameraPermission() { private suspend fun ensureCameraPermission() {
val granted = checkSelfPermission(context, Manifest.permission.CAMERA) == PackageManager.PERMISSION_GRANTED val granted = checkSelfPermission(context, Manifest.permission.CAMERA) == PackageManager.PERMISSION_GRANTED
if (granted) return if (granted) return
@@ -88,11 +105,11 @@ class CameraCaptureManager(private val context: Context) {
val facing = parseFacing(params) ?: "front" val facing = parseFacing(params) ?: "front"
val quality = (parseQuality(params) ?: 0.95).coerceIn(0.1, 1.0) val quality = (parseQuality(params) ?: 0.95).coerceIn(0.1, 1.0)
val maxWidth = parseMaxWidth(params) ?: 1600 val maxWidth = parseMaxWidth(params) ?: 1600
val deviceId = parseDeviceId(params)
val provider = context.cameraProvider() val provider = context.cameraProvider()
val capture = ImageCapture.Builder().build() val capture = ImageCapture.Builder().build()
val selector = val selector = resolveCameraSelector(provider, facing, deviceId)
if (facing == "front") CameraSelector.DEFAULT_FRONT_CAMERA else CameraSelector.DEFAULT_BACK_CAMERA
provider.unbindAll() provider.unbindAll()
provider.bindToLifecycle(owner, selector, capture) provider.bindToLifecycle(owner, selector, capture)
@@ -154,9 +171,10 @@ class CameraCaptureManager(private val context: Context) {
val facing = parseFacing(params) ?: "front" val facing = parseFacing(params) ?: "front"
val durationMs = (parseDurationMs(params) ?: 3_000).coerceIn(200, 60_000) val durationMs = (parseDurationMs(params) ?: 3_000).coerceIn(200, 60_000)
val includeAudio = parseIncludeAudio(params) ?: true val includeAudio = parseIncludeAudio(params) ?: true
val deviceId = parseDeviceId(params)
if (includeAudio) ensureMicPermission() if (includeAudio) ensureMicPermission()
android.util.Log.w("CameraCaptureManager", "clip: start facing=$facing duration=$durationMs audio=$includeAudio") android.util.Log.w("CameraCaptureManager", "clip: start facing=$facing duration=$durationMs audio=$includeAudio deviceId=${deviceId ?: "-"}")
val provider = context.cameraProvider() val provider = context.cameraProvider()
android.util.Log.w("CameraCaptureManager", "clip: got camera provider") android.util.Log.w("CameraCaptureManager", "clip: got camera provider")
@@ -168,8 +186,7 @@ class CameraCaptureManager(private val context: Context) {
) )
.build() .build()
val videoCapture = VideoCapture.withOutput(recorder) val videoCapture = VideoCapture.withOutput(recorder)
val selector = val selector = resolveCameraSelector(provider, facing, deviceId)
if (facing == "front") CameraSelector.DEFAULT_FRONT_CAMERA else CameraSelector.DEFAULT_BACK_CAMERA
// CameraX requires a Preview use case for the camera to start producing frames; // CameraX requires a Preview use case for the camera to start producing frames;
// without it, the encoder may get no data (ERROR_NO_VALID_DATA). // without it, the encoder may get no data (ERROR_NO_VALID_DATA).
@@ -308,6 +325,12 @@ class CameraCaptureManager(private val context: Context) {
private fun parseDurationMs(params: JsonObject?): Int? = private fun parseDurationMs(params: JsonObject?): Int? =
readPrimitive(params, "durationMs")?.contentOrNull?.toIntOrNull() readPrimitive(params, "durationMs")?.contentOrNull?.toIntOrNull()
private fun parseDeviceId(params: JsonObject?): String? =
readPrimitive(params, "deviceId")
?.contentOrNull
?.trim()
?.takeIf { it.isNotEmpty() }
private fun parseIncludeAudio(params: JsonObject?): Boolean? { private fun parseIncludeAudio(params: JsonObject?): Boolean? {
val value = readPrimitive(params, "includeAudio")?.contentOrNull?.trim()?.lowercase() val value = readPrimitive(params, "includeAudio")?.contentOrNull?.trim()?.lowercase()
return when (value) { return when (value) {
@@ -318,6 +341,56 @@ class CameraCaptureManager(private val context: Context) {
} }
private fun Context.mainExecutor(): Executor = ContextCompat.getMainExecutor(this) private fun Context.mainExecutor(): Executor = ContextCompat.getMainExecutor(this)
private fun resolveCameraSelector(
provider: ProcessCameraProvider,
facing: String,
deviceId: String?,
): CameraSelector {
if (deviceId.isNullOrEmpty()) {
return if (facing == "front") CameraSelector.DEFAULT_FRONT_CAMERA else CameraSelector.DEFAULT_BACK_CAMERA
}
val availableIds = provider.availableCameraInfos.mapNotNull { cameraIdOrNull(it) }.toSet()
if (!availableIds.contains(deviceId)) {
throw IllegalStateException("INVALID_REQUEST: unknown camera deviceId '$deviceId'")
}
return CameraSelector.Builder()
.addCameraFilter { infos -> infos.filter { cameraIdOrNull(it) == deviceId } }
.build()
}
private fun cameraDeviceInfoOrNull(info: CameraInfo): CameraDeviceInfo? {
val cameraId = cameraIdOrNull(info) ?: return null
val lensFacing =
runCatching {
Camera2CameraInfo.from(info).getCameraCharacteristic(CameraCharacteristics.LENS_FACING)
}.getOrNull()
val position =
when (lensFacing) {
CameraCharacteristics.LENS_FACING_FRONT -> "front"
CameraCharacteristics.LENS_FACING_BACK -> "back"
CameraCharacteristics.LENS_FACING_EXTERNAL -> "external"
else -> "unspecified"
}
val deviceType =
if (lensFacing == CameraCharacteristics.LENS_FACING_EXTERNAL) "external" else "builtIn"
val name =
when (position) {
"front" -> "Front Camera"
"back" -> "Back Camera"
"external" -> "External Camera"
else -> "Camera $cameraId"
}
return CameraDeviceInfo(
id = cameraId,
name = name,
position = position,
deviceType = deviceType,
)
}
private fun cameraIdOrNull(info: CameraInfo): String? =
runCatching { Camera2CameraInfo.from(info).cameraId }.getOrNull()
} }
private suspend fun Context.cameraProvider(): ProcessCameraProvider = private suspend fun Context.cameraProvider(): ProcessCameraProvider =

View File

@@ -9,7 +9,10 @@ import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.withContext import kotlinx.coroutines.withContext
import kotlinx.serialization.json.Json import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonPrimitive import kotlinx.serialization.json.JsonPrimitive
import kotlinx.serialization.json.buildJsonArray
import kotlinx.serialization.json.buildJsonObject
import kotlinx.serialization.json.contentOrNull import kotlinx.serialization.json.contentOrNull
import kotlinx.serialization.json.put
internal const val CAMERA_CLIP_MAX_RAW_BYTES: Long = 18L * 1024L * 1024L internal const val CAMERA_CLIP_MAX_RAW_BYTES: Long = 18L * 1024L * 1024L
@@ -24,6 +27,33 @@ class CameraHandler(
private val triggerCameraFlash: () -> Unit, private val triggerCameraFlash: () -> Unit,
private val invokeErrorFromThrowable: (err: Throwable) -> Pair<String, String>, private val invokeErrorFromThrowable: (err: Throwable) -> Pair<String, String>,
) { ) {
suspend fun handleList(_paramsJson: String?): GatewaySession.InvokeResult {
return try {
val devices = camera.listDevices()
val payload =
buildJsonObject {
put(
"devices",
buildJsonArray {
devices.forEach { device ->
add(
buildJsonObject {
put("id", JsonPrimitive(device.id))
put("name", JsonPrimitive(device.name))
put("position", JsonPrimitive(device.position))
put("deviceType", JsonPrimitive(device.deviceType))
},
)
}
},
)
}.toString()
GatewaySession.InvokeResult.ok(payload)
} catch (err: Throwable) {
val (code, message) = invokeErrorFromThrowable(err)
GatewaySession.InvokeResult.error(code = code, message = message)
}
}
suspend fun handleSnap(paramsJson: String?): GatewaySession.InvokeResult { suspend fun handleSnap(paramsJson: String?): GatewaySession.InvokeResult {
val logFile = if (BuildConfig.DEBUG) java.io.File(appContext.cacheDir, "camera_debug.log") else null val logFile = if (BuildConfig.DEBUG) java.io.File(appContext.cacheDir, "camera_debug.log") else null

View File

@@ -62,6 +62,11 @@ object InvokeCommandRegistry {
name = OpenClawScreenCommand.Record.rawValue, name = OpenClawScreenCommand.Record.rawValue,
requiresForeground = true, requiresForeground = true,
), ),
InvokeCommandSpec(
name = OpenClawCameraCommand.List.rawValue,
requiresForeground = true,
availability = InvokeCommandAvailability.CameraEnabled,
),
InvokeCommandSpec( InvokeCommandSpec(
name = OpenClawCameraCommand.Snap.rawValue, name = OpenClawCameraCommand.Snap.rawValue,
requiresForeground = true, requiresForeground = true,

View File

@@ -112,6 +112,7 @@ class InvokeDispatcher(
} }
// Camera commands // Camera commands
OpenClawCameraCommand.List.rawValue -> cameraHandler.handleList(paramsJson)
OpenClawCameraCommand.Snap.rawValue -> cameraHandler.handleSnap(paramsJson) OpenClawCameraCommand.Snap.rawValue -> cameraHandler.handleSnap(paramsJson)
OpenClawCameraCommand.Clip.rawValue -> cameraHandler.handleClip(paramsJson) OpenClawCameraCommand.Clip.rawValue -> cameraHandler.handleClip(paramsJson)

View File

@@ -35,6 +35,7 @@ enum class OpenClawCanvasA2UICommand(val rawValue: String) {
} }
enum class OpenClawCameraCommand(val rawValue: String) { enum class OpenClawCameraCommand(val rawValue: String) {
List("camera.list"),
Snap("camera.snap"), Snap("camera.snap"),
Clip("camera.clip"), Clip("camera.clip"),
; ;

View File

@@ -22,6 +22,7 @@ class InvokeCommandRegistryTest {
assertFalse(commands.contains(OpenClawCameraCommand.Snap.rawValue)) assertFalse(commands.contains(OpenClawCameraCommand.Snap.rawValue))
assertFalse(commands.contains(OpenClawCameraCommand.Clip.rawValue)) assertFalse(commands.contains(OpenClawCameraCommand.Clip.rawValue))
assertFalse(commands.contains(OpenClawCameraCommand.List.rawValue))
assertFalse(commands.contains(OpenClawLocationCommand.Get.rawValue)) assertFalse(commands.contains(OpenClawLocationCommand.Get.rawValue))
assertTrue(commands.contains(OpenClawDeviceCommand.Status.rawValue)) assertTrue(commands.contains(OpenClawDeviceCommand.Status.rawValue))
assertTrue(commands.contains(OpenClawDeviceCommand.Info.rawValue)) assertTrue(commands.contains(OpenClawDeviceCommand.Info.rawValue))
@@ -44,6 +45,7 @@ class InvokeCommandRegistryTest {
assertTrue(commands.contains(OpenClawCameraCommand.Snap.rawValue)) assertTrue(commands.contains(OpenClawCameraCommand.Snap.rawValue))
assertTrue(commands.contains(OpenClawCameraCommand.Clip.rawValue)) assertTrue(commands.contains(OpenClawCameraCommand.Clip.rawValue))
assertTrue(commands.contains(OpenClawCameraCommand.List.rawValue))
assertTrue(commands.contains(OpenClawLocationCommand.Get.rawValue)) assertTrue(commands.contains(OpenClawLocationCommand.Get.rawValue))
assertTrue(commands.contains(OpenClawDeviceCommand.Status.rawValue)) assertTrue(commands.contains(OpenClawDeviceCommand.Status.rawValue))
assertTrue(commands.contains(OpenClawDeviceCommand.Info.rawValue)) assertTrue(commands.contains(OpenClawDeviceCommand.Info.rawValue))

View File

@@ -31,6 +31,13 @@ class OpenClawProtocolConstantsTest {
assertEquals("device", OpenClawCapability.Device.rawValue) assertEquals("device", OpenClawCapability.Device.rawValue)
} }
@Test
fun cameraCommandsUseStableStrings() {
assertEquals("camera.list", OpenClawCameraCommand.List.rawValue)
assertEquals("camera.snap", OpenClawCameraCommand.Snap.rawValue)
assertEquals("camera.clip", OpenClawCameraCommand.Clip.rawValue)
}
@Test @Test
fun screenCommandsUseStableStrings() { fun screenCommandsUseStableStrings() {
assertEquals("screen.record", OpenClawScreenCommand.Record.rawValue) assertEquals("screen.record", OpenClawScreenCommand.Record.rawValue)