diff --git a/apps/android/app/src/main/java/ai/openclaw/android/voice/TalkModeManager.kt b/apps/android/app/src/main/java/ai/openclaw/android/voice/TalkModeManager.kt index 04d18b62260..54bea53bd67 100644 --- a/apps/android/app/src/main/java/ai/openclaw/android/voice/TalkModeManager.kt +++ b/apps/android/app/src/main/java/ai/openclaw/android/voice/TalkModeManager.kt @@ -54,6 +54,47 @@ class TalkModeManager( private const val tag = "TalkMode" private const val defaultModelIdFallback = "eleven_v3" private const val defaultOutputFormatFallback = "pcm_24000" + private const val defaultTalkProvider = "elevenlabs" + + internal data class TalkProviderConfigSelection( + val provider: String, + val config: JsonObject, + val normalizedPayload: Boolean, + ) + + private fun normalizeTalkProviderId(raw: String?): String? { + val trimmed = raw?.trim()?.lowercase().orEmpty() + return trimmed.takeIf { it.isNotEmpty() } + } + + internal fun selectTalkProviderConfig(talk: JsonObject?): TalkProviderConfigSelection? { + if (talk == null) return null + val rawProvider = talk["provider"].asStringOrNull() + val rawProviders = talk["providers"].asObjectOrNull() + val hasNormalizedPayload = rawProvider != null || rawProviders != null + if (hasNormalizedPayload) { + val providers = + rawProviders?.entries?.mapNotNull { (key, value) -> + val providerId = normalizeTalkProviderId(key) ?: return@mapNotNull null + val providerConfig = value.asObjectOrNull() ?: return@mapNotNull null + providerId to providerConfig + }?.toMap().orEmpty() + val providerId = + normalizeTalkProviderId(rawProvider) + ?: providers.keys.sorted().firstOrNull() + ?: defaultTalkProvider + return TalkProviderConfigSelection( + provider = providerId, + config = providers[providerId] ?: buildJsonObject {}, + normalizedPayload = true, + ) + } + return TalkProviderConfigSelection( + provider = defaultTalkProvider, + config = talk, + normalizedPayload = false, + ) + } } private val mainHandler = Handler(Looper.getMainLooper()) @@ -818,30 +859,49 @@ class TalkModeManager( val root = json.parseToJsonElement(res).asObjectOrNull() val config = root?.get("config").asObjectOrNull() val talk = config?.get("talk").asObjectOrNull() + val selection = selectTalkProviderConfig(talk) + val activeProvider = selection?.provider ?: defaultTalkProvider + val activeConfig = selection?.config val sessionCfg = config?.get("session").asObjectOrNull() val mainKey = normalizeMainKey(sessionCfg?.get("mainKey").asStringOrNull()) - val voice = talk?.get("voiceId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } + val voice = activeConfig?.get("voiceId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } val aliases = - talk?.get("voiceAliases").asObjectOrNull()?.entries?.mapNotNull { (key, value) -> + activeConfig?.get("voiceAliases").asObjectOrNull()?.entries?.mapNotNull { (key, value) -> val id = value.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } ?: return@mapNotNull null normalizeAliasKey(key).takeIf { it.isNotEmpty() }?.let { it to id } }?.toMap().orEmpty() - val model = talk?.get("modelId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } - val outputFormat = talk?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } - val key = talk?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } + val model = activeConfig?.get("modelId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } + val outputFormat = + activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } + val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull() if (!isCanonicalMainSessionKey(mainSessionKey)) { mainSessionKey = mainKey } - defaultVoiceId = voice ?: envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() } + defaultVoiceId = + if (activeProvider == defaultTalkProvider) { + voice ?: envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() } + } else { + voice + } voiceAliases = aliases if (!voiceOverrideActive) currentVoiceId = defaultVoiceId defaultModelId = model ?: defaultModelIdFallback if (!modelOverrideActive) currentModelId = defaultModelId defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback - apiKey = key ?: envKey?.takeIf { it.isNotEmpty() } + apiKey = + if (activeProvider == defaultTalkProvider) { + key ?: envKey?.takeIf { it.isNotEmpty() } + } else { + null + } if (interrupt != null) interruptOnSpeech = interrupt + if (activeProvider != defaultTalkProvider) { + Log.w(tag, "talk provider $activeProvider unsupported; using system voice fallback") + } else if (selection?.normalizedPayload == true) { + Log.d(tag, "talk config provider=elevenlabs") + } } catch (_: Throwable) { defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() } defaultModelId = defaultModelIdFallback diff --git a/apps/android/app/src/test/java/ai/openclaw/android/voice/TalkModeConfigParsingTest.kt b/apps/android/app/src/test/java/ai/openclaw/android/voice/TalkModeConfigParsingTest.kt new file mode 100644 index 00000000000..5daa62080d7 --- /dev/null +++ b/apps/android/app/src/test/java/ai/openclaw/android/voice/TalkModeConfigParsingTest.kt @@ -0,0 +1,59 @@ +package ai.openclaw.android.voice + +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.jsonPrimitive +import kotlinx.serialization.json.jsonObject +import org.junit.Assert.assertEquals +import org.junit.Assert.assertNotNull +import org.junit.Assert.assertTrue +import org.junit.Test + +class TalkModeConfigParsingTest { + private val json = Json { ignoreUnknownKeys = true } + + @Test + fun prefersNormalizedTalkProviderPayload() { + val talk = + json.parseToJsonElement( + """ + { + "provider": "elevenlabs", + "providers": { + "elevenlabs": { + "voiceId": "voice-normalized" + } + }, + "voiceId": "voice-legacy" + } + """.trimIndent(), + ) + .jsonObject + + val selection = TalkModeManager.selectTalkProviderConfig(talk) + assertNotNull(selection) + assertEquals("elevenlabs", selection?.provider) + assertTrue(selection?.normalizedPayload == true) + assertEquals("voice-normalized", selection?.config?.get("voiceId")?.jsonPrimitive?.content) + } + + @Test + fun fallsBackToLegacyTalkFieldsWhenNormalizedPayloadMissing() { + val talk = + json.parseToJsonElement( + """ + { + "voiceId": "voice-legacy", + "apiKey": "legacy-key" + } + """.trimIndent(), + ) + .jsonObject + + val selection = TalkModeManager.selectTalkProviderConfig(talk) + assertNotNull(selection) + assertEquals("elevenlabs", selection?.provider) + assertTrue(selection?.normalizedPayload == false) + assertEquals("voice-legacy", selection?.config?.get("voiceId")?.jsonPrimitive?.content) + assertEquals("legacy-key", selection?.config?.get("apiKey")?.jsonPrimitive?.content) + } +} diff --git a/apps/ios/Sources/Gateway/GatewaySettingsStore.swift b/apps/ios/Sources/Gateway/GatewaySettingsStore.swift index 3ff57ad2e67..264aa8aa50d 100644 --- a/apps/ios/Sources/Gateway/GatewaySettingsStore.swift +++ b/apps/ios/Sources/Gateway/GatewaySettingsStore.swift @@ -25,7 +25,8 @@ enum GatewaySettingsStore { private static let instanceIdAccount = "instanceId" private static let preferredGatewayStableIDAccount = "preferredStableID" private static let lastDiscoveredGatewayStableIDAccount = "lastDiscoveredStableID" - private static let talkElevenLabsApiKeyAccount = "elevenlabs.apiKey" + private static let talkProviderApiKeyAccountPrefix = "provider.apiKey." + private static let talkElevenLabsApiKeyLegacyAccount = "elevenlabs.apiKey" static func bootstrapPersistence() { self.ensureStableInstanceID() @@ -145,25 +146,52 @@ enum GatewaySettingsStore { case discovered } - static func loadTalkElevenLabsApiKey() -> String? { + static func loadTalkProviderApiKey(provider: String) -> String? { + guard let providerId = self.normalizedTalkProviderID(provider) else { return nil } + let account = self.talkProviderApiKeyAccount(providerId: providerId) let value = KeychainStore.loadString( service: self.talkService, - account: self.talkElevenLabsApiKeyAccount)? + account: account)? .trimmingCharacters(in: .whitespacesAndNewlines) if value?.isEmpty == false { return value } + + if providerId == "elevenlabs" { + let legacyValue = KeychainStore.loadString( + service: self.talkService, + account: self.talkElevenLabsApiKeyLegacyAccount)? + .trimmingCharacters(in: .whitespacesAndNewlines) + if legacyValue?.isEmpty == false { + _ = KeychainStore.saveString(legacyValue!, service: self.talkService, account: account) + return legacyValue + } + } + return nil } - static func saveTalkElevenLabsApiKey(_ apiKey: String?) { + static func saveTalkProviderApiKey(_ apiKey: String?, provider: String) { + guard let providerId = self.normalizedTalkProviderID(provider) else { return } + let account = self.talkProviderApiKeyAccount(providerId: providerId) let trimmed = apiKey?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" if trimmed.isEmpty { - _ = KeychainStore.delete(service: self.talkService, account: self.talkElevenLabsApiKeyAccount) + _ = KeychainStore.delete(service: self.talkService, account: account) + if providerId == "elevenlabs" { + _ = KeychainStore.delete(service: self.talkService, account: self.talkElevenLabsApiKeyLegacyAccount) + } return } - _ = KeychainStore.saveString( - trimmed, - service: self.talkService, - account: self.talkElevenLabsApiKeyAccount) + _ = KeychainStore.saveString(trimmed, service: self.talkService, account: account) + if providerId == "elevenlabs" { + _ = KeychainStore.delete(service: self.talkService, account: self.talkElevenLabsApiKeyLegacyAccount) + } + } + + static func loadTalkElevenLabsApiKey() -> String? { + self.loadTalkProviderApiKey(provider: "elevenlabs") + } + + static func saveTalkElevenLabsApiKey(_ apiKey: String?) { + self.saveTalkProviderApiKey(apiKey, provider: "elevenlabs") } static func saveLastGatewayConnectionManual(host: String, port: Int, useTLS: Bool, stableID: String) { @@ -278,6 +306,15 @@ enum GatewaySettingsStore { "gateway-password.\(instanceId)" } + private static func talkProviderApiKeyAccount(providerId: String) -> String { + self.talkProviderApiKeyAccountPrefix + providerId + } + + private static func normalizedTalkProviderID(_ provider: String) -> String? { + let trimmed = provider.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + return trimmed.isEmpty ? nil : trimmed + } + private static func ensureStableInstanceID() { let defaults = UserDefaults.standard diff --git a/apps/ios/Sources/Voice/TalkModeManager.swift b/apps/ios/Sources/Voice/TalkModeManager.swift index 8f208c66d50..4e1a67945f1 100644 --- a/apps/ios/Sources/Voice/TalkModeManager.swift +++ b/apps/ios/Sources/Voice/TalkModeManager.swift @@ -16,6 +16,7 @@ import Speech final class TalkModeManager: NSObject { private typealias SpeechRequest = SFSpeechAudioBufferRecognitionRequest private static let defaultModelIdFallback = "eleven_v3" + private static let defaultTalkProvider = "elevenlabs" private static let redactedConfigSentinel = "__OPENCLAW_REDACTED__" var isEnabled: Bool = false var isListening: Bool = false @@ -1885,6 +1886,46 @@ extension TalkModeManager { return trimmed } + struct TalkProviderConfigSelection { + let provider: String + let config: [String: Any] + let normalizedPayload: Bool + } + + private static func normalizedTalkProviderID(_ raw: String?) -> String? { + let trimmed = (raw ?? "").trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + return trimmed.isEmpty ? nil : trimmed + } + + static func selectTalkProviderConfig(_ talk: [String: Any]?) -> TalkProviderConfigSelection? { + guard let talk else { return nil } + let rawProvider = talk["provider"] as? String + let rawProviders = talk["providers"] as? [String: Any] + let hasNormalized = rawProvider != nil || rawProviders != nil + if hasNormalized { + let providers = rawProviders ?? [:] + let normalizedProviders = providers.reduce(into: [String: [String: Any]]()) { acc, entry in + guard + let providerID = Self.normalizedTalkProviderID(entry.key), + let config = entry.value as? [String: Any] + else { return } + acc[providerID] = config + } + let providerID = + Self.normalizedTalkProviderID(rawProvider) ?? + normalizedProviders.keys.sorted().first ?? + Self.defaultTalkProvider + return TalkProviderConfigSelection( + provider: providerID, + config: normalizedProviders[providerID] ?? [:], + normalizedPayload: true) + } + return TalkProviderConfigSelection( + provider: Self.defaultTalkProvider, + config: talk, + normalizedPayload: false) + } + func reloadConfig() async { guard let gateway else { return } do { @@ -1892,8 +1933,12 @@ extension TalkModeManager { guard let json = try JSONSerialization.jsonObject(with: res) as? [String: Any] else { return } guard let config = json["config"] as? [String: Any] else { return } let talk = config["talk"] as? [String: Any] - self.defaultVoiceId = (talk?["voiceId"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) - if let aliases = talk?["voiceAliases"] as? [String: Any] { + let selection = Self.selectTalkProviderConfig(talk) + let activeProvider = selection?.provider ?? Self.defaultTalkProvider + let activeConfig = selection?.config + self.defaultVoiceId = (activeConfig?["voiceId"] as? String)? + .trimmingCharacters(in: .whitespacesAndNewlines) + if let aliases = activeConfig?["voiceAliases"] as? [String: Any] { var resolved: [String: String] = [:] for (key, value) in aliases { guard let id = value as? String else { continue } @@ -1909,22 +1954,28 @@ extension TalkModeManager { if !self.voiceOverrideActive { self.currentVoiceId = self.defaultVoiceId } - let model = (talk?["modelId"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) + let model = (activeConfig?["modelId"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) self.defaultModelId = (model?.isEmpty == false) ? model : Self.defaultModelIdFallback if !self.modelOverrideActive { self.currentModelId = self.defaultModelId } - self.defaultOutputFormat = (talk?["outputFormat"] as? String)? + self.defaultOutputFormat = (activeConfig?["outputFormat"] as? String)? .trimmingCharacters(in: .whitespacesAndNewlines) - let rawConfigApiKey = (talk?["apiKey"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) + let rawConfigApiKey = (activeConfig?["apiKey"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) let configApiKey = Self.normalizedTalkApiKey(rawConfigApiKey) - let localApiKey = Self.normalizedTalkApiKey(GatewaySettingsStore.loadTalkElevenLabsApiKey()) + let localApiKey = Self.normalizedTalkApiKey( + GatewaySettingsStore.loadTalkProviderApiKey(provider: activeProvider)) if rawConfigApiKey == Self.redactedConfigSentinel { self.apiKey = (localApiKey?.isEmpty == false) ? localApiKey : nil GatewayDiagnostics.log("talk config apiKey redacted; using local override if present") } else { self.apiKey = (localApiKey?.isEmpty == false) ? localApiKey : configApiKey } + if activeProvider != Self.defaultTalkProvider { + self.apiKey = nil + GatewayDiagnostics.log( + "talk provider '\(activeProvider)' not yet supported on iOS; using system voice fallback") + } self.gatewayTalkDefaultVoiceId = self.defaultVoiceId self.gatewayTalkDefaultModelId = self.defaultModelId self.gatewayTalkApiKeyConfigured = (self.apiKey?.isEmpty == false) @@ -1932,6 +1983,9 @@ extension TalkModeManager { if let interrupt = talk?["interruptOnSpeech"] as? Bool { self.interruptOnSpeech = interrupt } + if selection?.normalizedPayload == true { + GatewayDiagnostics.log("talk config provider=\(activeProvider)") + } } catch { self.defaultModelId = Self.defaultModelIdFallback if !self.modelOverrideActive { diff --git a/apps/ios/Tests/GatewaySettingsStoreTests.swift b/apps/ios/Tests/GatewaySettingsStoreTests.swift index 7e67ab84a97..ec879b3a0f3 100644 --- a/apps/ios/Tests/GatewaySettingsStoreTests.swift +++ b/apps/ios/Tests/GatewaySettingsStoreTests.swift @@ -9,9 +9,15 @@ private struct KeychainEntry: Hashable { private let gatewayService = "ai.openclaw.gateway" private let nodeService = "ai.openclaw.node" +private let talkService = "ai.openclaw.talk" private let instanceIdEntry = KeychainEntry(service: nodeService, account: "instanceId") private let preferredGatewayEntry = KeychainEntry(service: gatewayService, account: "preferredStableID") private let lastGatewayEntry = KeychainEntry(service: gatewayService, account: "lastDiscoveredStableID") +private let talkElevenLabsLegacyEntry = KeychainEntry(service: talkService, account: "elevenlabs.apiKey") +private let talkElevenLabsProviderEntry = KeychainEntry( + service: talkService, + account: "provider.apiKey.elevenlabs") +private let talkAcmeProviderEntry = KeychainEntry(service: talkService, account: "provider.apiKey.acme") private func snapshotDefaults(_ keys: [String]) -> [String: Any?] { let defaults = UserDefaults.standard @@ -196,4 +202,34 @@ private func restoreKeychain(_ snapshot: [KeychainEntry: String?]) { let loaded = GatewaySettingsStore.loadLastGatewayConnection() #expect(loaded == .manual(host: "example.org", port: 18789, useTLS: false, stableID: "manual|example.org|18789")) } + + @Test func talkProviderApiKey_genericRoundTrip() { + let keychainSnapshot = snapshotKeychain([talkAcmeProviderEntry]) + defer { restoreKeychain(keychainSnapshot) } + + _ = KeychainStore.delete(service: talkService, account: talkAcmeProviderEntry.account) + + GatewaySettingsStore.saveTalkProviderApiKey("acme-key", provider: "acme") + #expect(GatewaySettingsStore.loadTalkProviderApiKey(provider: "acme") == "acme-key") + + GatewaySettingsStore.saveTalkProviderApiKey(nil, provider: "acme") + #expect(GatewaySettingsStore.loadTalkProviderApiKey(provider: "acme") == nil) + } + + @Test func talkProviderApiKey_elevenlabsLegacyFallbackMigratesToProviderKey() { + let keychainSnapshot = snapshotKeychain([talkElevenLabsLegacyEntry, talkElevenLabsProviderEntry]) + defer { restoreKeychain(keychainSnapshot) } + + _ = KeychainStore.delete(service: talkService, account: talkElevenLabsProviderEntry.account) + _ = KeychainStore.saveString( + "legacy-eleven-key", + service: talkService, + account: talkElevenLabsLegacyEntry.account) + + let loaded = GatewaySettingsStore.loadTalkProviderApiKey(provider: "elevenlabs") + #expect(loaded == "legacy-eleven-key") + #expect( + KeychainStore.loadString(service: talkService, account: talkElevenLabsProviderEntry.account) + == "legacy-eleven-key") + } } diff --git a/apps/ios/Tests/TalkModeConfigParsingTests.swift b/apps/ios/Tests/TalkModeConfigParsingTests.swift new file mode 100644 index 00000000000..fd5c3d0f392 --- /dev/null +++ b/apps/ios/Tests/TalkModeConfigParsingTests.swift @@ -0,0 +1,34 @@ +import Testing +@testable import OpenClaw + +@Suite struct TalkModeConfigParsingTests { + @Test func prefersNormalizedTalkProviderPayload() async { + let talk: [String: Any] = [ + "provider": "elevenlabs", + "providers": [ + "elevenlabs": [ + "voiceId": "voice-normalized", + ], + ], + "voiceId": "voice-legacy", + ] + + let selection = await MainActor.run { TalkModeManager.selectTalkProviderConfig(talk) } + #expect(selection?.provider == "elevenlabs") + #expect(selection?.normalizedPayload == true) + #expect(selection?.config["voiceId"] as? String == "voice-normalized") + } + + @Test func fallsBackToLegacyTalkFieldsWhenNormalizedPayloadMissing() async { + let talk: [String: Any] = [ + "voiceId": "voice-legacy", + "apiKey": "legacy-key", + ] + + let selection = await MainActor.run { TalkModeManager.selectTalkProviderConfig(talk) } + #expect(selection?.provider == "elevenlabs") + #expect(selection?.normalizedPayload == false) + #expect(selection?.config["voiceId"] as? String == "voice-legacy") + #expect(selection?.config["apiKey"] as? String == "legacy-key") + } +} diff --git a/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift b/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift index 47b041a5873..443bc192295 100644 --- a/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift +++ b/apps/macos/Sources/OpenClaw/TalkModeRuntime.swift @@ -11,6 +11,7 @@ actor TalkModeRuntime { private let logger = Logger(subsystem: "ai.openclaw", category: "talk.runtime") private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts") private static let defaultModelIdFallback = "eleven_v3" + private static let defaultTalkProvider = "elevenlabs" private final class RMSMeter: @unchecked Sendable { private let lock = NSLock() @@ -792,6 +793,48 @@ extension TalkModeRuntime { let apiKey: String? } + struct TalkProviderConfigSelection { + let provider: String + let config: [String: AnyCodable] + let normalizedPayload: Bool + } + + private static func normalizedTalkProviderID(_ raw: String?) -> String? { + let trimmed = raw?.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() ?? "" + return trimmed.isEmpty ? nil : trimmed + } + + static func selectTalkProviderConfig( + _ talk: [String: AnyCodable]?) -> TalkProviderConfigSelection? + { + guard let talk else { return nil } + let rawProvider = talk["provider"]?.stringValue + let rawProviders = talk["providers"]?.dictionaryValue + let hasNormalizedPayload = rawProvider != nil || rawProviders != nil + if hasNormalizedPayload { + let normalizedProviders = + rawProviders?.reduce(into: [String: [String: AnyCodable]]()) { acc, entry in + guard + let providerID = Self.normalizedTalkProviderID(entry.key), + let providerConfig = entry.value.dictionaryValue + else { return } + acc[providerID] = providerConfig + } ?? [:] + let providerID = + Self.normalizedTalkProviderID(rawProvider) ?? + normalizedProviders.keys.sorted().first ?? + Self.defaultTalkProvider + return TalkProviderConfigSelection( + provider: providerID, + config: normalizedProviders[providerID] ?? [:], + normalizedPayload: true) + } + return TalkProviderConfigSelection( + provider: Self.defaultTalkProvider, + config: talk, + normalizedPayload: false) + } + private func fetchTalkConfig() async -> TalkRuntimeConfig { let env = ProcessInfo.processInfo.environment let envVoice = env["ELEVENLABS_VOICE_ID"]?.trimmingCharacters(in: .whitespacesAndNewlines) @@ -804,13 +847,16 @@ extension TalkModeRuntime { params: ["includeSecrets": AnyCodable(true)], timeoutMs: 8000) let talk = snap.config?["talk"]?.dictionaryValue + let selection = Self.selectTalkProviderConfig(talk) + let activeProvider = selection?.provider ?? Self.defaultTalkProvider + let activeConfig = selection?.config let ui = snap.config?["ui"]?.dictionaryValue let rawSeam = ui?["seamColor"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" await MainActor.run { AppStateStore.shared.seamColorHex = rawSeam.isEmpty ? nil : rawSeam } - let voice = talk?["voiceId"]?.stringValue - let rawAliases = talk?["voiceAliases"]?.dictionaryValue + let voice = activeConfig?["voiceId"]?.stringValue + let rawAliases = activeConfig?["voiceAliases"]?.dictionaryValue let resolvedAliases: [String: String] = rawAliases?.reduce(into: [:]) { acc, entry in let key = entry.key.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() @@ -818,18 +864,30 @@ extension TalkModeRuntime { guard !key.isEmpty, !value.isEmpty else { return } acc[key] = value } ?? [:] - let model = talk?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) + let model = activeConfig?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) let resolvedModel = (model?.isEmpty == false) ? model! : Self.defaultModelIdFallback - let outputFormat = talk?["outputFormat"]?.stringValue + let outputFormat = activeConfig?["outputFormat"]?.stringValue let interrupt = talk?["interruptOnSpeech"]?.boolValue - let apiKey = talk?["apiKey"]?.stringValue - let resolvedVoice = + let apiKey = activeConfig?["apiKey"]?.stringValue + let resolvedVoice: String? = if activeProvider == Self.defaultTalkProvider { (voice?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? voice : nil) ?? (envVoice?.isEmpty == false ? envVoice : nil) ?? (sagVoice?.isEmpty == false ? sagVoice : nil) - let resolvedApiKey = + } else { + (voice?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? voice : nil) + } + let resolvedApiKey: String? = if activeProvider == Self.defaultTalkProvider { (envApiKey?.isEmpty == false ? envApiKey : nil) ?? (apiKey?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? apiKey : nil) + } else { + nil + } + if activeProvider != Self.defaultTalkProvider { + self.ttsLogger + .info("talk provider \(activeProvider, privacy: .public) unsupported; using system voice") + } else if selection?.normalizedPayload == true { + self.ttsLogger.info("talk config provider elevenlabs") + } return TalkRuntimeConfig( voiceId: resolvedVoice, voiceAliases: resolvedAliases, diff --git a/apps/macos/Tests/OpenClawIPCTests/TalkModeConfigParsingTests.swift b/apps/macos/Tests/OpenClawIPCTests/TalkModeConfigParsingTests.swift new file mode 100644 index 00000000000..5ee30af273d --- /dev/null +++ b/apps/macos/Tests/OpenClawIPCTests/TalkModeConfigParsingTests.swift @@ -0,0 +1,36 @@ +import OpenClawProtocol +import Testing + +@testable import OpenClaw + +@Suite struct TalkModeConfigParsingTests { + @Test func prefersNormalizedTalkProviderPayload() { + let talk: [String: AnyCodable] = [ + "provider": AnyCodable("elevenlabs"), + "providers": AnyCodable([ + "elevenlabs": [ + "voiceId": "voice-normalized", + ], + ]), + "voiceId": AnyCodable("voice-legacy"), + ] + + let selection = TalkModeRuntime.selectTalkProviderConfig(talk) + #expect(selection?.provider == "elevenlabs") + #expect(selection?.normalizedPayload == true) + #expect(selection?.config["voiceId"]?.stringValue == "voice-normalized") + } + + @Test func fallsBackToLegacyTalkFieldsWhenNormalizedPayloadMissing() { + let talk: [String: AnyCodable] = [ + "voiceId": AnyCodable("voice-legacy"), + "apiKey": AnyCodable("legacy-key"), + ] + + let selection = TalkModeRuntime.selectTalkProviderConfig(talk) + #expect(selection?.provider == "elevenlabs") + #expect(selection?.normalizedPayload == false) + #expect(selection?.config["voiceId"]?.stringValue == "voice-legacy") + #expect(selection?.config["apiKey"]?.stringValue == "legacy-key") + } +} diff --git a/src/config/defaults.ts b/src/config/defaults.ts index 0d281c36566..7c652e6c319 100644 --- a/src/config/defaults.ts +++ b/src/config/defaults.ts @@ -2,7 +2,12 @@ import { DEFAULT_CONTEXT_TOKENS } from "../agents/defaults.js"; import { normalizeProviderId, parseModelRef } from "../agents/model-selection.js"; import { DEFAULT_AGENT_MAX_CONCURRENT, DEFAULT_SUBAGENT_MAX_CONCURRENT } from "./agent-limits.js"; import { resolveAgentModelPrimaryValue } from "./model-input.js"; -import { resolveTalkApiKey } from "./talk.js"; +import { + DEFAULT_TALK_PROVIDER, + normalizeTalkConfig, + resolveActiveTalkProviderConfig, + resolveTalkApiKey, +} from "./talk.js"; import type { OpenClawConfig } from "./types.js"; import type { ModelDefinitionConfig } from "./types.models.js"; @@ -163,21 +168,46 @@ export function applySessionDefaults( } export function applyTalkApiKey(config: OpenClawConfig): OpenClawConfig { + const normalized = normalizeTalkConfig(config); const resolved = resolveTalkApiKey(); if (!resolved) { - return config; + return normalized; } - const existing = config.talk?.apiKey?.trim(); - if (existing) { - return config; + + const talk = normalized.talk; + const active = resolveActiveTalkProviderConfig(talk); + if (active.provider && active.provider !== DEFAULT_TALK_PROVIDER) { + return normalized; } - return { - ...config, - talk: { - ...config.talk, - apiKey: resolved, - }, + + const existingProviderApiKey = + typeof active.config?.apiKey === "string" ? active.config.apiKey.trim() : ""; + const existingLegacyApiKey = typeof talk?.apiKey === "string" ? talk.apiKey.trim() : ""; + if (existingProviderApiKey || existingLegacyApiKey) { + return normalized; + } + + const providerId = active.provider ?? DEFAULT_TALK_PROVIDER; + const providers = { ...talk?.providers }; + const providerConfig = { ...providers[providerId], apiKey: resolved }; + providers[providerId] = providerConfig; + + const nextTalk = { + ...talk, + provider: talk?.provider ?? providerId, + providers, + // Keep legacy shape populated during compatibility rollout. + apiKey: resolved, }; + + return { + ...normalized, + talk: nextTalk, + }; +} + +export function applyTalkConfigNormalization(config: OpenClawConfig): OpenClawConfig { + return normalizeTalkConfig(config); } export function applyModelDefaults(cfg: OpenClawConfig): OpenClawConfig { diff --git a/src/config/io.ts b/src/config/io.ts index 01e691f1e60..c74992c4938 100644 --- a/src/config/io.ts +++ b/src/config/io.ts @@ -24,6 +24,7 @@ import { applyMessageDefaults, applyModelDefaults, applySessionDefaults, + applyTalkConfigNormalization, applyTalkApiKey, } from "./defaults.js"; import { restoreEnvVarRefs } from "./env-preserve.js"; @@ -720,11 +721,13 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) { deps.logger.warn(`Config warnings:\\n${details}`); } warnIfConfigFromFuture(validated.config, deps.logger); - const cfg = applyModelDefaults( - applyCompactionDefaults( - applyContextPruningDefaults( - applyAgentDefaults( - applySessionDefaults(applyLoggingDefaults(applyMessageDefaults(validated.config))), + const cfg = applyTalkConfigNormalization( + applyModelDefaults( + applyCompactionDefaults( + applyContextPruningDefaults( + applyAgentDefaults( + applySessionDefaults(applyLoggingDefaults(applyMessageDefaults(validated.config))), + ), ), ), ), @@ -809,10 +812,12 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) { if (!exists) { const hash = hashConfigRaw(null); const config = applyTalkApiKey( - applyModelDefaults( - applyCompactionDefaults( - applyContextPruningDefaults( - applyAgentDefaults(applySessionDefaults(applyMessageDefaults({}))), + applyTalkConfigNormalization( + applyModelDefaults( + applyCompactionDefaults( + applyContextPruningDefaults( + applyAgentDefaults(applySessionDefaults(applyMessageDefaults({}))), + ), ), ), ), @@ -933,9 +938,11 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) { warnIfConfigFromFuture(validated.config, deps.logger); const snapshotConfig = normalizeConfigPaths( applyTalkApiKey( - applyModelDefaults( - applyAgentDefaults( - applySessionDefaults(applyLoggingDefaults(applyMessageDefaults(validated.config))), + applyTalkConfigNormalization( + applyModelDefaults( + applyAgentDefaults( + applySessionDefaults(applyLoggingDefaults(applyMessageDefaults(validated.config))), + ), ), ), ), diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index 8beedf5c78f..8bc07121e3d 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -133,14 +133,24 @@ export const FIELD_HELP: Record = { "gateway.remote.sshTarget": "Remote gateway over SSH (tunnels the gateway port to localhost). Format: user@host or user@host:port.", "gateway.remote.sshIdentity": "Optional SSH identity file path (passed to ssh -i).", + "talk.provider": 'Active Talk provider id (for example "elevenlabs").', + "talk.providers": + "Provider-specific Talk settings keyed by provider id. During migration, prefer this over legacy talk.* keys.", + "talk.providers.*.voiceId": "Provider default voice ID for Talk mode.", + "talk.providers.*.voiceAliases": "Optional provider voice alias map for Talk directives.", + "talk.providers.*.modelId": "Provider default model ID for Talk mode.", + "talk.providers.*.outputFormat": "Provider default output format for Talk mode.", + "talk.providers.*.apiKey": "Provider API key for Talk mode.", "talk.voiceId": - "Default ElevenLabs voice ID for Talk mode (iOS/macOS/Android). Falls back to ELEVENLABS_VOICE_ID or SAG_VOICE_ID when unset.", + "Legacy ElevenLabs default voice ID for Talk mode. Prefer talk.providers.elevenlabs.voiceId.", "talk.voiceAliases": - 'Optional map of friendly names to ElevenLabs voice IDs for Talk directives (for example {"Clawd":"EXAVITQu4vr4xnSDxMaL"}).', - "talk.modelId": "Default ElevenLabs model ID for Talk mode (default: eleven_v3).", + 'Legacy ElevenLabs voice alias map (for example {"Clawd":"EXAVITQu4vr4xnSDxMaL"}). Prefer talk.providers.elevenlabs.voiceAliases.', + "talk.modelId": + "Legacy ElevenLabs model ID for Talk mode (default: eleven_v3). Prefer talk.providers.elevenlabs.modelId.", "talk.outputFormat": - "Default ElevenLabs output format for Talk mode (for example pcm_44100 or mp3_44100_128).", - "talk.apiKey": "ElevenLabs API key for Talk mode. Falls back to ELEVENLABS_API_KEY when unset.", + "Legacy ElevenLabs output format for Talk mode (for example pcm_44100 or mp3_44100_128). Prefer talk.providers.elevenlabs.outputFormat.", + "talk.apiKey": + "Legacy ElevenLabs API key for Talk mode. Prefer talk.providers.elevenlabs.apiKey (fallback: ELEVENLABS_API_KEY).", "talk.interruptOnSpeech": "If true (default), stop assistant speech when the user starts speaking in Talk mode.", "agents.list.*.skills": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 986f3c4b3aa..397376f6e11 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -600,6 +600,13 @@ export const FIELD_LABELS: Record = { "messages.inbound.debounceMs": "Inbound Message Debounce (ms)", "messages.inbound.byChannel": "Inbound Debounce by Channel (ms)", "messages.tts": "Message Text-to-Speech", + "talk.provider": "Talk Active Provider", + "talk.providers": "Talk Provider Settings", + "talk.providers.*.voiceId": "Talk Provider Voice ID", + "talk.providers.*.voiceAliases": "Talk Provider Voice Aliases", + "talk.providers.*.modelId": "Talk Provider Model ID", + "talk.providers.*.outputFormat": "Talk Provider Output Format", + "talk.providers.*.apiKey": "Talk Provider API Key", "talk.apiKey": "Talk API Key", channels: "Channels", "channels.defaults": "Channel Defaults", diff --git a/src/config/talk.normalize.test.ts b/src/config/talk.normalize.test.ts new file mode 100644 index 00000000000..a61af099bf3 --- /dev/null +++ b/src/config/talk.normalize.test.ts @@ -0,0 +1,150 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { describe, expect, it } from "vitest"; +import { createConfigIO } from "./io.js"; +import { normalizeTalkSection } from "./talk.js"; + +async function withTempConfig( + config: unknown, + run: (configPath: string) => Promise, +): Promise { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-talk-")); + const configPath = path.join(dir, "openclaw.json"); + await fs.writeFile(configPath, JSON.stringify(config, null, 2)); + try { + await run(configPath); + } finally { + await fs.rm(dir, { recursive: true, force: true }); + } +} + +async function withEnv( + updates: Record, + run: () => Promise, +): Promise { + const previous = new Map(); + for (const [key, value] of Object.entries(updates)) { + previous.set(key, process.env[key]); + if (value === undefined) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + + try { + await run(); + } finally { + for (const [key, value] of previous.entries()) { + if (value === undefined) { + delete process.env[key]; + } else { + process.env[key] = value; + } + } + } +} + +describe("talk normalization", () => { + it("maps legacy ElevenLabs fields into provider/providers", () => { + const normalized = normalizeTalkSection({ + voiceId: "voice-123", + voiceAliases: { Clawd: "EXAVITQu4vr4xnSDxMaL" }, + modelId: "eleven_v3", + outputFormat: "pcm_44100", + apiKey: "secret-key", + interruptOnSpeech: false, + }); + + expect(normalized).toEqual({ + provider: "elevenlabs", + providers: { + elevenlabs: { + voiceId: "voice-123", + voiceAliases: { Clawd: "EXAVITQu4vr4xnSDxMaL" }, + modelId: "eleven_v3", + outputFormat: "pcm_44100", + apiKey: "secret-key", + }, + }, + voiceId: "voice-123", + voiceAliases: { Clawd: "EXAVITQu4vr4xnSDxMaL" }, + modelId: "eleven_v3", + outputFormat: "pcm_44100", + apiKey: "secret-key", + interruptOnSpeech: false, + }); + }); + + it("uses new provider/providers shape directly when present", () => { + const normalized = normalizeTalkSection({ + provider: "acme", + providers: { + acme: { + voiceId: "acme-voice", + custom: true, + }, + }, + voiceId: "legacy-voice", + interruptOnSpeech: true, + }); + + expect(normalized).toEqual({ + provider: "acme", + providers: { + acme: { + voiceId: "acme-voice", + custom: true, + }, + }, + voiceId: "legacy-voice", + interruptOnSpeech: true, + }); + }); + + it("merges ELEVENLABS_API_KEY into normalized defaults for legacy configs", async () => { + await withEnv({ ELEVENLABS_API_KEY: "env-eleven-key" }, async () => { + await withTempConfig( + { + talk: { + voiceId: "voice-123", + }, + }, + async (configPath) => { + const io = createConfigIO({ configPath }); + const snapshot = await io.readConfigFileSnapshot(); + expect(snapshot.config.talk?.provider).toBe("elevenlabs"); + expect(snapshot.config.talk?.providers?.elevenlabs?.voiceId).toBe("voice-123"); + expect(snapshot.config.talk?.providers?.elevenlabs?.apiKey).toBe("env-eleven-key"); + expect(snapshot.config.talk?.apiKey).toBe("env-eleven-key"); + }, + ); + }); + }); + + it("does not apply ELEVENLABS_API_KEY when active provider is not elevenlabs", async () => { + await withEnv({ ELEVENLABS_API_KEY: "env-eleven-key" }, async () => { + await withTempConfig( + { + talk: { + provider: "acme", + providers: { + acme: { + voiceId: "acme-voice", + }, + }, + }, + }, + async (configPath) => { + const io = createConfigIO({ configPath }); + const snapshot = await io.readConfigFileSnapshot(); + expect(snapshot.config.talk?.provider).toBe("acme"); + expect(snapshot.config.talk?.providers?.acme?.voiceId).toBe("acme-voice"); + expect(snapshot.config.talk?.providers?.acme?.apiKey).toBeUndefined(); + expect(snapshot.config.talk?.apiKey).toBeUndefined(); + }, + ); + }); + }); +}); diff --git a/src/config/talk.ts b/src/config/talk.ts index f7856dc6796..e8de2e39801 100644 --- a/src/config/talk.ts +++ b/src/config/talk.ts @@ -1,6 +1,8 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; +import type { TalkConfig, TalkProviderConfig } from "./types.gateway.js"; +import type { OpenClawConfig } from "./types.js"; type TalkApiKeyDeps = { fs?: typeof fs; @@ -8,6 +10,266 @@ type TalkApiKeyDeps = { path?: typeof path; }; +export const DEFAULT_TALK_PROVIDER = "elevenlabs"; + +function isPlainObject(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +function normalizeString(value: unknown): string | undefined { + if (typeof value !== "string") { + return undefined; + } + const trimmed = value.trim(); + return trimmed.length > 0 ? trimmed : undefined; +} + +function normalizeVoiceAliases(value: unknown): Record | undefined { + if (!isPlainObject(value)) { + return undefined; + } + const aliases: Record = {}; + for (const [alias, rawId] of Object.entries(value)) { + if (typeof rawId !== "string") { + continue; + } + aliases[alias] = rawId; + } + return Object.keys(aliases).length > 0 ? aliases : undefined; +} + +function normalizeTalkProviderConfig(value: unknown): TalkProviderConfig | undefined { + if (!isPlainObject(value)) { + return undefined; + } + + const provider: TalkProviderConfig = {}; + for (const [key, raw] of Object.entries(value)) { + if (raw === undefined) { + continue; + } + if (key === "voiceAliases") { + const aliases = normalizeVoiceAliases(raw); + if (aliases) { + provider.voiceAliases = aliases; + } + continue; + } + if (key === "voiceId" || key === "modelId" || key === "outputFormat" || key === "apiKey") { + const normalized = normalizeString(raw); + if (normalized) { + provider[key] = normalized; + } + continue; + } + provider[key] = raw; + } + + return Object.keys(provider).length > 0 ? provider : undefined; +} + +function normalizeTalkProviders(value: unknown): Record | undefined { + if (!isPlainObject(value)) { + return undefined; + } + const providers: Record = {}; + for (const [rawProviderId, providerConfig] of Object.entries(value)) { + const providerId = normalizeString(rawProviderId); + if (!providerId) { + continue; + } + const normalizedProvider = normalizeTalkProviderConfig(providerConfig); + if (!normalizedProvider) { + continue; + } + providers[providerId] = normalizedProvider; + } + return Object.keys(providers).length > 0 ? providers : undefined; +} + +function normalizedLegacyTalkFields(source: Record): Partial { + const legacy: Partial = {}; + const voiceId = normalizeString(source.voiceId); + if (voiceId) { + legacy.voiceId = voiceId; + } + const voiceAliases = normalizeVoiceAliases(source.voiceAliases); + if (voiceAliases) { + legacy.voiceAliases = voiceAliases; + } + const modelId = normalizeString(source.modelId); + if (modelId) { + legacy.modelId = modelId; + } + const outputFormat = normalizeString(source.outputFormat); + if (outputFormat) { + legacy.outputFormat = outputFormat; + } + const apiKey = normalizeString(source.apiKey); + if (apiKey) { + legacy.apiKey = apiKey; + } + return legacy; +} + +function legacyProviderConfigFromTalk( + source: Record, +): TalkProviderConfig | undefined { + return normalizeTalkProviderConfig({ + voiceId: source.voiceId, + voiceAliases: source.voiceAliases, + modelId: source.modelId, + outputFormat: source.outputFormat, + apiKey: source.apiKey, + }); +} + +function activeProviderFromTalk(talk: TalkConfig): string | undefined { + const provider = normalizeString(talk.provider); + if (provider) { + return provider; + } + const providerIds = talk.providers ? Object.keys(talk.providers) : []; + return providerIds.length === 1 ? providerIds[0] : undefined; +} + +function legacyTalkFieldsFromProviderConfig( + config: TalkProviderConfig | undefined, +): Partial { + if (!config) { + return {}; + } + const legacy: Partial = {}; + if (typeof config.voiceId === "string") { + legacy.voiceId = config.voiceId; + } + if ( + config.voiceAliases && + typeof config.voiceAliases === "object" && + !Array.isArray(config.voiceAliases) + ) { + const aliases = normalizeVoiceAliases(config.voiceAliases); + if (aliases) { + legacy.voiceAliases = aliases; + } + } + if (typeof config.modelId === "string") { + legacy.modelId = config.modelId; + } + if (typeof config.outputFormat === "string") { + legacy.outputFormat = config.outputFormat; + } + if (typeof config.apiKey === "string") { + legacy.apiKey = config.apiKey; + } + return legacy; +} + +export function normalizeTalkSection(value: TalkConfig | undefined): TalkConfig | undefined { + if (!isPlainObject(value)) { + return undefined; + } + + const source = value as Record; + const hasNormalizedShape = typeof source.provider === "string" || isPlainObject(source.providers); + const normalized: TalkConfig = {}; + const legacy = normalizedLegacyTalkFields(source); + if (Object.keys(legacy).length > 0) { + Object.assign(normalized, legacy); + } + if (typeof source.interruptOnSpeech === "boolean") { + normalized.interruptOnSpeech = source.interruptOnSpeech; + } + + if (hasNormalizedShape) { + const providers = normalizeTalkProviders(source.providers); + const provider = normalizeString(source.provider); + if (providers) { + normalized.providers = providers; + } + if (provider) { + normalized.provider = provider; + } else if (providers) { + const ids = Object.keys(providers); + if (ids.length === 1) { + normalized.provider = ids[0]; + } + } + return Object.keys(normalized).length > 0 ? normalized : undefined; + } + + const legacyProviderConfig = legacyProviderConfigFromTalk(source); + if (legacyProviderConfig) { + normalized.provider = DEFAULT_TALK_PROVIDER; + normalized.providers = { [DEFAULT_TALK_PROVIDER]: legacyProviderConfig }; + } + return Object.keys(normalized).length > 0 ? normalized : undefined; +} + +export function normalizeTalkConfig(config: OpenClawConfig): OpenClawConfig { + if (!config.talk) { + return config; + } + const normalizedTalk = normalizeTalkSection(config.talk); + if (!normalizedTalk) { + return config; + } + return { + ...config, + talk: normalizedTalk, + }; +} + +export function resolveActiveTalkProviderConfig(talk: TalkConfig | undefined): { + provider?: string; + config?: TalkProviderConfig; +} { + const normalizedTalk = normalizeTalkSection(talk); + if (!normalizedTalk) { + return {}; + } + const provider = activeProviderFromTalk(normalizedTalk); + if (!provider) { + return {}; + } + return { + provider, + config: normalizedTalk.providers?.[provider], + }; +} + +export function buildTalkConfigResponse(value: unknown): TalkConfig | undefined { + if (!isPlainObject(value)) { + return undefined; + } + const normalized = normalizeTalkSection(value as TalkConfig); + if (!normalized) { + return undefined; + } + + const payload: TalkConfig = {}; + if (typeof normalized.interruptOnSpeech === "boolean") { + payload.interruptOnSpeech = normalized.interruptOnSpeech; + } + if (normalized.providers && Object.keys(normalized.providers).length > 0) { + payload.providers = normalized.providers; + } + if (typeof normalized.provider === "string") { + payload.provider = normalized.provider; + } + + const activeProvider = activeProviderFromTalk(normalized); + const providerConfig = activeProvider ? normalized.providers?.[activeProvider] : undefined; + const providerCompatibilityLegacy = legacyTalkFieldsFromProviderConfig(providerConfig); + const compatibilityLegacy = + Object.keys(providerCompatibilityLegacy).length > 0 + ? providerCompatibilityLegacy + : normalizedLegacyTalkFields(normalized as unknown as Record); + Object.assign(payload, compatibilityLegacy); + + return Object.keys(payload).length > 0 ? payload : undefined; +} + export function readTalkApiKeyFromProfile(deps: TalkApiKeyDeps = {}): string | null { const fsImpl = deps.fs ?? fs; const osImpl = deps.os ?? os; diff --git a/src/config/types.gateway.ts b/src/config/types.gateway.ts index 5a18da09678..5e644db40eb 100644 --- a/src/config/types.gateway.ts +++ b/src/config/types.gateway.ts @@ -46,19 +46,38 @@ export type CanvasHostConfig = { liveReload?: boolean; }; -export type TalkConfig = { - /** Default ElevenLabs voice ID for Talk mode. */ +export type TalkProviderConfig = { + /** Default voice ID for the provider's Talk mode implementation. */ voiceId?: string; - /** Optional voice name -> ElevenLabs voice ID map. */ + /** Optional voice name -> provider voice ID map. */ voiceAliases?: Record; - /** Default ElevenLabs model ID for Talk mode. */ + /** Default provider model ID for Talk mode. */ modelId?: string; - /** Default ElevenLabs output format (e.g. mp3_44100_128). */ + /** Default provider output format (for example pcm_44100). */ outputFormat?: string; - /** ElevenLabs API key (optional; falls back to ELEVENLABS_API_KEY). */ + /** Provider API key (optional; provider-specific env fallback may apply). */ apiKey?: string; + /** Provider-specific extensions. */ + [key: string]: unknown; +}; + +export type TalkConfig = { + /** Active Talk TTS provider (for example "elevenlabs"). */ + provider?: string; + /** Provider-specific Talk config keyed by provider id. */ + providers?: Record; /** Stop speaking when user starts talking (default: true). */ interruptOnSpeech?: boolean; + + /** + * Legacy ElevenLabs compatibility fields. + * Kept during rollout while older clients migrate to provider/providers. + */ + voiceId?: string; + voiceAliases?: Record; + modelId?: string; + outputFormat?: string; + apiKey?: string; }; export type GatewayControlUiConfig = { diff --git a/src/config/zod-schema.ts b/src/config/zod-schema.ts index dd6b1b1c1d0..6ea3bd00287 100644 --- a/src/config/zod-schema.ts +++ b/src/config/zod-schema.ts @@ -439,6 +439,21 @@ export const OpenClawSchema = z .optional(), talk: z .object({ + provider: z.string().optional(), + providers: z + .record( + z.string(), + z + .object({ + voiceId: z.string().optional(), + voiceAliases: z.record(z.string(), z.string()).optional(), + modelId: z.string().optional(), + outputFormat: z.string().optional(), + apiKey: z.string().optional().register(sensitive), + }) + .catchall(z.unknown()), + ) + .optional(), voiceId: z.string().optional(), voiceAliases: z.record(z.string(), z.string()).optional(), modelId: z.string().optional(), diff --git a/src/gateway/protocol/schema/channels.ts b/src/gateway/protocol/schema/channels.ts index 7d864209888..51f5194cc83 100644 --- a/src/gateway/protocol/schema/channels.ts +++ b/src/gateway/protocol/schema/channels.ts @@ -16,6 +16,17 @@ export const TalkConfigParamsSchema = Type.Object( { additionalProperties: false }, ); +const TalkProviderConfigSchema = Type.Object( + { + voiceId: Type.Optional(Type.String()), + voiceAliases: Type.Optional(Type.Record(Type.String(), Type.String())), + modelId: Type.Optional(Type.String()), + outputFormat: Type.Optional(Type.String()), + apiKey: Type.Optional(Type.String()), + }, + { additionalProperties: true }, +); + export const TalkConfigResultSchema = Type.Object( { config: Type.Object( @@ -23,6 +34,8 @@ export const TalkConfigResultSchema = Type.Object( talk: Type.Optional( Type.Object( { + provider: Type.Optional(Type.String()), + providers: Type.Optional(Type.Record(Type.String(), TalkProviderConfigSchema)), voiceId: Type.Optional(Type.String()), voiceAliases: Type.Optional(Type.Record(Type.String(), Type.String())), modelId: Type.Optional(Type.String()), diff --git a/src/gateway/server-methods/talk.ts b/src/gateway/server-methods/talk.ts index 760f4cc9310..693f3447537 100644 --- a/src/gateway/server-methods/talk.ts +++ b/src/gateway/server-methods/talk.ts @@ -1,5 +1,6 @@ import { readConfigFileSnapshot } from "../../config/config.js"; import { redactConfigObject } from "../../config/redact-snapshot.js"; +import { buildTalkConfigResponse } from "../../config/talk.js"; import { ErrorCodes, errorShape, @@ -17,46 +18,6 @@ function canReadTalkSecrets(client: { connect?: { scopes?: string[] } } | null): return scopes.includes(ADMIN_SCOPE) || scopes.includes(TALK_SECRETS_SCOPE); } -function normalizeTalkConfigSection(value: unknown): Record | undefined { - if (!value || typeof value !== "object" || Array.isArray(value)) { - return undefined; - } - const source = value as Record; - const talk: Record = {}; - if (typeof source.voiceId === "string") { - talk.voiceId = source.voiceId; - } - if ( - source.voiceAliases && - typeof source.voiceAliases === "object" && - !Array.isArray(source.voiceAliases) - ) { - const aliases: Record = {}; - for (const [alias, id] of Object.entries(source.voiceAliases as Record)) { - if (typeof id !== "string") { - continue; - } - aliases[alias] = id; - } - if (Object.keys(aliases).length > 0) { - talk.voiceAliases = aliases; - } - } - if (typeof source.modelId === "string") { - talk.modelId = source.modelId; - } - if (typeof source.outputFormat === "string") { - talk.outputFormat = source.outputFormat; - } - if (typeof source.apiKey === "string") { - talk.apiKey = source.apiKey; - } - if (typeof source.interruptOnSpeech === "boolean") { - talk.interruptOnSpeech = source.interruptOnSpeech; - } - return Object.keys(talk).length > 0 ? talk : undefined; -} - export const talkHandlers: GatewayRequestHandlers = { "talk.config": async ({ params, respond, client }) => { if (!validateTalkConfigParams(params)) { @@ -87,7 +48,7 @@ export const talkHandlers: GatewayRequestHandlers = { const talkSource = includeSecrets ? snapshot.config.talk : redactConfigObject(snapshot.config.talk); - const talk = normalizeTalkConfigSection(talkSource); + const talk = buildTalkConfigResponse(talkSource); if (talk) { configPayload.talk = talk; } diff --git a/src/gateway/server.talk-config.test.ts b/src/gateway/server.talk-config.test.ts index 856e54ecebd..107d8a83263 100644 --- a/src/gateway/server.talk-config.test.ts +++ b/src/gateway/server.talk-config.test.ts @@ -79,12 +79,24 @@ describe("gateway talk.config", () => { await withServer(async (ws) => { await connectOperator(ws, ["operator.read"]); - const res = await rpcReq<{ config?: { talk?: { apiKey?: string; voiceId?: string } } }>( - ws, - "talk.config", - {}, - ); + const res = await rpcReq<{ + config?: { + talk?: { + provider?: string; + providers?: { + elevenlabs?: { voiceId?: string; apiKey?: string }; + }; + apiKey?: string; + voiceId?: string; + }; + }; + }>(ws, "talk.config", {}); expect(res.ok).toBe(true); + expect(res.payload?.config?.talk?.provider).toBe("elevenlabs"); + expect(res.payload?.config?.talk?.providers?.elevenlabs?.voiceId).toBe("voice-123"); + expect(res.payload?.config?.talk?.providers?.elevenlabs?.apiKey).toBe( + "__OPENCLAW_REDACTED__", + ); expect(res.payload?.config?.talk?.voiceId).toBe("voice-123"); expect(res.payload?.config?.talk?.apiKey).toBe("__OPENCLAW_REDACTED__"); }); @@ -113,4 +125,38 @@ describe("gateway talk.config", () => { expect(res.payload?.config?.talk?.apiKey).toBe("secret-key-abc"); }); }); + + it("prefers normalized provider payload over conflicting legacy talk keys", async () => { + const { writeConfigFile } = await import("../config/config.js"); + await writeConfigFile({ + talk: { + provider: "elevenlabs", + providers: { + elevenlabs: { + voiceId: "voice-normalized", + }, + }, + voiceId: "voice-legacy", + }, + }); + + await withServer(async (ws) => { + await connectOperator(ws, ["operator.read"]); + const res = await rpcReq<{ + config?: { + talk?: { + provider?: string; + providers?: { + elevenlabs?: { voiceId?: string }; + }; + voiceId?: string; + }; + }; + }>(ws, "talk.config", {}); + expect(res.ok).toBe(true); + expect(res.payload?.config?.talk?.provider).toBe("elevenlabs"); + expect(res.payload?.config?.talk?.providers?.elevenlabs?.voiceId).toBe("voice-normalized"); + expect(res.payload?.config?.talk?.voiceId).toBe("voice-normalized"); + }); + }); });