mirror of
https://github.com/moltbot/moltbot.git
synced 2026-03-08 06:54:24 +00:00
feat(talk): add provider-agnostic config with legacy compatibility
This commit is contained in:
committed by
Peter Steinberger
parent
d1f28c954e
commit
d58f71571a
@@ -54,6 +54,47 @@ class TalkModeManager(
|
||||
private const val tag = "TalkMode"
|
||||
private const val defaultModelIdFallback = "eleven_v3"
|
||||
private const val defaultOutputFormatFallback = "pcm_24000"
|
||||
private const val defaultTalkProvider = "elevenlabs"
|
||||
|
||||
/**
 * Result of [selectTalkProviderConfig]: which provider was chosen and where its settings live.
 *
 * @property provider Provider id chosen for the parsed `talk` object.
 * @property config JSON object the provider settings are read from; for the legacy flat layout
 *   this is the whole `talk` object.
 * @property normalizedPayload `true` when `talk` carried a `provider` string or a `providers`
 *   object, `false` for the legacy flat layout.
 */
internal data class TalkProviderConfigSelection(
    val provider: String,
    val config: JsonObject,
    val normalizedPayload: Boolean,
)
|
||||
|
||||
/**
 * Canonicalizes a provider id by trimming it and lower-casing it.
 *
 * @return the canonical id, or `null` when [raw] is `null` or empty after trimming.
 */
private fun normalizeTalkProviderId(raw: String?): String? {
    if (raw == null) return null
    val canonical = raw.trim().lowercase()
    return if (canonical.isEmpty()) null else canonical
}
|
||||
|
||||
/**
 * Chooses the provider and settings object to use from a gateway `talk` config object.
 *
 * When `talk` has neither a `provider` string nor a `providers` object, the legacy flat layout is
 * assumed: the default provider is returned with `talk` itself as its config. Otherwise the
 * provider is taken from `provider`, then from the alphabetically first key under `providers`,
 * then from the default provider; its config is the matching `providers` entry or an empty object.
 *
 * @param talk parsed `talk` section, or `null` when absent.
 * @return the selection, or `null` when [talk] is `null`.
 */
internal fun selectTalkProviderConfig(talk: JsonObject?): TalkProviderConfigSelection? {
    if (talk == null) return null
    val declaredProvider = talk["provider"].asStringOrNull()
    val declaredProviders = talk["providers"].asObjectOrNull()

    // Legacy flat layout: the provider settings sit directly on `talk`.
    if (declaredProvider == null && declaredProviders == null) {
        return TalkProviderConfigSelection(
            provider = defaultTalkProvider,
            config = talk,
            normalizedPayload = false,
        )
    }

    // Re-key the provider sections by canonical id, dropping blank ids and non-object values.
    // A later entry with the same canonical id replaces an earlier one.
    val configsById = mutableMapOf<String, JsonObject>()
    for ((rawId, rawConfig) in declaredProviders?.entries.orEmpty()) {
        val canonicalId = normalizeTalkProviderId(rawId) ?: continue
        val section = rawConfig.asObjectOrNull() ?: continue
        configsById[canonicalId] = section
    }

    val chosenId =
        normalizeTalkProviderId(declaredProvider)
            ?: configsById.keys.minOrNull()
            ?: defaultTalkProvider
    val chosenConfig = configsById[chosenId] ?: buildJsonObject {}
    return TalkProviderConfigSelection(
        provider = chosenId,
        config = chosenConfig,
        normalizedPayload = true,
    )
}
|
||||
}
|
||||
|
||||
private val mainHandler = Handler(Looper.getMainLooper())
|
||||
@@ -818,30 +859,49 @@ class TalkModeManager(
|
||||
val root = json.parseToJsonElement(res).asObjectOrNull()
|
||||
val config = root?.get("config").asObjectOrNull()
|
||||
val talk = config?.get("talk").asObjectOrNull()
|
||||
val selection = selectTalkProviderConfig(talk)
|
||||
val activeProvider = selection?.provider ?: defaultTalkProvider
|
||||
val activeConfig = selection?.config
|
||||
val sessionCfg = config?.get("session").asObjectOrNull()
|
||||
val mainKey = normalizeMainKey(sessionCfg?.get("mainKey").asStringOrNull())
|
||||
val voice = talk?.get("voiceId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val voice = activeConfig?.get("voiceId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val aliases =
|
||||
talk?.get("voiceAliases").asObjectOrNull()?.entries?.mapNotNull { (key, value) ->
|
||||
activeConfig?.get("voiceAliases").asObjectOrNull()?.entries?.mapNotNull { (key, value) ->
|
||||
val id = value.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() } ?: return@mapNotNull null
|
||||
normalizeAliasKey(key).takeIf { it.isNotEmpty() }?.let { it to id }
|
||||
}?.toMap().orEmpty()
|
||||
val model = talk?.get("modelId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val outputFormat = talk?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val key = talk?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val model = activeConfig?.get("modelId")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val outputFormat =
|
||||
activeConfig?.get("outputFormat")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val key = activeConfig?.get("apiKey")?.asStringOrNull()?.trim()?.takeIf { it.isNotEmpty() }
|
||||
val interrupt = talk?.get("interruptOnSpeech")?.asBooleanOrNull()
|
||||
|
||||
if (!isCanonicalMainSessionKey(mainSessionKey)) {
|
||||
mainSessionKey = mainKey
|
||||
}
|
||||
defaultVoiceId = voice ?: envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
|
||||
defaultVoiceId =
|
||||
if (activeProvider == defaultTalkProvider) {
|
||||
voice ?: envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
|
||||
} else {
|
||||
voice
|
||||
}
|
||||
voiceAliases = aliases
|
||||
if (!voiceOverrideActive) currentVoiceId = defaultVoiceId
|
||||
defaultModelId = model ?: defaultModelIdFallback
|
||||
if (!modelOverrideActive) currentModelId = defaultModelId
|
||||
defaultOutputFormat = outputFormat ?: defaultOutputFormatFallback
|
||||
apiKey = key ?: envKey?.takeIf { it.isNotEmpty() }
|
||||
apiKey =
|
||||
if (activeProvider == defaultTalkProvider) {
|
||||
key ?: envKey?.takeIf { it.isNotEmpty() }
|
||||
} else {
|
||||
null
|
||||
}
|
||||
if (interrupt != null) interruptOnSpeech = interrupt
|
||||
if (activeProvider != defaultTalkProvider) {
|
||||
Log.w(tag, "talk provider $activeProvider unsupported; using system voice fallback")
|
||||
} else if (selection?.normalizedPayload == true) {
|
||||
Log.d(tag, "talk config provider=elevenlabs")
|
||||
}
|
||||
} catch (_: Throwable) {
|
||||
defaultVoiceId = envVoice?.takeIf { it.isNotEmpty() } ?: sagVoice?.takeIf { it.isNotEmpty() }
|
||||
defaultModelId = defaultModelIdFallback
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
package ai.openclaw.android.voice
|
||||
|
||||
import kotlinx.serialization.json.Json
|
||||
import kotlinx.serialization.json.jsonPrimitive
|
||||
import kotlinx.serialization.json.jsonObject
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Assert.assertNotNull
|
||||
import org.junit.Assert.assertTrue
|
||||
import org.junit.Test
|
||||
|
||||
/** Unit tests for how `TalkModeManager.selectTalkProviderConfig` picks the active Talk provider config. */
class TalkModeConfigParsingTest {
    private val json = Json { ignoreUnknownKeys = true }

    /** Parses [raw] (after `trimIndent`) into a JSON object. */
    private fun parseTalk(raw: String) = json.parseToJsonElement(raw.trimIndent()).jsonObject

    /** A payload with `provider`/`providers` must win over the flat legacy fields beside it. */
    @Test
    fun prefersNormalizedTalkProviderPayload() {
        val talk =
            parseTalk(
                """
                {
                  "provider": "elevenlabs",
                  "providers": {
                    "elevenlabs": {
                      "voiceId": "voice-normalized"
                    }
                  },
                  "voiceId": "voice-legacy"
                }
                """,
            )

        val selection = TalkModeManager.selectTalkProviderConfig(talk)
        val voiceId = selection?.config?.get("voiceId")?.jsonPrimitive?.content

        assertNotNull(selection)
        assertEquals("elevenlabs", selection?.provider)
        assertTrue(selection?.normalizedPayload == true)
        assertEquals("voice-normalized", voiceId)
    }

    /** Without `provider`/`providers`, the flat legacy fields are used as the provider config. */
    @Test
    fun fallsBackToLegacyTalkFieldsWhenNormalizedPayloadMissing() {
        val talk =
            parseTalk(
                """
                {
                  "voiceId": "voice-legacy",
                  "apiKey": "legacy-key"
                }
                """,
            )

        val selection = TalkModeManager.selectTalkProviderConfig(talk)
        val voiceId = selection?.config?.get("voiceId")?.jsonPrimitive?.content
        val apiKey = selection?.config?.get("apiKey")?.jsonPrimitive?.content

        assertNotNull(selection)
        assertEquals("elevenlabs", selection?.provider)
        assertTrue(selection?.normalizedPayload == false)
        assertEquals("voice-legacy", voiceId)
        assertEquals("legacy-key", apiKey)
    }
}
|
||||
@@ -25,7 +25,8 @@ enum GatewaySettingsStore {
|
||||
private static let instanceIdAccount = "instanceId"
|
||||
private static let preferredGatewayStableIDAccount = "preferredStableID"
|
||||
private static let lastDiscoveredGatewayStableIDAccount = "lastDiscoveredStableID"
|
||||
private static let talkElevenLabsApiKeyAccount = "elevenlabs.apiKey"
|
||||
private static let talkProviderApiKeyAccountPrefix = "provider.apiKey."
|
||||
private static let talkElevenLabsApiKeyLegacyAccount = "elevenlabs.apiKey"
|
||||
|
||||
static func bootstrapPersistence() {
|
||||
self.ensureStableInstanceID()
|
||||
@@ -145,25 +146,52 @@ enum GatewaySettingsStore {
|
||||
case discovered
|
||||
}
|
||||
|
||||
/// Loads the stored Talk API key for `provider` from the keychain.
///
/// The provider id is trimmed and lower-cased before it is turned into an account name. For
/// "elevenlabs", when the provider-scoped entry is missing or blank, the legacy account is read
/// instead and a non-empty value found there is copied into the provider-scoped entry before it
/// is returned.
///
/// - Parameter provider: Provider id; an id that is blank after trimming yields `nil`.
/// - Returns: The whitespace-trimmed key, or `nil` when no non-empty key is stored.
static func loadTalkProviderApiKey(provider: String) -> String? {
    guard let providerId = self.normalizedTalkProviderID(provider) else { return nil }
    let account = self.talkProviderApiKeyAccount(providerId: providerId)

    if let value = KeychainStore.loadString(service: self.talkService, account: account)?
        .trimmingCharacters(in: .whitespacesAndNewlines),
        !value.isEmpty
    {
        return value
    }

    // Only ElevenLabs has a legacy (pre-provider) keychain account to fall back to.
    guard providerId == "elevenlabs" else { return nil }
    guard
        let legacyValue = KeychainStore.loadString(
            service: self.talkService,
            account: self.talkElevenLabsApiKeyLegacyAccount)?
            .trimmingCharacters(in: .whitespacesAndNewlines),
        !legacyValue.isEmpty
    else { return nil }

    // Copy the legacy value into the provider-scoped account; the save result is ignored.
    _ = KeychainStore.saveString(legacyValue, service: self.talkService, account: account)
    return legacyValue
}
|
||||
|
||||
/// Stores, or clears, the Talk API key for `provider` in the keychain.
///
/// The key is trimmed first. A `nil` or blank key deletes the provider-scoped entry; any other
/// value is saved to it. For "elevenlabs" the legacy account is deleted in both cases.
///
/// - Parameters:
///   - apiKey: Key to store; `nil` or blank clears the stored key.
///   - provider: Provider id; an id that is blank after trimming makes this a no-op.
static func saveTalkProviderApiKey(_ apiKey: String?, provider: String) {
    guard let providerId = self.normalizedTalkProviderID(provider) else { return }
    let account = self.talkProviderApiKeyAccount(providerId: providerId)
    let trimmed = apiKey?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""

    if trimmed.isEmpty {
        _ = KeychainStore.delete(service: self.talkService, account: account)
    } else {
        _ = KeychainStore.saveString(trimmed, service: self.talkService, account: account)
    }

    // Whether the key was saved or cleared, the legacy ElevenLabs entry is removed as well.
    if providerId == "elevenlabs" {
        _ = KeychainStore.delete(service: self.talkService, account: self.talkElevenLabsApiKeyLegacyAccount)
    }
}
|
||||
|
||||
/// Convenience wrapper: loads the Talk API key stored for the "elevenlabs" provider.
static func loadTalkElevenLabsApiKey() -> String? {
    let elevenLabsKey = self.loadTalkProviderApiKey(provider: "elevenlabs")
    return elevenLabsKey
}
|
||||
|
||||
/// Convenience wrapper: stores (or clears, for `nil`/blank) the Talk API key of the "elevenlabs" provider.
static func saveTalkElevenLabsApiKey(_ apiKey: String?) {
    let provider = "elevenlabs"
    self.saveTalkProviderApiKey(apiKey, provider: provider)
}
|
||||
|
||||
static func saveLastGatewayConnectionManual(host: String, port: Int, useTLS: Bool, stableID: String) {
|
||||
@@ -278,6 +306,15 @@ enum GatewaySettingsStore {
|
||||
"gateway-password.\(instanceId)"
|
||||
}
|
||||
|
||||
/// Keychain account name for a provider's Talk API key: the shared account prefix followed by `providerId`.
private static func talkProviderApiKeyAccount(providerId: String) -> String {
    return "\(self.talkProviderApiKeyAccountPrefix)\(providerId)"
}
|
||||
|
||||
/// Canonical provider id: whitespace/newline-trimmed and lower-cased; `nil` when nothing remains.
private static func normalizedTalkProviderID(_ provider: String) -> String? {
    let canonical = provider
        .trimmingCharacters(in: .whitespacesAndNewlines)
        .lowercased()
    guard !canonical.isEmpty else { return nil }
    return canonical
}
|
||||
|
||||
private static func ensureStableInstanceID() {
|
||||
let defaults = UserDefaults.standard
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ import Speech
|
||||
final class TalkModeManager: NSObject {
|
||||
private typealias SpeechRequest = SFSpeechAudioBufferRecognitionRequest
|
||||
private static let defaultModelIdFallback = "eleven_v3"
|
||||
private static let defaultTalkProvider = "elevenlabs"
|
||||
private static let redactedConfigSentinel = "__OPENCLAW_REDACTED__"
|
||||
var isEnabled: Bool = false
|
||||
var isListening: Bool = false
|
||||
@@ -1885,6 +1886,46 @@ extension TalkModeManager {
|
||||
return trimmed
|
||||
}
|
||||
|
||||
/// Result of `selectTalkProviderConfig(_:)`: the chosen provider and the dictionary its settings are read from.
struct TalkProviderConfigSelection {
    /// Provider id chosen for the parsed `talk` dictionary.
    let provider: String

    /// Settings dictionary for that provider; for the legacy flat layout this is the whole `talk` dictionary.
    let config: [String: Any]

    /// `true` when `talk` carried a `provider` string or a `providers` dictionary, `false` for the legacy layout.
    let normalizedPayload: Bool
}
|
||||
|
||||
/// Canonical provider id: whitespace/newline-trimmed and lower-cased; `nil` for `nil` or blank input.
private static func normalizedTalkProviderID(_ raw: String?) -> String? {
    guard let raw else { return nil }
    let canonical = raw.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
    guard !canonical.isEmpty else { return nil }
    return canonical
}
|
||||
|
||||
/// Chooses the provider and settings dictionary to use from a gateway `talk` config dictionary.
///
/// When `talk` has neither a `provider` string nor a `providers` dictionary, the legacy flat
/// layout is assumed: the default provider is returned with `talk` itself as its config.
/// Otherwise the provider is taken from `provider`, then from the alphabetically first key under
/// `providers`, then from the default provider; its config is the matching `providers` entry or
/// an empty dictionary.
///
/// - Parameter talk: Parsed `talk` section, or `nil` when absent.
/// - Returns: The selection, or `nil` when `talk` is `nil`.
static func selectTalkProviderConfig(_ talk: [String: Any]?) -> TalkProviderConfigSelection? {
    guard let talk else { return nil }
    let rawProvider = talk["provider"] as? String
    let rawProviders = talk["providers"] as? [String: Any]

    // Legacy flat layout: the provider settings sit directly on `talk`.
    guard rawProvider != nil || rawProviders != nil else {
        return TalkProviderConfigSelection(
            provider: Self.defaultTalkProvider,
            config: talk,
            normalizedPayload: false)
    }

    let providers = rawProviders ?? [:]
    var normalizedProviders: [String: [String: Any]] = [:]
    // Dictionary iteration order is unspecified, so walk the keys in sorted order. That way two
    // keys that normalize to the same id (e.g. "ElevenLabs" and "elevenlabs") always resolve
    // the same way: a key already in canonical form wins, otherwise the first sorted key wins.
    for key in providers.keys.sorted() {
        guard
            let providerID = Self.normalizedTalkProviderID(key),
            let config = providers[key] as? [String: Any]
        else { continue }
        if key == providerID || normalizedProviders[providerID] == nil {
            normalizedProviders[providerID] = config
        }
    }

    let providerID =
        Self.normalizedTalkProviderID(rawProvider) ??
        normalizedProviders.keys.sorted().first ??
        Self.defaultTalkProvider
    return TalkProviderConfigSelection(
        provider: providerID,
        config: normalizedProviders[providerID] ?? [:],
        normalizedPayload: true)
}
|
||||
|
||||
func reloadConfig() async {
|
||||
guard let gateway else { return }
|
||||
do {
|
||||
@@ -1892,8 +1933,12 @@ extension TalkModeManager {
|
||||
guard let json = try JSONSerialization.jsonObject(with: res) as? [String: Any] else { return }
|
||||
guard let config = json["config"] as? [String: Any] else { return }
|
||||
let talk = config["talk"] as? [String: Any]
|
||||
self.defaultVoiceId = (talk?["voiceId"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
if let aliases = talk?["voiceAliases"] as? [String: Any] {
|
||||
let selection = Self.selectTalkProviderConfig(talk)
|
||||
let activeProvider = selection?.provider ?? Self.defaultTalkProvider
|
||||
let activeConfig = selection?.config
|
||||
self.defaultVoiceId = (activeConfig?["voiceId"] as? String)?
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
if let aliases = activeConfig?["voiceAliases"] as? [String: Any] {
|
||||
var resolved: [String: String] = [:]
|
||||
for (key, value) in aliases {
|
||||
guard let id = value as? String else { continue }
|
||||
@@ -1909,22 +1954,28 @@ extension TalkModeManager {
|
||||
if !self.voiceOverrideActive {
|
||||
self.currentVoiceId = self.defaultVoiceId
|
||||
}
|
||||
let model = (talk?["modelId"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
let model = (activeConfig?["modelId"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
self.defaultModelId = (model?.isEmpty == false) ? model : Self.defaultModelIdFallback
|
||||
if !self.modelOverrideActive {
|
||||
self.currentModelId = self.defaultModelId
|
||||
}
|
||||
self.defaultOutputFormat = (talk?["outputFormat"] as? String)?
|
||||
self.defaultOutputFormat = (activeConfig?["outputFormat"] as? String)?
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
let rawConfigApiKey = (talk?["apiKey"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
let rawConfigApiKey = (activeConfig?["apiKey"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
let configApiKey = Self.normalizedTalkApiKey(rawConfigApiKey)
|
||||
let localApiKey = Self.normalizedTalkApiKey(GatewaySettingsStore.loadTalkElevenLabsApiKey())
|
||||
let localApiKey = Self.normalizedTalkApiKey(
|
||||
GatewaySettingsStore.loadTalkProviderApiKey(provider: activeProvider))
|
||||
if rawConfigApiKey == Self.redactedConfigSentinel {
|
||||
self.apiKey = (localApiKey?.isEmpty == false) ? localApiKey : nil
|
||||
GatewayDiagnostics.log("talk config apiKey redacted; using local override if present")
|
||||
} else {
|
||||
self.apiKey = (localApiKey?.isEmpty == false) ? localApiKey : configApiKey
|
||||
}
|
||||
if activeProvider != Self.defaultTalkProvider {
|
||||
self.apiKey = nil
|
||||
GatewayDiagnostics.log(
|
||||
"talk provider '\(activeProvider)' not yet supported on iOS; using system voice fallback")
|
||||
}
|
||||
self.gatewayTalkDefaultVoiceId = self.defaultVoiceId
|
||||
self.gatewayTalkDefaultModelId = self.defaultModelId
|
||||
self.gatewayTalkApiKeyConfigured = (self.apiKey?.isEmpty == false)
|
||||
@@ -1932,6 +1983,9 @@ extension TalkModeManager {
|
||||
if let interrupt = talk?["interruptOnSpeech"] as? Bool {
|
||||
self.interruptOnSpeech = interrupt
|
||||
}
|
||||
if selection?.normalizedPayload == true {
|
||||
GatewayDiagnostics.log("talk config provider=\(activeProvider)")
|
||||
}
|
||||
} catch {
|
||||
self.defaultModelId = Self.defaultModelIdFallback
|
||||
if !self.modelOverrideActive {
|
||||
|
||||
@@ -9,9 +9,15 @@ private struct KeychainEntry: Hashable {
|
||||
|
||||
private let gatewayService = "ai.openclaw.gateway"
|
||||
private let nodeService = "ai.openclaw.node"
|
||||
private let talkService = "ai.openclaw.talk"
|
||||
private let instanceIdEntry = KeychainEntry(service: nodeService, account: "instanceId")
|
||||
private let preferredGatewayEntry = KeychainEntry(service: gatewayService, account: "preferredStableID")
|
||||
private let lastGatewayEntry = KeychainEntry(service: gatewayService, account: "lastDiscoveredStableID")
|
||||
private let talkElevenLabsLegacyEntry = KeychainEntry(service: talkService, account: "elevenlabs.apiKey")
|
||||
private let talkElevenLabsProviderEntry = KeychainEntry(
|
||||
service: talkService,
|
||||
account: "provider.apiKey.elevenlabs")
|
||||
private let talkAcmeProviderEntry = KeychainEntry(service: talkService, account: "provider.apiKey.acme")
|
||||
|
||||
private func snapshotDefaults(_ keys: [String]) -> [String: Any?] {
|
||||
let defaults = UserDefaults.standard
|
||||
@@ -196,4 +202,34 @@ private func restoreKeychain(_ snapshot: [KeychainEntry: String?]) {
|
||||
let loaded = GatewaySettingsStore.loadLastGatewayConnection()
|
||||
#expect(loaded == .manual(host: "example.org", port: 18789, useTLS: false, stableID: "manual|example.org|18789"))
|
||||
}
|
||||
|
||||
/// A key saved for an arbitrary provider id can be read back, and saving `nil` clears it again.
@Test func talkProviderApiKey_genericRoundTrip() {
    let savedState = snapshotKeychain([talkAcmeProviderEntry])
    defer { restoreKeychain(savedState) }

    // Start from a clean slate for the provider-scoped entry.
    _ = KeychainStore.delete(service: talkService, account: talkAcmeProviderEntry.account)

    GatewaySettingsStore.saveTalkProviderApiKey("acme-key", provider: "acme")
    let afterSave = GatewaySettingsStore.loadTalkProviderApiKey(provider: "acme")
    #expect(afterSave == "acme-key")

    GatewaySettingsStore.saveTalkProviderApiKey(nil, provider: "acme")
    let afterClear = GatewaySettingsStore.loadTalkProviderApiKey(provider: "acme")
    #expect(afterClear == nil)
}
|
||||
|
||||
/// With only the legacy ElevenLabs entry present, loading returns its value and copies it into the
/// provider-scoped entry.
@Test func talkProviderApiKey_elevenlabsLegacyFallbackMigratesToProviderKey() {
    let savedState = snapshotKeychain([talkElevenLabsLegacyEntry, talkElevenLabsProviderEntry])
    defer { restoreKeychain(savedState) }

    // Arrange: no provider-scoped entry, a value in the legacy account.
    let providerAccount = talkElevenLabsProviderEntry.account
    let legacyAccount = talkElevenLabsLegacyEntry.account
    _ = KeychainStore.delete(service: talkService, account: providerAccount)
    _ = KeychainStore.saveString("legacy-eleven-key", service: talkService, account: legacyAccount)

    let loaded = GatewaySettingsStore.loadTalkProviderApiKey(provider: "elevenlabs")
    #expect(loaded == "legacy-eleven-key")

    let migrated = KeychainStore.loadString(service: talkService, account: providerAccount)
    #expect(migrated == "legacy-eleven-key")
}
|
||||
}
|
||||
|
||||
34
apps/ios/Tests/TalkModeConfigParsingTests.swift
Normal file
34
apps/ios/Tests/TalkModeConfigParsingTests.swift
Normal file
@@ -0,0 +1,34 @@
|
||||
import Testing
|
||||
@testable import OpenClaw
|
||||
|
||||
/// Tests for how `TalkModeManager.selectTalkProviderConfig` picks the active Talk provider config.
@Suite struct TalkModeConfigParsingTests {
    /// A payload with `provider`/`providers` must win over the flat legacy fields beside it.
    @Test func prefersNormalizedTalkProviderPayload() async {
        let elevenLabsSection: [String: Any] = ["voiceId": "voice-normalized"]
        let providerSections: [String: Any] = ["elevenlabs": elevenLabsSection]
        let talk: [String: Any] = [
            "provider": "elevenlabs",
            "providers": providerSections,
            "voiceId": "voice-legacy",
        ]

        let selection = await MainActor.run { TalkModeManager.selectTalkProviderConfig(talk) }
        let voiceId = selection?.config["voiceId"] as? String

        #expect(selection?.provider == "elevenlabs")
        #expect(selection?.normalizedPayload == true)
        #expect(voiceId == "voice-normalized")
    }

    /// Without `provider`/`providers`, the flat legacy fields are used as the provider config.
    @Test func fallsBackToLegacyTalkFieldsWhenNormalizedPayloadMissing() async {
        var talk: [String: Any] = [:]
        talk["voiceId"] = "voice-legacy"
        talk["apiKey"] = "legacy-key"
        let payload = talk

        let selection = await MainActor.run { TalkModeManager.selectTalkProviderConfig(payload) }
        let voiceId = selection?.config["voiceId"] as? String
        let apiKey = selection?.config["apiKey"] as? String

        #expect(selection?.provider == "elevenlabs")
        #expect(selection?.normalizedPayload == false)
        #expect(voiceId == "voice-legacy")
        #expect(apiKey == "legacy-key")
    }
}
|
||||
@@ -11,6 +11,7 @@ actor TalkModeRuntime {
|
||||
private let logger = Logger(subsystem: "ai.openclaw", category: "talk.runtime")
|
||||
private let ttsLogger = Logger(subsystem: "ai.openclaw", category: "talk.tts")
|
||||
private static let defaultModelIdFallback = "eleven_v3"
|
||||
private static let defaultTalkProvider = "elevenlabs"
|
||||
|
||||
private final class RMSMeter: @unchecked Sendable {
|
||||
private let lock = NSLock()
|
||||
@@ -792,6 +793,48 @@ extension TalkModeRuntime {
|
||||
let apiKey: String?
|
||||
}
|
||||
|
||||
/// Result of `selectTalkProviderConfig(_:)`: the chosen provider and the dictionary its settings are read from.
struct TalkProviderConfigSelection {
    /// Provider id chosen for the parsed `talk` dictionary.
    let provider: String

    /// Settings dictionary for that provider; for the legacy flat layout this is the whole `talk` dictionary.
    let config: [String: AnyCodable]

    /// `true` when `talk` carried a `provider` string or a `providers` dictionary, `false` for the legacy layout.
    let normalizedPayload: Bool
}
|
||||
|
||||
/// Canonical provider id: whitespace/newline-trimmed and lower-cased; `nil` for `nil` or blank input.
private static func normalizedTalkProviderID(_ raw: String?) -> String? {
    guard let raw else { return nil }
    let canonical = raw.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
    return canonical.isEmpty ? nil : canonical
}
|
||||
|
||||
/// Chooses the provider and settings dictionary to use from a gateway `talk` config dictionary.
///
/// When `talk` has neither a `provider` string nor a `providers` dictionary, the legacy flat
/// layout is assumed: the default provider is returned with `talk` itself as its config.
/// Otherwise the provider is taken from `provider`, then from the alphabetically first key under
/// `providers`, then from the default provider; its config is the matching `providers` entry or
/// an empty dictionary.
///
/// - Parameter talk: Parsed `talk` section, or `nil` when absent.
/// - Returns: The selection, or `nil` when `talk` is `nil`.
static func selectTalkProviderConfig(
    _ talk: [String: AnyCodable]?) -> TalkProviderConfigSelection?
{
    guard let talk else { return nil }
    let rawProvider = talk["provider"]?.stringValue
    let rawProviders = talk["providers"]?.dictionaryValue

    // Legacy flat layout: the provider settings sit directly on `talk`.
    guard rawProvider != nil || rawProviders != nil else {
        return TalkProviderConfigSelection(
            provider: Self.defaultTalkProvider,
            config: talk,
            normalizedPayload: false)
    }

    var normalizedProviders: [String: [String: AnyCodable]] = [:]
    if let rawProviders {
        // Dictionary iteration order is unspecified, so walk the keys in sorted order. That way
        // two keys that normalize to the same id (e.g. "ElevenLabs" and "elevenlabs") always
        // resolve the same way: a key already in canonical form wins, otherwise the first
        // sorted key wins.
        for key in rawProviders.keys.sorted() {
            guard
                let providerID = Self.normalizedTalkProviderID(key),
                let providerConfig = rawProviders[key]?.dictionaryValue
            else { continue }
            if key == providerID || normalizedProviders[providerID] == nil {
                normalizedProviders[providerID] = providerConfig
            }
        }
    }

    let providerID =
        Self.normalizedTalkProviderID(rawProvider) ??
        normalizedProviders.keys.sorted().first ??
        Self.defaultTalkProvider
    return TalkProviderConfigSelection(
        provider: providerID,
        config: normalizedProviders[providerID] ?? [:],
        normalizedPayload: true)
}
|
||||
|
||||
private func fetchTalkConfig() async -> TalkRuntimeConfig {
|
||||
let env = ProcessInfo.processInfo.environment
|
||||
let envVoice = env["ELEVENLABS_VOICE_ID"]?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
@@ -804,13 +847,16 @@ extension TalkModeRuntime {
|
||||
params: ["includeSecrets": AnyCodable(true)],
|
||||
timeoutMs: 8000)
|
||||
let talk = snap.config?["talk"]?.dictionaryValue
|
||||
let selection = Self.selectTalkProviderConfig(talk)
|
||||
let activeProvider = selection?.provider ?? Self.defaultTalkProvider
|
||||
let activeConfig = selection?.config
|
||||
let ui = snap.config?["ui"]?.dictionaryValue
|
||||
let rawSeam = ui?["seamColor"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
|
||||
await MainActor.run {
|
||||
AppStateStore.shared.seamColorHex = rawSeam.isEmpty ? nil : rawSeam
|
||||
}
|
||||
let voice = talk?["voiceId"]?.stringValue
|
||||
let rawAliases = talk?["voiceAliases"]?.dictionaryValue
|
||||
let voice = activeConfig?["voiceId"]?.stringValue
|
||||
let rawAliases = activeConfig?["voiceAliases"]?.dictionaryValue
|
||||
let resolvedAliases: [String: String] =
|
||||
rawAliases?.reduce(into: [:]) { acc, entry in
|
||||
let key = entry.key.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
|
||||
@@ -818,18 +864,30 @@ extension TalkModeRuntime {
|
||||
guard !key.isEmpty, !value.isEmpty else { return }
|
||||
acc[key] = value
|
||||
} ?? [:]
|
||||
let model = talk?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
let model = activeConfig?["modelId"]?.stringValue?.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
let resolvedModel = (model?.isEmpty == false) ? model! : Self.defaultModelIdFallback
|
||||
let outputFormat = talk?["outputFormat"]?.stringValue
|
||||
let outputFormat = activeConfig?["outputFormat"]?.stringValue
|
||||
let interrupt = talk?["interruptOnSpeech"]?.boolValue
|
||||
let apiKey = talk?["apiKey"]?.stringValue
|
||||
let resolvedVoice =
|
||||
let apiKey = activeConfig?["apiKey"]?.stringValue
|
||||
let resolvedVoice: String? = if activeProvider == Self.defaultTalkProvider {
|
||||
(voice?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? voice : nil) ??
|
||||
(envVoice?.isEmpty == false ? envVoice : nil) ??
|
||||
(sagVoice?.isEmpty == false ? sagVoice : nil)
|
||||
let resolvedApiKey =
|
||||
} else {
|
||||
(voice?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? voice : nil)
|
||||
}
|
||||
let resolvedApiKey: String? = if activeProvider == Self.defaultTalkProvider {
|
||||
(envApiKey?.isEmpty == false ? envApiKey : nil) ??
|
||||
(apiKey?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false ? apiKey : nil)
|
||||
} else {
|
||||
nil
|
||||
}
|
||||
if activeProvider != Self.defaultTalkProvider {
|
||||
self.ttsLogger
|
||||
.info("talk provider \(activeProvider, privacy: .public) unsupported; using system voice")
|
||||
} else if selection?.normalizedPayload == true {
|
||||
self.ttsLogger.info("talk config provider elevenlabs")
|
||||
}
|
||||
return TalkRuntimeConfig(
|
||||
voiceId: resolvedVoice,
|
||||
voiceAliases: resolvedAliases,
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
import OpenClawProtocol
|
||||
import Testing
|
||||
|
||||
@testable import OpenClaw
|
||||
|
||||
/// Tests for how `TalkModeRuntime.selectTalkProviderConfig` picks the active Talk provider config.
@Suite struct TalkModeConfigParsingTests {
    /// A payload with `provider`/`providers` must win over the flat legacy fields beside it.
    @Test func prefersNormalizedTalkProviderPayload() {
        let providerSections = AnyCodable([
            "elevenlabs": [
                "voiceId": "voice-normalized",
            ],
        ])
        var talk: [String: AnyCodable] = [:]
        talk["provider"] = AnyCodable("elevenlabs")
        talk["providers"] = providerSections
        talk["voiceId"] = AnyCodable("voice-legacy")

        let selection = TalkModeRuntime.selectTalkProviderConfig(talk)
        let voiceId = selection?.config["voiceId"]?.stringValue

        #expect(selection?.provider == "elevenlabs")
        #expect(selection?.normalizedPayload == true)
        #expect(voiceId == "voice-normalized")
    }

    /// Without `provider`/`providers`, the flat legacy fields are used as the provider config.
    @Test func fallsBackToLegacyTalkFieldsWhenNormalizedPayloadMissing() {
        var talk: [String: AnyCodable] = [:]
        talk["voiceId"] = AnyCodable("voice-legacy")
        talk["apiKey"] = AnyCodable("legacy-key")

        let selection = TalkModeRuntime.selectTalkProviderConfig(talk)
        let voiceId = selection?.config["voiceId"]?.stringValue
        let apiKey = selection?.config["apiKey"]?.stringValue

        #expect(selection?.provider == "elevenlabs")
        #expect(selection?.normalizedPayload == false)
        #expect(voiceId == "voice-legacy")
        #expect(apiKey == "legacy-key")
    }
}
|
||||
@@ -2,7 +2,12 @@ import { DEFAULT_CONTEXT_TOKENS } from "../agents/defaults.js";
|
||||
import { normalizeProviderId, parseModelRef } from "../agents/model-selection.js";
|
||||
import { DEFAULT_AGENT_MAX_CONCURRENT, DEFAULT_SUBAGENT_MAX_CONCURRENT } from "./agent-limits.js";
|
||||
import { resolveAgentModelPrimaryValue } from "./model-input.js";
|
||||
import { resolveTalkApiKey } from "./talk.js";
|
||||
import {
|
||||
DEFAULT_TALK_PROVIDER,
|
||||
normalizeTalkConfig,
|
||||
resolveActiveTalkProviderConfig,
|
||||
resolveTalkApiKey,
|
||||
} from "./talk.js";
|
||||
import type { OpenClawConfig } from "./types.js";
|
||||
import type { ModelDefinitionConfig } from "./types.models.js";
|
||||
|
||||
@@ -163,21 +168,46 @@ export function applySessionDefaults(
|
||||
}
|
||||
|
||||
/**
 * Returns the config with its Talk section normalized and, where applicable, the API key
 * returned by `resolveTalkApiKey()` filled in.
 *
 * The normalized config is returned unchanged when no key is resolved, when the active Talk
 * provider is set to something other than the default provider, or when an API key is already
 * present on the active provider config or on the legacy `talk.apiKey` field. Otherwise the
 * resolved key is written to both the active provider's config and the legacy field.
 *
 * @param config Config to augment; it is not mutated.
 * @returns A new config object (or the normalized config when nothing needs to be added).
 */
export function applyTalkApiKey(config: OpenClawConfig): OpenClawConfig {
  const normalized = normalizeTalkConfig(config);
  const resolved = resolveTalkApiKey();
  if (!resolved) {
    return normalized;
  }

  const talk = normalized.talk;
  const active = resolveActiveTalkProviderConfig(talk);
  // The resolved key is only applied to the default provider.
  if (active.provider && active.provider !== DEFAULT_TALK_PROVIDER) {
    return normalized;
  }

  // Never overwrite a key that is already configured, in either shape.
  const existingProviderApiKey =
    typeof active.config?.apiKey === "string" ? active.config.apiKey.trim() : "";
  const existingLegacyApiKey = typeof talk?.apiKey === "string" ? talk.apiKey.trim() : "";
  if (existingProviderApiKey || existingLegacyApiKey) {
    return normalized;
  }

  const providerId = active.provider ?? DEFAULT_TALK_PROVIDER;
  const providers = { ...talk?.providers };
  const providerConfig = { ...providers[providerId], apiKey: resolved };
  providers[providerId] = providerConfig;

  const nextTalk = {
    ...talk,
    provider: talk?.provider ?? providerId,
    providers,
    // Keep legacy shape populated during compatibility rollout.
    apiKey: resolved,
  };

  return {
    ...normalized,
    talk: nextTalk,
  };
}
|
||||
|
||||
/**
 * Config-pipeline step that passes the config through `normalizeTalkConfig` and returns the result.
 */
export function applyTalkConfigNormalization(config: OpenClawConfig): OpenClawConfig {
  const normalizedConfig = normalizeTalkConfig(config);
  return normalizedConfig;
}
|
||||
|
||||
export function applyModelDefaults(cfg: OpenClawConfig): OpenClawConfig {
|
||||
|
||||
@@ -24,6 +24,7 @@ import {
|
||||
applyMessageDefaults,
|
||||
applyModelDefaults,
|
||||
applySessionDefaults,
|
||||
applyTalkConfigNormalization,
|
||||
applyTalkApiKey,
|
||||
} from "./defaults.js";
|
||||
import { restoreEnvVarRefs } from "./env-preserve.js";
|
||||
@@ -720,11 +721,13 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
|
||||
deps.logger.warn(`Config warnings:\\n${details}`);
|
||||
}
|
||||
warnIfConfigFromFuture(validated.config, deps.logger);
|
||||
const cfg = applyModelDefaults(
|
||||
applyCompactionDefaults(
|
||||
applyContextPruningDefaults(
|
||||
applyAgentDefaults(
|
||||
applySessionDefaults(applyLoggingDefaults(applyMessageDefaults(validated.config))),
|
||||
const cfg = applyTalkConfigNormalization(
|
||||
applyModelDefaults(
|
||||
applyCompactionDefaults(
|
||||
applyContextPruningDefaults(
|
||||
applyAgentDefaults(
|
||||
applySessionDefaults(applyLoggingDefaults(applyMessageDefaults(validated.config))),
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
@@ -809,10 +812,12 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
|
||||
if (!exists) {
|
||||
const hash = hashConfigRaw(null);
|
||||
const config = applyTalkApiKey(
|
||||
applyModelDefaults(
|
||||
applyCompactionDefaults(
|
||||
applyContextPruningDefaults(
|
||||
applyAgentDefaults(applySessionDefaults(applyMessageDefaults({}))),
|
||||
applyTalkConfigNormalization(
|
||||
applyModelDefaults(
|
||||
applyCompactionDefaults(
|
||||
applyContextPruningDefaults(
|
||||
applyAgentDefaults(applySessionDefaults(applyMessageDefaults({}))),
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
@@ -933,9 +938,11 @@ export function createConfigIO(overrides: ConfigIoDeps = {}) {
|
||||
warnIfConfigFromFuture(validated.config, deps.logger);
|
||||
const snapshotConfig = normalizeConfigPaths(
|
||||
applyTalkApiKey(
|
||||
applyModelDefaults(
|
||||
applyAgentDefaults(
|
||||
applySessionDefaults(applyLoggingDefaults(applyMessageDefaults(validated.config))),
|
||||
applyTalkConfigNormalization(
|
||||
applyModelDefaults(
|
||||
applyAgentDefaults(
|
||||
applySessionDefaults(applyLoggingDefaults(applyMessageDefaults(validated.config))),
|
||||
),
|
||||
),
|
||||
),
|
||||
),
|
||||
|
||||
@@ -133,14 +133,24 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"gateway.remote.sshTarget":
|
||||
"Remote gateway over SSH (tunnels the gateway port to localhost). Format: user@host or user@host:port.",
|
||||
"gateway.remote.sshIdentity": "Optional SSH identity file path (passed to ssh -i).",
|
||||
"talk.provider": 'Active Talk provider id (for example "elevenlabs").',
|
||||
"talk.providers":
|
||||
"Provider-specific Talk settings keyed by provider id. During migration, prefer this over legacy talk.* keys.",
|
||||
"talk.providers.*.voiceId": "Provider default voice ID for Talk mode.",
|
||||
"talk.providers.*.voiceAliases": "Optional provider voice alias map for Talk directives.",
|
||||
"talk.providers.*.modelId": "Provider default model ID for Talk mode.",
|
||||
"talk.providers.*.outputFormat": "Provider default output format for Talk mode.",
|
||||
"talk.providers.*.apiKey": "Provider API key for Talk mode.",
|
||||
"talk.voiceId":
|
||||
"Default ElevenLabs voice ID for Talk mode (iOS/macOS/Android). Falls back to ELEVENLABS_VOICE_ID or SAG_VOICE_ID when unset.",
|
||||
"Legacy ElevenLabs default voice ID for Talk mode. Prefer talk.providers.elevenlabs.voiceId.",
|
||||
"talk.voiceAliases":
|
||||
'Optional map of friendly names to ElevenLabs voice IDs for Talk directives (for example {"Clawd":"EXAVITQu4vr4xnSDxMaL"}).',
|
||||
"talk.modelId": "Default ElevenLabs model ID for Talk mode (default: eleven_v3).",
|
||||
'Legacy ElevenLabs voice alias map (for example {"Clawd":"EXAVITQu4vr4xnSDxMaL"}). Prefer talk.providers.elevenlabs.voiceAliases.',
|
||||
"talk.modelId":
|
||||
"Legacy ElevenLabs model ID for Talk mode (default: eleven_v3). Prefer talk.providers.elevenlabs.modelId.",
|
||||
"talk.outputFormat":
|
||||
"Default ElevenLabs output format for Talk mode (for example pcm_44100 or mp3_44100_128).",
|
||||
"talk.apiKey": "ElevenLabs API key for Talk mode. Falls back to ELEVENLABS_API_KEY when unset.",
|
||||
"Legacy ElevenLabs output format for Talk mode (for example pcm_44100 or mp3_44100_128). Prefer talk.providers.elevenlabs.outputFormat.",
|
||||
"talk.apiKey":
|
||||
"Legacy ElevenLabs API key for Talk mode. Prefer talk.providers.elevenlabs.apiKey (fallback: ELEVENLABS_API_KEY).",
|
||||
"talk.interruptOnSpeech":
|
||||
"If true (default), stop assistant speech when the user starts speaking in Talk mode.",
|
||||
"agents.list.*.skills":
|
||||
|
||||
@@ -600,6 +600,13 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"messages.inbound.debounceMs": "Inbound Message Debounce (ms)",
|
||||
"messages.inbound.byChannel": "Inbound Debounce by Channel (ms)",
|
||||
"messages.tts": "Message Text-to-Speech",
|
||||
"talk.provider": "Talk Active Provider",
|
||||
"talk.providers": "Talk Provider Settings",
|
||||
"talk.providers.*.voiceId": "Talk Provider Voice ID",
|
||||
"talk.providers.*.voiceAliases": "Talk Provider Voice Aliases",
|
||||
"talk.providers.*.modelId": "Talk Provider Model ID",
|
||||
"talk.providers.*.outputFormat": "Talk Provider Output Format",
|
||||
"talk.providers.*.apiKey": "Talk Provider API Key",
|
||||
"talk.apiKey": "Talk API Key",
|
||||
channels: "Channels",
|
||||
"channels.defaults": "Channel Defaults",
|
||||
|
||||
150
src/config/talk.normalize.test.ts
Normal file
150
src/config/talk.normalize.test.ts
Normal file
@@ -0,0 +1,150 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { createConfigIO } from "./io.js";
|
||||
import { normalizeTalkSection } from "./talk.js";
|
||||
|
||||
/**
 * Writes `config` as pretty-printed JSON to `openclaw.json` inside a fresh
 * temporary directory, invokes `run` with that file's path, and removes the
 * directory afterwards.
 *
 * The write happens inside the `try` block so the directory is removed even
 * when serializing or writing the config fails (for example when `config`
 * cannot be stringified), not only when `run` throws.
 *
 * @param config Value serialized with `JSON.stringify(config, null, 2)`.
 * @param run Callback that receives the absolute path of the written file.
 */
async function withTempConfig(
  config: unknown,
  run: (configPath: string) => Promise<void>,
): Promise<void> {
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-talk-"));
  try {
    const configPath = path.join(dir, "openclaw.json");
    await fs.writeFile(configPath, JSON.stringify(config, null, 2));
    await run(configPath);
  } finally {
    // `force: true` keeps cleanup from throwing if the directory is already gone.
    await fs.rm(dir, { recursive: true, force: true });
  }
}
|
||||
|
||||
/**
 * Runs `run` with `process.env` temporarily patched.
 *
 * Each key in `updates` is set to its value, or deleted when the value is
 * `undefined`. Once `run` settles (resolved or rejected), every touched key is
 * put back to the value it had before the call, or deleted if it was unset.
 *
 * @param updates Environment variable names mapped to their temporary values.
 * @param run Async callback executed while the patch is in effect.
 */
async function withEnv(
  updates: Record<string, string | undefined>,
  run: () => Promise<void>,
): Promise<void> {
  // Single place that knows "undefined means unset".
  const setOrUnset = (name: string, next: string | undefined): void => {
    if (next === undefined) {
      delete process.env[name];
      return;
    }
    process.env[name] = next;
  };

  const saved: Array<[string, string | undefined]> = [];
  for (const name of Object.keys(updates)) {
    saved.push([name, process.env[name]]);
    setOrUnset(name, updates[name]);
  }

  try {
    await run();
  } finally {
    saved.forEach(([name, original]) => setOrUnset(name, original));
  }
}
|
||||
|
||||
describe("talk normalization", () => {
  // Environment patch shared by the snapshot tests below.
  const ELEVENLABS_ENV = { ELEVENLABS_API_KEY: "env-eleven-key" };

  it("maps legacy ElevenLabs fields into provider/providers", () => {
    const legacyFields = {
      voiceId: "voice-123",
      voiceAliases: { Clawd: "EXAVITQu4vr4xnSDxMaL" },
      modelId: "eleven_v3",
      outputFormat: "pcm_44100",
      apiKey: "secret-key",
    };

    const normalized = normalizeTalkSection({ ...legacyFields, interruptOnSpeech: false });

    // Legacy keys are kept at the top level and mirrored under providers.elevenlabs.
    expect(normalized).toEqual({
      provider: "elevenlabs",
      providers: {
        elevenlabs: { ...legacyFields },
      },
      ...legacyFields,
      interruptOnSpeech: false,
    });
  });

  it("uses new provider/providers shape directly when present", () => {
    const input = {
      provider: "acme",
      providers: {
        acme: {
          voiceId: "acme-voice",
          custom: true,
        },
      },
      voiceId: "legacy-voice",
      interruptOnSpeech: true,
    };

    expect(normalizeTalkSection(input)).toEqual({
      provider: "acme",
      providers: {
        acme: {
          voiceId: "acme-voice",
          custom: true,
        },
      },
      voiceId: "legacy-voice",
      interruptOnSpeech: true,
    });
  });

  it("merges ELEVENLABS_API_KEY into normalized defaults for legacy configs", async () => {
    const fileConfig = { talk: { voiceId: "voice-123" } };

    await withEnv(ELEVENLABS_ENV, async () => {
      await withTempConfig(fileConfig, async (configPath) => {
        const snapshot = await createConfigIO({ configPath }).readConfigFileSnapshot();
        const talk = snapshot.config.talk;
        expect(talk?.provider).toBe("elevenlabs");
        expect(talk?.providers?.elevenlabs?.voiceId).toBe("voice-123");
        expect(talk?.providers?.elevenlabs?.apiKey).toBe("env-eleven-key");
        expect(talk?.apiKey).toBe("env-eleven-key");
      });
    });
  });

  it("does not apply ELEVENLABS_API_KEY when active provider is not elevenlabs", async () => {
    const fileConfig = {
      talk: {
        provider: "acme",
        providers: {
          acme: {
            voiceId: "acme-voice",
          },
        },
      },
    };

    await withEnv(ELEVENLABS_ENV, async () => {
      await withTempConfig(fileConfig, async (configPath) => {
        const snapshot = await createConfigIO({ configPath }).readConfigFileSnapshot();
        const talk = snapshot.config.talk;
        expect(talk?.provider).toBe("acme");
        expect(talk?.providers?.acme?.voiceId).toBe("acme-voice");
        expect(talk?.providers?.acme?.apiKey).toBeUndefined();
        expect(talk?.apiKey).toBeUndefined();
      });
    });
  });
});
|
||||
@@ -1,6 +1,8 @@
|
||||
import fs from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import type { TalkConfig, TalkProviderConfig } from "./types.gateway.js";
|
||||
import type { OpenClawConfig } from "./types.js";
|
||||
|
||||
type TalkApiKeyDeps = {
|
||||
fs?: typeof fs;
|
||||
@@ -8,6 +10,266 @@ type TalkApiKeyDeps = {
|
||||
path?: typeof path;
|
||||
};
|
||||
|
||||
export const DEFAULT_TALK_PROVIDER = "elevenlabs";
|
||||
|
||||
/**
 * Type guard for "object-like record" values.
 *
 * Accepts anything whose `typeof` is `"object"` except `null` and arrays.
 * Note that class instances (for example `Date`) also pass; no prototype
 * check is performed.
 */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
||||
|
||||
/**
 * Returns `value` trimmed when it is a string with non-whitespace content.
 *
 * Non-string inputs, the empty string, and whitespace-only strings all yield
 * `undefined`.
 */
function normalizeString(value: unknown): string | undefined {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed === "" ? undefined : trimmed;
}
|
||||
|
||||
/**
 * Builds an alias -> voice id map from an untyped value.
 *
 * Entries whose value is not a string are dropped; alias names and ids are
 * copied as-is (no trimming). Returns `undefined` when `value` is not an
 * object or when no entry survives.
 */
function normalizeVoiceAliases(value: unknown): Record<string, string> | undefined {
  if (!isPlainObject(value)) {
    return undefined;
  }
  const aliases: Record<string, string> = {};
  Object.entries(value).forEach(([name, voiceId]) => {
    if (typeof voiceId === "string") {
      aliases[name] = voiceId;
    }
  });
  return Object.keys(aliases).length === 0 ? undefined : aliases;
}
|
||||
|
||||
/**
 * Normalizes one provider's Talk settings.
 *
 * - `voiceAliases` goes through `normalizeVoiceAliases` and is omitted when empty.
 * - `voiceId`, `modelId`, `outputFormat` and `apiKey` are trimmed via
 *   `normalizeString` and omitted when blank or not a string.
 * - Any other key is copied through untouched (provider-specific extensions).
 * - Keys whose value is `undefined` are skipped.
 *
 * Returns `undefined` when `value` is not an object or nothing remains.
 */
function normalizeTalkProviderConfig(value: unknown): TalkProviderConfig | undefined {
  if (!isPlainObject(value)) {
    return undefined;
  }

  const result: TalkProviderConfig = {};
  for (const [field, fieldValue] of Object.entries(value)) {
    if (fieldValue === undefined) {
      continue;
    }
    switch (field) {
      case "voiceAliases": {
        const aliases = normalizeVoiceAliases(fieldValue);
        if (aliases) {
          result.voiceAliases = aliases;
        }
        break;
      }
      case "voiceId":
      case "modelId":
      case "outputFormat":
      case "apiKey": {
        const text = normalizeString(fieldValue);
        if (text) {
          result[field] = text;
        }
        break;
      }
      default:
        // Unknown keys are provider extensions; keep them verbatim.
        result[field] = fieldValue;
    }
  }

  return Object.keys(result).length === 0 ? undefined : result;
}
|
||||
|
||||
/**
 * Normalizes the `providers` map of a Talk section.
 *
 * Provider ids are trimmed with `normalizeString`; entries with a blank id or
 * with a config that normalizes to nothing are dropped. Returns `undefined`
 * when `value` is not an object or no provider survives.
 */
function normalizeTalkProviders(value: unknown): Record<string, TalkProviderConfig> | undefined {
  if (!isPlainObject(value)) {
    return undefined;
  }
  const byId: Record<string, TalkProviderConfig> = {};
  Object.entries(value).forEach(([rawId, rawConfig]) => {
    const id = normalizeString(rawId);
    // Only normalize the config once the id itself is usable.
    const config = id ? normalizeTalkProviderConfig(rawConfig) : undefined;
    if (id && config) {
      byId[id] = config;
    }
  });
  return Object.keys(byId).length === 0 ? undefined : byId;
}
|
||||
|
||||
/**
 * Extracts the legacy top-level Talk fields (`voiceId`, `voiceAliases`,
 * `modelId`, `outputFormat`, `apiKey`) from `source`.
 *
 * String fields are trimmed and omitted when blank; `voiceAliases` is omitted
 * when it normalizes to nothing. Fields are written in the order listed above.
 */
function normalizedLegacyTalkFields(source: Record<string, unknown>): Partial<TalkConfig> {
  const legacy: Partial<TalkConfig> = {};

  const copyString = (field: "voiceId" | "modelId" | "outputFormat" | "apiKey"): void => {
    const text = normalizeString(source[field]);
    if (text) {
      legacy[field] = text;
    }
  };

  copyString("voiceId");
  const aliases = normalizeVoiceAliases(source.voiceAliases);
  if (aliases) {
    legacy.voiceAliases = aliases;
  }
  copyString("modelId");
  copyString("outputFormat");
  copyString("apiKey");

  return legacy;
}
|
||||
|
||||
/**
 * Builds a provider config from the legacy top-level Talk fields of `source`.
 *
 * Only `voiceId`, `voiceAliases`, `modelId`, `outputFormat` and `apiKey` are
 * considered; the result is whatever `normalizeTalkProviderConfig` returns for
 * them (`undefined` when none is usable).
 */
function legacyProviderConfigFromTalk(
  source: Record<string, unknown>,
): TalkProviderConfig | undefined {
  const { voiceId, voiceAliases, modelId, outputFormat, apiKey } = source;
  return normalizeTalkProviderConfig({ voiceId, voiceAliases, modelId, outputFormat, apiKey });
}
|
||||
|
||||
/**
 * Determines which provider id is active for `talk`.
 *
 * An explicit, non-blank `talk.provider` wins (trimmed). Otherwise the id is
 * inferred only when `talk.providers` has exactly one key; with zero or
 * several providers the result is `undefined`.
 */
function activeProviderFromTalk(talk: TalkConfig): string | undefined {
  const explicit = normalizeString(talk.provider);
  if (explicit) {
    return explicit;
  }
  if (!talk.providers) {
    return undefined;
  }
  const [onlyId, ...others] = Object.keys(talk.providers);
  return others.length === 0 ? onlyId : undefined;
}
|
||||
|
||||
/**
 * Projects a provider config back onto the legacy top-level Talk fields.
 *
 * String-typed `voiceId`, `modelId`, `outputFormat` and `apiKey` are copied
 * unchanged (no trimming); `voiceAliases` is passed through
 * `normalizeVoiceAliases`, which rejects non-object values and drops
 * non-string entries. A missing config yields an empty object.
 */
function legacyTalkFieldsFromProviderConfig(
  config: TalkProviderConfig | undefined,
): Partial<TalkConfig> {
  const legacy: Partial<TalkConfig> = {};
  if (!config) {
    return legacy;
  }

  const { voiceId, voiceAliases, modelId, outputFormat, apiKey } = config;
  if (typeof voiceId === "string") {
    legacy.voiceId = voiceId;
  }
  const aliases = normalizeVoiceAliases(voiceAliases);
  if (aliases) {
    legacy.voiceAliases = aliases;
  }
  if (typeof modelId === "string") {
    legacy.modelId = modelId;
  }
  if (typeof outputFormat === "string") {
    legacy.outputFormat = outputFormat;
  }
  if (typeof apiKey === "string") {
    legacy.apiKey = apiKey;
  }
  return legacy;
}
|
||||
|
||||
/**
 * Normalizes a Talk config section into the provider/providers shape while
 * keeping the legacy top-level fields.
 *
 * - Legacy fields and a boolean `interruptOnSpeech` are always carried over.
 * - When the input already has a string `provider` or an object `providers`,
 *   those are normalized and used as-is; if `provider` is blank, it is inferred
 *   only when exactly one provider is configured.
 * - Otherwise the legacy fields are mirrored under
 *   `providers[DEFAULT_TALK_PROVIDER]` and that provider becomes active.
 *
 * @returns The normalized section, or `undefined` when `value` is not an
 *   object or nothing usable remains.
 */
export function normalizeTalkSection(value: TalkConfig | undefined): TalkConfig | undefined {
  if (!isPlainObject(value)) {
    return undefined;
  }

  const raw = value as Record<string, unknown>;
  const result: TalkConfig = { ...normalizedLegacyTalkFields(raw) };
  if (typeof raw.interruptOnSpeech === "boolean") {
    result.interruptOnSpeech = raw.interruptOnSpeech;
  }

  const usesProviderShape = typeof raw.provider === "string" || isPlainObject(raw.providers);
  if (usesProviderShape) {
    const providers = normalizeTalkProviders(raw.providers);
    if (providers) {
      result.providers = providers;
    }
    const providerIds = providers ? Object.keys(providers) : [];
    const activeId =
      normalizeString(raw.provider) ?? (providerIds.length === 1 ? providerIds[0] : undefined);
    if (activeId) {
      result.provider = activeId;
    }
  } else {
    // Legacy-only input: treat the top-level fields as the default provider's config.
    const legacyConfig = legacyProviderConfigFromTalk(raw);
    if (legacyConfig) {
      result.provider = DEFAULT_TALK_PROVIDER;
      result.providers = { [DEFAULT_TALK_PROVIDER]: legacyConfig };
    }
  }

  return Object.keys(result).length > 0 ? result : undefined;
}
|
||||
|
||||
/**
 * Returns `config` with its `talk` section replaced by the normalized form.
 *
 * The input object is returned unchanged (same reference) when it has no
 * `talk` section or when `normalizeTalkSection` yields nothing.
 */
export function normalizeTalkConfig(config: OpenClawConfig): OpenClawConfig {
  const talk = config.talk ? normalizeTalkSection(config.talk) : undefined;
  return talk ? { ...config, talk } : config;
}
|
||||
|
||||
/**
 * Resolves the active Talk provider id and its config for `talk`.
 *
 * The section is normalized first. An empty object is returned when the
 * section normalizes to nothing or no active provider can be determined.
 * `config` may be `undefined` when the active id has no entry in `providers`.
 */
export function resolveActiveTalkProviderConfig(talk: TalkConfig | undefined): {
  provider?: string;
  config?: TalkProviderConfig;
} {
  const normalized = normalizeTalkSection(talk);
  const provider = normalized ? activeProviderFromTalk(normalized) : undefined;
  if (!normalized || !provider) {
    return {};
  }
  return { provider, config: normalized.providers?.[provider] };
}
|
||||
|
||||
/**
 * Builds the Talk payload returned to clients from an untyped config value.
 *
 * The payload carries `interruptOnSpeech`, `providers` and `provider` from the
 * normalized section, plus legacy top-level fields for older clients. Those
 * legacy fields come from the active provider's config when it yields any;
 * otherwise the section's own legacy fields are used.
 *
 * @returns The payload, or `undefined` when `value` is not an object or the
 *   result would be empty.
 */
export function buildTalkConfigResponse(value: unknown): TalkConfig | undefined {
  const normalized = isPlainObject(value) ? normalizeTalkSection(value as TalkConfig) : undefined;
  if (!normalized) {
    return undefined;
  }

  const { interruptOnSpeech, providers, provider } = normalized;
  const payload: TalkConfig = {};
  if (typeof interruptOnSpeech === "boolean") {
    payload.interruptOnSpeech = interruptOnSpeech;
  }
  if (providers && Object.keys(providers).length > 0) {
    payload.providers = providers;
  }
  if (typeof provider === "string") {
    payload.provider = provider;
  }

  const activeId = activeProviderFromTalk(normalized);
  const fromActiveProvider = legacyTalkFieldsFromProviderConfig(
    activeId ? providers?.[activeId] : undefined,
  );
  // Provider-derived values take precedence over whatever legacy keys the config still has.
  const legacyFields =
    Object.keys(fromActiveProvider).length > 0
      ? fromActiveProvider
      : normalizedLegacyTalkFields(normalized as unknown as Record<string, unknown>);
  Object.assign(payload, legacyFields);

  return Object.keys(payload).length > 0 ? payload : undefined;
}
|
||||
|
||||
export function readTalkApiKeyFromProfile(deps: TalkApiKeyDeps = {}): string | null {
|
||||
const fsImpl = deps.fs ?? fs;
|
||||
const osImpl = deps.os ?? os;
|
||||
|
||||
@@ -46,19 +46,38 @@ export type CanvasHostConfig = {
|
||||
liveReload?: boolean;
|
||||
};
|
||||
|
||||
export type TalkConfig = {
|
||||
/** Default ElevenLabs voice ID for Talk mode. */
|
||||
export type TalkProviderConfig = {
|
||||
/** Default voice ID for the provider's Talk mode implementation. */
|
||||
voiceId?: string;
|
||||
/** Optional voice name -> ElevenLabs voice ID map. */
|
||||
/** Optional voice name -> provider voice ID map. */
|
||||
voiceAliases?: Record<string, string>;
|
||||
/** Default ElevenLabs model ID for Talk mode. */
|
||||
/** Default provider model ID for Talk mode. */
|
||||
modelId?: string;
|
||||
/** Default ElevenLabs output format (e.g. mp3_44100_128). */
|
||||
/** Default provider output format (for example pcm_44100). */
|
||||
outputFormat?: string;
|
||||
/** ElevenLabs API key (optional; falls back to ELEVENLABS_API_KEY). */
|
||||
/** Provider API key (optional; provider-specific env fallback may apply). */
|
||||
apiKey?: string;
|
||||
/** Provider-specific extensions. */
|
||||
[key: string]: unknown;
|
||||
};
|
||||
|
||||
export type TalkConfig = {
|
||||
/** Active Talk TTS provider (for example "elevenlabs"). */
|
||||
provider?: string;
|
||||
/** Provider-specific Talk config keyed by provider id. */
|
||||
providers?: Record<string, TalkProviderConfig>;
|
||||
/** Stop speaking when user starts talking (default: true). */
|
||||
interruptOnSpeech?: boolean;
|
||||
|
||||
/**
|
||||
* Legacy ElevenLabs compatibility fields.
|
||||
* Kept during rollout while older clients migrate to provider/providers.
|
||||
*/
|
||||
voiceId?: string;
|
||||
voiceAliases?: Record<string, string>;
|
||||
modelId?: string;
|
||||
outputFormat?: string;
|
||||
apiKey?: string;
|
||||
};
|
||||
|
||||
export type GatewayControlUiConfig = {
|
||||
|
||||
@@ -439,6 +439,21 @@ export const OpenClawSchema = z
|
||||
.optional(),
|
||||
talk: z
|
||||
.object({
|
||||
provider: z.string().optional(),
|
||||
providers: z
|
||||
.record(
|
||||
z.string(),
|
||||
z
|
||||
.object({
|
||||
voiceId: z.string().optional(),
|
||||
voiceAliases: z.record(z.string(), z.string()).optional(),
|
||||
modelId: z.string().optional(),
|
||||
outputFormat: z.string().optional(),
|
||||
apiKey: z.string().optional().register(sensitive),
|
||||
})
|
||||
.catchall(z.unknown()),
|
||||
)
|
||||
.optional(),
|
||||
voiceId: z.string().optional(),
|
||||
voiceAliases: z.record(z.string(), z.string()).optional(),
|
||||
modelId: z.string().optional(),
|
||||
|
||||
@@ -16,6 +16,17 @@ export const TalkConfigParamsSchema = Type.Object(
|
||||
{ additionalProperties: false },
|
||||
);
|
||||
|
||||
const TalkProviderConfigSchema = Type.Object(
|
||||
{
|
||||
voiceId: Type.Optional(Type.String()),
|
||||
voiceAliases: Type.Optional(Type.Record(Type.String(), Type.String())),
|
||||
modelId: Type.Optional(Type.String()),
|
||||
outputFormat: Type.Optional(Type.String()),
|
||||
apiKey: Type.Optional(Type.String()),
|
||||
},
|
||||
{ additionalProperties: true },
|
||||
);
|
||||
|
||||
export const TalkConfigResultSchema = Type.Object(
|
||||
{
|
||||
config: Type.Object(
|
||||
@@ -23,6 +34,8 @@ export const TalkConfigResultSchema = Type.Object(
|
||||
talk: Type.Optional(
|
||||
Type.Object(
|
||||
{
|
||||
provider: Type.Optional(Type.String()),
|
||||
providers: Type.Optional(Type.Record(Type.String(), TalkProviderConfigSchema)),
|
||||
voiceId: Type.Optional(Type.String()),
|
||||
voiceAliases: Type.Optional(Type.Record(Type.String(), Type.String())),
|
||||
modelId: Type.Optional(Type.String()),
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { readConfigFileSnapshot } from "../../config/config.js";
|
||||
import { redactConfigObject } from "../../config/redact-snapshot.js";
|
||||
import { buildTalkConfigResponse } from "../../config/talk.js";
|
||||
import {
|
||||
ErrorCodes,
|
||||
errorShape,
|
||||
@@ -17,46 +18,6 @@ function canReadTalkSecrets(client: { connect?: { scopes?: string[] } } | null):
|
||||
return scopes.includes(ADMIN_SCOPE) || scopes.includes(TALK_SECRETS_SCOPE);
|
||||
}
|
||||
|
||||
/**
 * Whitelists the legacy Talk fields of an untyped config value.
 *
 * Copies `voiceId`, `modelId`, `outputFormat` and `apiKey` when they are
 * strings, `interruptOnSpeech` when it is a boolean, and `voiceAliases`
 * reduced to its string-valued entries (omitted when none remain). Everything
 * else is dropped.
 *
 * @returns The filtered section, or `undefined` when `value` is not a
 *   non-array object or nothing was copied.
 */
function normalizeTalkConfigSection(value: unknown): Record<string, unknown> | undefined {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return undefined;
  }

  const input = value as Record<string, unknown>;
  const section: Record<string, unknown> = {};
  const copyWhen = (field: string, expected: "string" | "boolean"): void => {
    if (typeof input[field] === expected) {
      section[field] = input[field];
    }
  };

  copyWhen("voiceId", "string");

  const rawAliases = input.voiceAliases;
  if (rawAliases && typeof rawAliases === "object" && !Array.isArray(rawAliases)) {
    const aliases: Record<string, string> = {};
    for (const [alias, id] of Object.entries(rawAliases as Record<string, unknown>)) {
      if (typeof id === "string") {
        aliases[alias] = id;
      }
    }
    if (Object.keys(aliases).length > 0) {
      section.voiceAliases = aliases;
    }
  }

  copyWhen("modelId", "string");
  copyWhen("outputFormat", "string");
  copyWhen("apiKey", "string");
  copyWhen("interruptOnSpeech", "boolean");

  return Object.keys(section).length > 0 ? section : undefined;
}
|
||||
|
||||
export const talkHandlers: GatewayRequestHandlers = {
|
||||
"talk.config": async ({ params, respond, client }) => {
|
||||
if (!validateTalkConfigParams(params)) {
|
||||
@@ -87,7 +48,7 @@ export const talkHandlers: GatewayRequestHandlers = {
|
||||
const talkSource = includeSecrets
|
||||
? snapshot.config.talk
|
||||
: redactConfigObject(snapshot.config.talk);
|
||||
const talk = normalizeTalkConfigSection(talkSource);
|
||||
const talk = buildTalkConfigResponse(talkSource);
|
||||
if (talk) {
|
||||
configPayload.talk = talk;
|
||||
}
|
||||
|
||||
@@ -79,12 +79,24 @@ describe("gateway talk.config", () => {
|
||||
|
||||
await withServer(async (ws) => {
|
||||
await connectOperator(ws, ["operator.read"]);
|
||||
const res = await rpcReq<{ config?: { talk?: { apiKey?: string; voiceId?: string } } }>(
|
||||
ws,
|
||||
"talk.config",
|
||||
{},
|
||||
);
|
||||
const res = await rpcReq<{
|
||||
config?: {
|
||||
talk?: {
|
||||
provider?: string;
|
||||
providers?: {
|
||||
elevenlabs?: { voiceId?: string; apiKey?: string };
|
||||
};
|
||||
apiKey?: string;
|
||||
voiceId?: string;
|
||||
};
|
||||
};
|
||||
}>(ws, "talk.config", {});
|
||||
expect(res.ok).toBe(true);
|
||||
expect(res.payload?.config?.talk?.provider).toBe("elevenlabs");
|
||||
expect(res.payload?.config?.talk?.providers?.elevenlabs?.voiceId).toBe("voice-123");
|
||||
expect(res.payload?.config?.talk?.providers?.elevenlabs?.apiKey).toBe(
|
||||
"__OPENCLAW_REDACTED__",
|
||||
);
|
||||
expect(res.payload?.config?.talk?.voiceId).toBe("voice-123");
|
||||
expect(res.payload?.config?.talk?.apiKey).toBe("__OPENCLAW_REDACTED__");
|
||||
});
|
||||
@@ -113,4 +125,38 @@ describe("gateway talk.config", () => {
|
||||
expect(res.payload?.config?.talk?.apiKey).toBe("secret-key-abc");
|
||||
});
|
||||
});
|
||||
|
||||
// When both shapes are present, the provider entry must win in the response,
// including in the legacy top-level voiceId that is echoed for older clients.
it("prefers normalized provider payload over conflicting legacy talk keys", async () => {
  type TalkConfigReply = {
    config?: {
      talk?: {
        provider?: string;
        providers?: {
          elevenlabs?: { voiceId?: string };
        };
        voiceId?: string;
      };
    };
  };

  const configModule = await import("../config/config.js");
  await configModule.writeConfigFile({
    talk: {
      provider: "elevenlabs",
      providers: {
        elevenlabs: {
          voiceId: "voice-normalized",
        },
      },
      voiceId: "voice-legacy",
    },
  });

  await withServer(async (ws) => {
    await connectOperator(ws, ["operator.read"]);
    const res = await rpcReq<TalkConfigReply>(ws, "talk.config", {});
    expect(res.ok).toBe(true);

    const talk = res.payload?.config?.talk;
    expect(talk?.provider).toBe("elevenlabs");
    expect(talk?.providers?.elevenlabs?.voiceId).toBe("voice-normalized");
    expect(talk?.voiceId).toBe("voice-normalized");
  });
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user