fix(ios): guard talk TTS callbacks to active utterance (#33304)

Merged via squash.

Prepared head SHA: dd88886e41
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Co-authored-by: mbelinky <132747814+mbelinky@users.noreply.github.com>
Reviewed-by: @mbelinky
This commit is contained in:
Mariano
2026-03-03 22:34:09 +00:00
committed by GitHub
parent a8dd9ffea1
commit 22e33ddda9
2 changed files with 34 additions and 5 deletions

View File

@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Docs/security hardening guidance: document Docker `DOCKER-USER` + UFW policy and add cross-linking from Docker install docs for VPS/public-host setups. (#27613) thanks @dorukardahan.
- iOS/Voice timing safety: guard system speech start/finish callbacks to the active utterance to avoid misattributed start events during rapid stop/restart cycles. (#33304) thanks @mbelinky; original implementation direction by @ngutman.
- Docs/tool-loop detection config keys: align `docs/tools/loop-detection.md` examples and field names with the current `tools.loopDetection` schema to prevent copy-paste validation failures from outdated keys. (#33182) Thanks @Mylszd.
- Gateway/session agent discovery: include disk-scanned agent IDs in `listConfiguredAgentIds` even when `agents.list` is configured, so disk-only/ACP agent sessions remain visible in gateway session aggregation and listings. (#32831) thanks @Sid-Qin.
- Discord/inbound debouncer: skip bot-own MESSAGE_CREATE events before they reach the debounce queue to avoid self-triggered slowdowns in busy servers. Thanks @thewilloftheshadow.

View File

@@ -12,6 +12,7 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
private let synth = AVSpeechSynthesizer()
private var speakContinuation: CheckedContinuation<Void, Error>?
private var currentUtterance: AVSpeechUtterance?
private var didStartCallback: (() -> Void)?
private var currentToken = UUID()
private var watchdog: Task<Void, Never>?
@@ -26,17 +27,23 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
self.currentToken = UUID()
self.watchdog?.cancel()
self.watchdog = nil
self.didStartCallback = nil
self.synth.stopSpeaking(at: .immediate)
self.finishCurrent(with: SpeakError.canceled)
}
public func speak(text: String, language: String? = nil) async throws {
public func speak(
text: String,
language: String? = nil,
onStart: (() -> Void)? = nil
) async throws {
let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
guard !trimmed.isEmpty else { return }
self.stop()
let token = UUID()
self.currentToken = token
self.didStartCallback = onStart
let utterance = AVSpeechUtterance(string: trimmed)
if let language, let voice = AVSpeechSynthesisVoice(language: language) {
@@ -76,8 +83,13 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
}
}
private func handleFinish(error: Error?) {
guard self.currentUtterance != nil else { return }
/// Reports whether `utteranceID` identifies the utterance currently held in `currentUtterance`.
///
/// - Parameter utteranceID: Identity of the utterance being checked.
/// - Returns: `true` only when an utterance is tracked and its identity equals `utteranceID`;
///   `false` when no utterance is tracked or the identities differ.
///
/// NOTE(review): `ObjectIdentifier` values are only guaranteed unique while the instance is
/// alive. Callers that keep just the identifier (not the utterance) across an async hop could,
/// in principle, match a newer utterance that reused the same address — confirm the
/// synthesizer keeps the utterance alive long enough for this comparison to be sound.
private func matchesCurrentUtterance(_ utteranceID: ObjectIdentifier) -> Bool {
    if let active = self.currentUtterance {
        return ObjectIdentifier(active) == utteranceID
    }
    return false
}
private func handleFinish(utteranceID: ObjectIdentifier, error: Error?) {
guard self.matchesCurrentUtterance(utteranceID) else { return }
self.watchdog?.cancel()
self.watchdog = nil
self.finishCurrent(with: error)
@@ -85,6 +97,7 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
private func finishCurrent(with error: Error?) {
self.currentUtterance = nil
self.didStartCallback = nil
let cont = self.speakContinuation
self.speakContinuation = nil
if let error {
@@ -96,12 +109,26 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
}
extension TalkSystemSpeechSynthesizer: AVSpeechSynthesizerDelegate {
/// Delegate hook invoked when the synthesizer reports that `utterance` started.
///
/// Only the utterance's `ObjectIdentifier` is carried into the main-actor task. Inside the
/// task the pending `didStartCallback` is consumed — cleared before it is invoked, so a given
/// registration fires at most once — and only when the identifier still matches the tracked
/// utterance. A start event for an utterance that is no longer current is ignored.
public nonisolated func speechSynthesizer(
    _ synthesizer: AVSpeechSynthesizer,
    didStart utterance: AVSpeechUtterance)
{
    let startedID = ObjectIdentifier(utterance)
    Task { @MainActor in
        guard self.matchesCurrentUtterance(startedID) else { return }
        guard let onStart = self.didStartCallback else { return }
        // Clear first so the callback cannot be delivered a second time.
        self.didStartCallback = nil
        onStart()
    }
}
public nonisolated func speechSynthesizer(
_ synthesizer: AVSpeechSynthesizer,
didFinish utterance: AVSpeechUtterance)
{
let utteranceID = ObjectIdentifier(utterance)
Task { @MainActor in
self.handleFinish(error: nil)
self.handleFinish(utteranceID: utteranceID, error: nil)
}
}
@@ -109,8 +136,9 @@ extension TalkSystemSpeechSynthesizer: AVSpeechSynthesizerDelegate {
_ synthesizer: AVSpeechSynthesizer,
didCancel utterance: AVSpeechUtterance)
{
let utteranceID = ObjectIdentifier(utterance)
Task { @MainActor in
self.handleFinish(error: SpeakError.canceled)
self.handleFinish(utteranceID: utteranceID, error: SpeakError.canceled)
}
}
}