ios – AVSpeechSynthesizer gets terminated immediately without speaking


Here is my AVSpeechSynthesizer and AVSpeechSynthesizerDelegate wrapped into an actor for better usage and testing:

import AVFAudio.AVSpeechSynthesis

actor SpeechSynthesizer {
    var delegate: SpeechSynthesisDelegate?
    var synthesizer: AVSpeechSynthesizer?

    enum DelegateAction: Equatable {
        case didCancel(AVSpeechUtterance)
        case didContinue(AVSpeechUtterance)
        case didFinish(AVSpeechUtterance)
        case didPause(AVSpeechUtterance)
        case didStart(AVSpeechUtterance)
    }

    func stop() {
        self.synthesizer?.stopSpeaking(at: .immediate)
    }

    func start(text: String) async throws -> DelegateAction {
        self.stop()

        let stream = AsyncThrowingStream<DelegateAction, Error> { continuation in
            self.delegate = SpeechSynthesisDelegate(
                didCancel: { utterance in
                    continuation.yield(.didCancel(utterance))
                }, didContinue: { utterance in
                    continuation.yield(.didContinue(utterance))
                }, didFinish: { utterance in
                    continuation.yield(.didFinish(utterance))
                    continuation.finish()
                }, didPause: { utterance in
                    continuation.yield(.didPause(utterance))
                }, didStart: { utterance in
                    continuation.yield(.didStart(utterance))
                }
            )
            let synthesizer = AVSpeechSynthesizer()
            self.synthesizer = synthesizer
            synthesizer.delegate = self.delegate

            continuation.onTermination = { [weak synthesizer] _ in
                synthesizer?.stopSpeaking(at: .immediate)
            }

            let utterance = AVSpeechUtterance(string: text)
            utterance.voice = AVSpeechSynthesisVoice(identifier: "en-US")
            utterance.rate = 0.52
            self.synthesizer?.speak(utterance)
        }

        for try await didChange in stream {
            return didChange
        }
        throw CancellationError()
    }
}

final class SpeechSynthesisDelegate: NSObject, AVSpeechSynthesizerDelegate, Sendable {
    let didCancel: @Sendable (AVSpeechUtterance) -> Void
    let didContinue: @Sendable (AVSpeechUtterance) -> Void
    let didFinish: @Sendable (AVSpeechUtterance) -> Void
    let didPause: @Sendable (AVSpeechUtterance) -> Void
    let didStart: @Sendable (AVSpeechUtterance) -> Void

    init(
        didCancel: @escaping @Sendable (AVSpeechUtterance) -> Void,
        didContinue: @escaping @Sendable (AVSpeechUtterance) -> Void,
        didFinish: @escaping @Sendable (AVSpeechUtterance) -> Void,
        didPause: @escaping @Sendable (AVSpeechUtterance) -> Void,
        didStart: @escaping @Sendable (AVSpeechUtterance) -> Void
    ) {
        self.didCancel = didCancel
        self.didContinue = didContinue
        self.didFinish = didFinish
        self.didPause = didPause
        self.didStart = didStart
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didCancel utterance: AVSpeechUtterance) {
        self.didCancel(utterance)
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didContinue utterance: AVSpeechUtterance) {
        self.didContinue(utterance)
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        self.didFinish(utterance)
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didPause utterance: AVSpeechUtterance) {
        self.didPause(utterance)
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didStart utterance: AVSpeechUtterance) {
        self.didStart(utterance)
    }
}

Are is a sample App to use

import SwiftUI

@main
struct SampleApp: App {
    private let synthesizer = SpeechSynthesizer()

    var body: some Scene {
        WindowGroup {
            Button {
                Task {
                    do {
                        let result = try await synthesizer.start(text: "Hello, world!")
                        switch result {
                        case .didFinish(let utterance):
                            print("Finished speaking: \(utterance.speechString)")
                        case .didStart(let utterance):
                            print("Started speaking: \(utterance.speechString)")
                        default:
                            break
                        }
                    } catch {
                        print("Speech synthesis error: \(error)")
                    }
                }
            } label: {
                Text("Speak")
            }
        }
    }
}

On button tap, I am receiving the Started speaking: Hello, world! on the console but nothing is spoken and the Finished speaking: Hello, world! is not called either. Tested on simulator + device.

Having set a breakpoint at

continuation.onTermination = { [weak synthesizer] _ in
>>>>>    synthesizer?.stopSpeaking(at: .immediate)
}

I am guessing that the weak reference on synthesizer “deinit” the synthesizer immediately and nothing is spoken.

Any guess on how to solve this?

Latest articles

spot_imgspot_img

Related articles

Leave a reply

Please enter your comment!
Please enter your name here

spot_imgspot_img