feat: add TranscriptionManager orchestrating audio, WS, and output

This commit is contained in:
Carsten Abele 2026-04-07 19:42:41 +02:00
parent 99b091abc8
commit 285b833ba9

View file

@ -0,0 +1,99 @@
import SwiftUI
/// Lifecycle of a transcription session as tracked by `TranscriptionManager`.
enum RecordingState: Equatable {
    /// `idle`: no session is running. `recording`: a session is in progress.
    case idle, recording
    /// The session failed; the associated value is the message to display.
    case error(String)
}
/// Orchestrates a transcription session: microphone capture feeds audio chunks
/// to the WebSocket client, and incoming events are folded into `currentText`
/// (and optionally typed at the cursor, depending on the output mode setting).
///
/// All state is main-actor isolated so the `@Published` properties can drive UI.
@MainActor
final class TranscriptionManager: ObservableObject {
    /// Current lifecycle state of the session.
    @Published var state: RecordingState = .idle
    /// Text accumulated from the text deltas of the current or most recent session.
    @Published var currentText: String = ""

    private let audioCapture = AudioCapture()
    private let wsClient = VoxtralWebSocketClient()
    private let settings = AppSettings.shared
    /// True once the single automatic reconnect of the current session was used.
    private var hasRetried = false

    /// Whether a session is currently in the `.recording` state.
    var isRecording: Bool { state == .recording }

    /// Stops the session when recording, otherwise starts a new one.
    func toggle() {
        if isRecording {
            stop()
        } else {
            start()
        }
    }

    /// Starts a new session: connects the WebSocket, wires audio chunks to it,
    /// and starts microphone capture.
    ///
    /// Sets `state` to `.error` when no API key is configured or when audio
    /// capture fails to start. Does nothing if a session is already recording.
    func start() {
        // Re-entering while recording would wipe the transcript and open a
        // second connection on top of the live one.
        guard !isRecording else { return }

        guard settings.hasAPIKey else {
            state = .error("No API key set. Open Settings.")
            return
        }

        currentText = ""
        hasRetried = false

        wsClient.onEvent = { [weak self] event in
            self?.handleEvent(event)
        }
        connectSocket()

        audioCapture.onChunk = { [weak self] chunk in
            // Hop to the main actor before touching the WebSocket client.
            Task { @MainActor in
                self?.wsClient.sendAudio(chunk)
            }
        }

        do {
            try audioCapture.start()
            state = .recording
        } catch {
            // The socket was opened above; close it so a failed microphone
            // start does not leave a connection behind.
            wsClient.disconnect()
            state = .error("Mic error: \(error.localizedDescription)")
        }
    }

    /// Stops capture, flushes and closes the WebSocket, returns to `.idle`,
    /// and appends any non-empty transcript to the transcription log.
    func stop() {
        audioCapture.stop()
        wsClient.flush()
        wsClient.disconnect()
        state = .idle
        if !currentText.isEmpty {
            TranscriptionLogger.append(text: currentText)
        }
    }

    /// Opens the WebSocket connection using the current settings.
    private func connectSocket() {
        wsClient.connect(apiKey: settings.apiKey, delayMs: settings.streamingDelayMs)
    }

    /// Applies a single event from the WebSocket client to the session state.
    private func handleEvent(_ event: VoxtralEvent) {
        switch event {
        case .textDelta(let text):
            currentText += text
            if settings.outputMode == .cursorInjection {
                CursorInjector.typeText(text)
            }
        case .done(let text):
            // Only fall back to the final text when no deltas were accumulated.
            if currentText.isEmpty {
                currentText = text
            }
        case .error(let message):
            if !hasRetried && state == .recording {
                // One automatic reconnect per session.
                hasRetried = true
                wsClient.disconnect()
                connectSocket()
            } else {
                state = .error(message)
                audioCapture.stop()
                // Close the socket too, mirroring stop(); otherwise the
                // connection stays open after the session has failed.
                wsClient.disconnect()
            }
        case .sessionCreated, .segment, .language, .unknown:
            // Deliberately ignored; listed explicitly to keep the switch exhaustive.
            break
        }
    }
}