myVoxtral/MyVoxtral/MyVoxtral/Network/VoxtralWebSocketClient.swift
Carsten Abele 89b1c12d58 feat: auto-stop on silence, fix transcription hangs
- Auto-stop recording after 20 seconds of continuous silence
  (RMS-based silence detection in AudioCapture)
- Add WebSocket ping every 10s to keep connection alive
- Auto-reconnect if no events received for 30s (stale connection)
- Add diagnostic logging for all event types to help debug issues

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 13:02:21 +02:00

106 lines
3.6 KiB
Swift

import Foundation
/// WebSocket client for the Voxtral realtime transcription endpoint.
///
/// All state lives on the main actor. Incoming frames are parsed with
/// `parseVoxtralEvent(from:)` and handed to `onEvent` on the main actor.
/// At most one connection is active at a time: calling `connect` while a
/// connection exists tears the old one down first.
@MainActor
final class VoxtralWebSocketClient {
    /// Realtime transcription endpoint; the model is appended as a query item.
    private static let endpoint = "wss://api.mistral.ai/v1/audio/transcriptions/realtime"

    private var webSocketTask: URLSessionWebSocketTask?
    private var session: URLSession?
    private let encoder = JSONEncoder()
    /// Set by `disconnect()` so the resulting receive failure is not reported as an error.
    private var intentionalDisconnect = false

    /// Called on the main actor for every parsed server event and for connection errors.
    var onEvent: ((VoxtralEvent) -> Void)?

    /// Opens a new connection and queues the initial session configuration.
    ///
    /// Any connection that is still open is cancelled and its session invalidated
    /// first, so reconnecting without a prior `disconnect()` neither leaks the old
    /// session nor leaves a second receive loop running.
    ///
    /// - Parameters:
    ///   - apiKey: Bearer token sent in the `Authorization` header.
    ///   - model: Model identifier passed as the `model` query item.
    ///   - delayMs: Value sent as `targetStreamingDelayMs` in the session config.
    func connect(apiKey: String, model: String = "voxtral-mini-transcribe-realtime-2602", delayMs: Int) {
        tearDownConnection()

        guard var components = URLComponents(string: Self.endpoint) else {
            onEvent?(.error("Invalid WebSocket endpoint URL"))
            return
        }
        components.queryItems = [URLQueryItem(name: "model", value: model)]
        guard let url = components.url else {
            onEvent?(.error("Invalid WebSocket endpoint URL"))
            return
        }

        let authorization = "Bearer \(apiKey)"
        var request = URLRequest(url: url)
        request.setValue(authorization, forHTTPHeaderField: "Authorization")

        // The header is set on both the request and the session configuration,
        // exactly as before. NOTE(review): presumably a workaround for the header
        // being dropped during the WebSocket handshake - confirm before removing.
        let config = URLSessionConfiguration.default
        config.httpAdditionalHeaders = ["Authorization": authorization]

        let newSession = URLSession(configuration: config)
        let task = newSession.webSocketTask(with: request)
        session = newSession
        webSocketTask = task
        task.resume()
        intentionalDisconnect = false

        // Start receiving first, then send the session config.
        receiveLoop(on: task)

        let sessionConfig = SessionUpdateMessage(
            session: SessionConfig(
                audioFormat: AudioFormatConfig(),
                targetStreamingDelayMs: delayMs
            )
        )
        sendJSON(sessionConfig)
    }

    /// Sends a chunk of audio, base64-encoded, as an `AudioAppendMessage`.
    func sendAudio(_ pcmData: Data) {
        sendJSON(AudioAppendMessage(audio: pcmData.base64EncodedString()))
    }

    /// Sends an `AudioFlushMessage`.
    func flush() {
        sendJSON(AudioFlushMessage())
    }

    /// Sends a WebSocket ping and logs a failure, if any.
    func ping() {
        webSocketTask?.sendPing { error in
            if let error {
                print("[MyVoxtral] Ping failed: \(error.localizedDescription)")
            }
        }
    }

    /// Sends an `AudioEndMessage` (best effort) and closes the connection.
    ///
    /// The receive failure caused by the close is not reported through `onEvent`.
    func disconnect() {
        intentionalDisconnect = true
        sendJSON(AudioEndMessage())
        tearDownConnection()
    }

    /// Cancels the current task and invalidates the current session, if any.
    private func tearDownConnection() {
        webSocketTask?.cancel(with: .normalClosure, reason: nil)
        webSocketTask = nil
        session?.invalidateAndCancel()
        session = nil
    }

    /// Encodes `value` as JSON and sends it as a text frame on the current task.
    ///
    /// Does nothing when there is no active task. Encoding and send failures are
    /// logged rather than thrown, so callers stay fire-and-forget.
    private func sendJSON<T: Encodable>(_ value: T) {
        guard let task = webSocketTask else { return }

        let payload: String
        do {
            payload = String(decoding: try encoder.encode(value), as: UTF8.self)
        } catch {
            print("[MyVoxtral] Failed to encode \(T.self): \(error)")
            return
        }

        task.send(.string(payload)) { error in
            if let error {
                print("WebSocket send error: \(error)")
            }
        }
    }

    /// Receives one frame from `task`, delivers it, then re-arms itself.
    ///
    /// The loop is bound to the task it was started for. Once that task is no
    /// longer the current `webSocketTask` (after `disconnect()` or a reconnect),
    /// pending callbacks are dropped, so a stale connection can neither emit
    /// events nor start a second loop on the replacement task.
    private func receiveLoop(on task: URLSessionWebSocketTask) {
        task.receive { [weak self] result in
            switch result {
            case .success(let message):
                // Parse off the main actor, as before; only delivery hops over.
                let event: VoxtralEvent?
                switch message {
                case .string(let text):
                    event = parseVoxtralEvent(from: Data(text.utf8))
                case .data(let data):
                    event = parseVoxtralEvent(from: data)
                @unknown default:
                    event = nil
                }
                // One hop for both delivery and re-arming: the next frame is only
                // requested after this event has been handed to `onEvent`.
                Task { @MainActor in
                    guard let self, self.webSocketTask === task else { return }
                    if let event {
                        self.onEvent?(event)
                    }
                    self.receiveLoop(on: task)
                }
            case .failure(let error):
                Task { @MainActor in
                    guard let self,
                          self.webSocketTask === task,
                          !self.intentionalDisconnect else { return }
                    self.onEvent?(.error("Connection lost: \(error.localizedDescription)"))
                }
            }
        }
    }
}