- Auto-stop recording after 20 seconds of continuous silence (RMS-based silence detection in AudioCapture)
- Add WebSocket ping every 10s to keep connection alive
- Auto-reconnect if no events received for 30s (stale connection)
- Add diagnostic logging for all event types to help debug issues

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
106 lines
3.6 KiB
Swift
106 lines
3.6 KiB
Swift
import Foundation
|
|
|
|
/// WebSocket client for the realtime transcription endpoint at
/// `wss://api.mistral.ai/v1/audio/transcriptions/realtime`.
///
/// Outgoing messages are JSON-encoded and sent as text frames. Incoming frames are
/// parsed with `parseVoxtralEvent(from:)` and handed to `onEvent` on the main actor.
@MainActor
final class VoxtralWebSocketClient {
    /// The task backing the current connection, or `nil` when disconnected.
    private var webSocketTask: URLSessionWebSocketTask?
    /// The session that owns `webSocketTask`; invalidated when the connection is torn down.
    private var session: URLSession?
    private let encoder = JSONEncoder()
    /// Set by `disconnect()` so that the resulting receive failure is not reported as an error.
    private var intentionalDisconnect = false

    /// Called on the main actor for every parsed server message and for connection-loss errors.
    var onEvent: ((VoxtralEvent) -> Void)?

    /// Opens a new connection, starts receiving, and sends the initial session configuration.
    ///
    /// Any connection that is still open from an earlier call is closed first, so calling
    /// `connect` repeatedly does not leave sockets or sessions behind.
    ///
    /// - Parameters:
    ///   - apiKey: Sent as a `Bearer` token in the `Authorization` header.
    ///   - model: Value of the `model` query item on the endpoint URL.
    ///   - delayMs: Forwarded as `targetStreamingDelayMs` in the session configuration.
    func connect(apiKey: String, model: String = "voxtral-mini-transcribe-realtime-2602", delayMs: Int) {
        guard var components = URLComponents(string: "wss://api.mistral.ai/v1/audio/transcriptions/realtime") else {
            return
        }
        components.queryItems = [URLQueryItem(name: "model", value: model)]
        guard let url = components.url else { return }

        // Close whatever was open before; otherwise the old socket and its session
        // would stay alive with nothing referencing them.
        tearDownConnection()

        var request = URLRequest(url: url)
        request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")

        // The header is set on both the request and the session configuration,
        // exactly as before.
        let config = URLSessionConfiguration.default
        config.httpAdditionalHeaders = ["Authorization": "Bearer \(apiKey)"]

        let newSession = URLSession(configuration: config)
        let task = newSession.webSocketTask(with: request)
        session = newSession
        webSocketTask = task
        intentionalDisconnect = false
        task.resume()

        // Start receiving first, then send the session configuration.
        receiveLoop(on: task)

        let sessionConfig = SessionUpdateMessage(
            session: SessionConfig(
                audioFormat: AudioFormatConfig(),
                targetStreamingDelayMs: delayMs
            )
        )
        sendJSON(sessionConfig)
    }

    /// Base64-encodes `pcmData` and sends it wrapped in an `AudioAppendMessage`.
    func sendAudio(_ pcmData: Data) {
        sendJSON(AudioAppendMessage(audio: pcmData.base64EncodedString()))
    }

    /// Sends an `AudioFlushMessage`.
    func flush() {
        sendJSON(AudioFlushMessage())
    }

    /// Sends a WebSocket ping on the current connection and logs a failure, if any.
    func ping() {
        webSocketTask?.sendPing { error in
            if let error {
                print("[MyVoxtral] Ping failed: \(error.localizedDescription)")
            }
        }
    }

    /// Sends an `AudioEndMessage`, then closes the socket and invalidates the session.
    ///
    /// The receive failure caused by the close is not reported through `onEvent`.
    func disconnect() {
        intentionalDisconnect = true
        // NOTE(review): the end message is queued immediately before the close below;
        // nothing here waits for the send to complete, so its delivery is not guaranteed.
        sendJSON(AudioEndMessage())
        tearDownConnection()
    }

    /// Closes the current socket (normal closure) and invalidates its session, if any.
    private func tearDownConnection() {
        webSocketTask?.cancel(with: .normalClosure, reason: nil)
        webSocketTask = nil
        session?.invalidateAndCancel()
        session = nil
    }

    /// JSON-encodes `value` and sends it as a text frame on the current connection.
    ///
    /// Does nothing when there is no connection. Encoding and send failures are logged.
    private func sendJSON<T: Encodable>(_ value: T) {
        guard let task = webSocketTask else { return }

        let payload: String
        do {
            let data = try encoder.encode(value)
            payload = String(decoding: data, as: UTF8.self)
        } catch {
            // Previously swallowed by `try?`; surface it so a dropped message is visible.
            print("[MyVoxtral] Failed to encode outgoing message: \(error)")
            return
        }

        task.send(.string(payload)) { error in
            if let error {
                print("WebSocket send error: \(error)")
            }
        }
    }

    /// Receives one message from `task`, forwards it, and re-arms itself.
    ///
    /// The loop is bound to the task it was started for. Once `webSocketTask` no longer
    /// refers to that task (after `disconnect()` or a new `connect()`), the loop stops
    /// and its failure is ignored, so an old connection cannot start a second receive
    /// on a newer one or report a stale "Connection lost" error.
    private func receiveLoop(on task: URLSessionWebSocketTask) {
        task.receive { [weak self] result in
            switch result {
            case .success(let message):
                // Parse in the completion handler, then hop to the main actor to deliver.
                let event: VoxtralEvent?
                switch message {
                case .string(let text):
                    event = text.data(using: .utf8).map { parseVoxtralEvent(from: $0) }
                case .data(let data):
                    event = parseVoxtralEvent(from: data)
                @unknown default:
                    event = nil
                }

                Task { @MainActor in
                    guard let self else { return }
                    // Messages that were already received are still delivered.
                    if let event {
                        self.onEvent?(event)
                    }
                    // Only keep receiving while this task is still the active connection.
                    guard self.webSocketTask === task else { return }
                    self.receiveLoop(on: task)
                }

            case .failure(let error):
                Task { @MainActor in
                    guard let self,
                          self.webSocketTask === task,
                          !self.intentionalDisconnect else { return }
                    self.onEvent?(.error("Connection lost: \(error.localizedDescription)"))
                }
            }
        }
    }
}
|