import Foundation

/// WebSocket client for the realtime audio transcription endpoint at
/// `wss://api.mistral.ai/v1/audio/transcriptions/realtime`.
///
/// The client opens one connection at a time, sends JSON-encoded control and
/// audio messages as text frames, and forwards every parsed server message to
/// `onEvent` on the main actor.
@MainActor
final class VoxtralWebSocketClient {
    /// The task for the current connection, or `nil` when disconnected.
    private var webSocketTask: URLSessionWebSocketTask?
    /// The session that owns `webSocketTask`; invalidated when the connection is torn down.
    private var session: URLSession?
    private let encoder = JSONEncoder()
    /// Set by `disconnect()` so the resulting receive failure is not reported as a lost connection.
    private var intentionalDisconnect = false

    /// Called on the main actor for every parsed server event and for connection errors.
    var onEvent: ((VoxtralEvent) -> Void)?

    /// Opens a connection and sends the initial session configuration.
    ///
    /// If a connection is already open it is cancelled first, so calling this
    /// method repeatedly does not leak tasks or sessions.
    ///
    /// - Parameters:
    ///   - apiKey: Sent as a `Bearer` token in the `Authorization` header.
    ///   - model: Passed as the `model` query item.
    ///   - delayMs: Forwarded as `targetStreamingDelayMs` in the session configuration.
    func connect(apiKey: String, model: String = "voxtral-mini-transcribe-realtime-2602", delayMs: Int) {
        // Tear down a previous connection before replacing it. Clearing the
        // stored task first makes any late callback from it fail the identity
        // check in `receiveLoop()`.
        let previousTask = webSocketTask
        let previousSession = session
        webSocketTask = nil
        session = nil
        previousTask?.cancel(with: .goingAway, reason: nil)
        previousSession?.invalidateAndCancel()

        guard
            var components = URLComponents(string: "wss://api.mistral.ai/v1/audio/transcriptions/realtime")
        else {
            onEvent?(.error("Invalid WebSocket endpoint URL"))
            return
        }
        components.queryItems = [URLQueryItem(name: "model", value: model)]
        guard let url = components.url else {
            onEvent?(.error("Invalid WebSocket endpoint URL"))
            return
        }

        // The credential is set both on the request and on the session configuration.
        let authorization = "Bearer \(apiKey)"
        var request = URLRequest(url: url)
        request.setValue(authorization, forHTTPHeaderField: "Authorization")

        let config = URLSessionConfiguration.default
        config.httpAdditionalHeaders = ["Authorization": authorization]

        let newSession = URLSession(configuration: config)
        let newTask = newSession.webSocketTask(with: request)
        session = newSession
        webSocketTask = newTask
        intentionalDisconnect = false
        newTask.resume()

        // Start receiving first, then send the session configuration.
        receiveLoop()

        let sessionConfig = SessionUpdateMessage(
            session: SessionConfig(
                audioFormat: AudioFormatConfig(),
                targetStreamingDelayMs: delayMs
            )
        )
        sendJSON(sessionConfig)
    }

    /// Sends a chunk of audio, base64-encoded inside an `AudioAppendMessage`.
    func sendAudio(_ pcmData: Data) {
        sendJSON(AudioAppendMessage(audio: pcmData.base64EncodedString()))
    }

    /// Sends an `AudioFlushMessage`.
    func flush() {
        sendJSON(AudioFlushMessage())
    }

    /// Sends a WebSocket ping and logs a failure, if any.
    func ping() {
        webSocketTask?.sendPing { error in
            if let error {
                print("[MyVoxtral] Ping failed: \(error.localizedDescription)")
            }
        }
    }

    /// Sends an `AudioEndMessage` and closes the connection.
    ///
    /// The stored task and session are cleared synchronously, so no further
    /// events from this connection reach `onEvent` and later sends are no-ops.
    /// The socket itself is closed once the end message's send has completed.
    func disconnect() {
        intentionalDisconnect = true

        let closingTask = webSocketTask
        let closingSession = session
        webSocketTask = nil
        session = nil

        guard let closingTask else {
            closingSession?.invalidateAndCancel()
            return
        }

        guard let endMessage = jsonString(AudioEndMessage()) else {
            closingTask.cancel(with: .normalClosure, reason: nil)
            closingSession?.invalidateAndCancel()
            return
        }

        // `send` is asynchronous; cancelling right after queuing it would race
        // the close against the end-of-audio message. Close from the send's
        // completion handler instead. A send error is ignored here because the
        // connection is being closed either way.
        closingTask.send(.string(endMessage)) { _ in
            closingTask.cancel(with: .normalClosure, reason: nil)
        }
        // Let the in-flight task finish (it is cancelled above) before the
        // session is invalidated, rather than cancelling it immediately.
        closingSession?.finishTasksAndInvalidate()
    }

    /// Encodes `value` as JSON and returns it as a string, logging and
    /// returning `nil` when encoding fails.
    private func jsonString<T: Encodable>(_ value: T) -> String? {
        do {
            let data = try encoder.encode(value)
            return String(decoding: data, as: UTF8.self)
        } catch {
            print("WebSocket encode error: \(error)")
            return nil
        }
    }

    /// Encodes `value` as JSON and sends it as a text frame on the current
    /// connection. Does nothing when there is no connection.
    private func sendJSON<T: Encodable>(_ value: T) {
        guard let string = jsonString(value) else { return }
        webSocketTask?.send(.string(string)) { error in
            if let error {
                print("WebSocket send error: \(error)")
            }
        }
    }

    /// Receives one message from the current task, forwards the parsed event
    /// to `onEvent`, and re-arms itself.
    ///
    /// Every callback is checked against the task that issued it, so a
    /// connection that has been replaced or disconnected can neither deliver
    /// events, report errors, nor start a receive on a newer task.
    private func receiveLoop() {
        guard let task = webSocketTask else { return }

        task.receive { [weak self] result in
            switch result {
            case .success(let message):
                let event: VoxtralEvent?
                switch message {
                case .string(let text):
                    event = parseVoxtralEvent(from: Data(text.utf8))
                case .data(let data):
                    event = parseVoxtralEvent(from: data)
                @unknown default:
                    event = nil
                }

                // Deliver the event and re-arm in one main-actor hop so the
                // next receive is only issued after this event was delivered.
                Task { @MainActor in
                    guard let self, self.webSocketTask === task else { return }
                    if let event {
                        self.onEvent?(event)
                    }
                    self.receiveLoop()
                }

            case .failure(let error):
                Task { @MainActor in
                    guard let self, self.webSocketTask === task, !self.intentionalDisconnect else { return }
                    self.onEvent?(.error("Connection lost: \(error.localizedDescription)"))
                }
            }
        }
    }
}