feat: add Voxtral WebSocket message types and parser
This commit is contained in:
parent
4b1cae1b5f
commit
e5395017c2
1 changed files with 123 additions and 0 deletions
123
MyVoxtral/MyVoxtral/Network/VoxtralMessages.swift
Normal file
123
MyVoxtral/MyVoxtral/Network/VoxtralMessages.swift
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
import Foundation
|
||||
|
||||
// MARK: - Outbound Messages (Client → Server)
|
||||
|
||||
/// Outbound message that carries one chunk of audio to the server.
///
/// Encodes as a JSON object with a fixed `type` of `input_audio.append`
/// plus the `audio` payload.
struct AudioAppendMessage: Encodable {
    /// Message discriminator; always `input_audio.append`.
    let type: String = "input_audio.append"

    /// Audio chunk as base64-encoded PCM.
    let audio: String
}
|
||||
|
||||
/// Outbound message with no payload other than its fixed `type`
/// of `input_audio.flush`.
struct AudioFlushMessage: Encodable {
    /// Message discriminator; always `input_audio.flush`.
    let type: String = "input_audio.flush"
}
|
||||
|
||||
/// Outbound message with no payload other than its fixed `type`
/// of `input_audio.end`.
struct AudioEndMessage: Encodable {
    /// Message discriminator; always `input_audio.end`.
    let type: String = "input_audio.end"
}
|
||||
|
||||
/// Outbound message that sends a session configuration to the server.
///
/// Encodes as a JSON object with a fixed `type` of `session.update`
/// and the nested `session` configuration.
struct SessionUpdateMessage: Encodable {
    /// Message discriminator; always `session.update`.
    let type: String = "session.update"

    /// Configuration to send.
    let session: SessionConfig
}
|
||||
|
||||
/// Session settings carried inside a `SessionUpdateMessage`.
struct SessionConfig: Encodable {
    /// Maps the Swift property names onto the snake_case wire keys.
    enum CodingKeys: String, CodingKey {
        case audioFormat = "audio_format"
        case targetStreamingDelayMs = "target_streaming_delay_ms"
    }

    /// Audio format description; encoded under `audio_format`.
    let audioFormat: AudioFormatConfig

    /// Streaming delay value; encoded under `target_streaming_delay_ms`.
    let targetStreamingDelayMs: Int
}
|
||||
|
||||
/// Audio format description sent as part of the session configuration.
///
/// Encodes as `{"encoding": …, "sample_rate": …}`.
struct AudioFormatConfig: Encodable {
    /// Sample encoding identifier; defaults to `pcm_s16le`.
    let encoding: String

    /// Sample rate value; defaults to `16000`. Encoded under `sample_rate`.
    let sampleRate: Int

    /// Creates a format description.
    ///
    /// Both parameters default to the previously hard-coded values, so
    /// `AudioFormatConfig()` encodes exactly as before.
    ///
    /// - Parameters:
    ///   - encoding: Sample encoding identifier.
    ///   - sampleRate: Sample rate value.
    init(encoding: String = "pcm_s16le", sampleRate: Int = 16000) {
        self.encoding = encoding
        self.sampleRate = sampleRate
    }

    /// Maps the Swift property names onto the wire keys.
    enum CodingKeys: String, CodingKey {
        case encoding
        case sampleRate = "sample_rate"
    }
}
|
||||
|
||||
// MARK: - Inbound Messages (Server → Client)
|
||||
|
||||
/// Typed form of a server-sent message, as produced by `parseVoxtralEvent(from:)`.
enum VoxtralEvent {
    /// A `session.created` message was received.
    case sessionCreated

    /// Text carried by a `transcription.text.delta` message.
    case textDelta(String)

    /// Text with `start` and `end` values from a `transcription.segment` message.
    case segment(text: String, start: Double, end: Double)

    /// Language value carried by a `transcription.language` message.
    case language(String)

    /// Text carried by a `transcription.done` message.
    case done(text: String)

    /// Error description extracted from an `error` message.
    case error(String)

    /// A message that could not be interpreted; the string holds diagnostic context.
    case unknown(String)
}
|
||||
|
||||
/// Minimal envelope decoded from every inbound message to read its `type`.
struct IncomingEvent: Decodable {
    /// Message discriminator string.
    let type: String
}
|
||||
|
||||
/// Payload of a `transcription.text.delta` message.
struct TextDeltaEvent: Decodable {
    /// Value of the message's `text` field.
    let text: String
}
|
||||
|
||||
/// Payload of a `transcription.language` message.
struct LanguageEvent: Decodable {
    /// Maps the Swift property name onto the snake_case wire key.
    enum CodingKeys: String, CodingKey {
        case audioLanguage = "audio_language"
    }

    /// Value of the message's `audio_language` field.
    let audioLanguage: String
}
|
||||
|
||||
/// Payload of a `transcription.segment` message.
struct SegmentEvent: Decodable {
    /// Value of the message's `text` field.
    let text: String

    /// Value of the message's `start` field.
    let start: Double

    /// Value of the message's `end` field.
    let end: Double
}
|
||||
|
||||
/// Payload of a `transcription.done` message.
struct DoneEvent: Decodable {
    /// Value of the message's `text` field.
    let text: String
}
|
||||
|
||||
/// Payload of an `error` message.
struct ErrorEvent: Decodable {
    /// Contents of the message's `error` field; `nil` when the field is absent or null.
    let error: ErrorDetail?
}
|
||||
|
||||
/// Object found under the `error` key of an `error` message.
struct ErrorDetail: Decodable {
    /// Contents of the `message` field; `nil` when the field is absent or null.
    let message: ErrorMessage?
}
|
||||
|
||||
/// Object found under the `message` key of an error's detail.
struct ErrorMessage: Decodable {
    /// Contents of the `detail` field; `nil` when the field is absent or null.
    let detail: String?
}
|
||||
|
||||
// MARK: - Event Parsing
|
||||
|
||||
/// Decodes one server-sent message into a `VoxtralEvent`.
///
/// The message is first decoded as an `IncomingEvent` envelope to read its
/// `type`, then decoded a second time as the payload struct for that type.
///
/// - Parameter data: Raw JSON bytes of a single inbound message.
/// - Returns: The event matching the message's `type`.
///   - A message with no decodable `type` yields `.unknown` carrying the raw
///     message text (empty if the bytes are not valid UTF-8).
///   - A message whose `type` is recognised but whose payload does not match
///     the expected struct yields `.unknown` carrying the raw message text,
///     so the caller can log what was actually received.
///   - A message with an unrecognised `type` yields `.unknown` carrying that
///     type string.
///   - An `error` message without a usable detail yields `.error("Unknown error")`.
func parseVoxtralEvent(from data: Data) -> VoxtralEvent {
    // One decoder serves both the envelope pass and the payload pass.
    let decoder = JSONDecoder()

    guard let envelope = try? decoder.decode(IncomingEvent.self, from: data) else {
        return .unknown(String(data: data, encoding: .utf8) ?? "")
    }

    // Re-decodes the message as the payload struct for the envelope's type.
    func payload<Payload: Decodable>(_ kind: Payload.Type) -> Payload? {
        return try? decoder.decode(kind, from: data)
    }

    // Result for a recognised type whose payload failed to decode. Keeps the raw
    // text for diagnostics; falls back to the type name if the bytes are not UTF-8.
    func malformed() -> VoxtralEvent {
        return .unknown(String(data: data, encoding: .utf8) ?? envelope.type)
    }

    switch envelope.type {
    case "session.created":
        return .sessionCreated

    case "transcription.text.delta":
        guard let delta = payload(TextDeltaEvent.self) else { return malformed() }
        return .textDelta(delta.text)

    case "transcription.segment":
        guard let segment = payload(SegmentEvent.self) else { return malformed() }
        return .segment(text: segment.text, start: segment.start, end: segment.end)

    case "transcription.language":
        guard let language = payload(LanguageEvent.self) else { return malformed() }
        return .language(language.audioLanguage)

    case "transcription.done":
        guard let done = payload(DoneEvent.self) else { return malformed() }
        return .done(text: done.text)

    case "error":
        // Every level of the error payload is optional; any missing level
        // collapses to the generic description.
        let detail = payload(ErrorEvent.self)?.error?.message?.detail
        return .error(detail ?? "Unknown error")

    default:
        return .unknown(envelope.type)
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue