feat: add Voxtral WebSocket message types and parser
This commit is contained in:
parent
4b1cae1b5f
commit
e5395017c2
1 changed files with 123 additions and 0 deletions
123
MyVoxtral/MyVoxtral/Network/VoxtralMessages.swift
Normal file
123
MyVoxtral/MyVoxtral/Network/VoxtralMessages.swift
Normal file
|
|
@ -0,0 +1,123 @@
|
||||||
|
import Foundation
|
||||||
|
|
||||||
|
// MARK: - Outbound Messages (Client → Server)
|
||||||
|
|
||||||
|
/// Outbound message that carries one chunk of audio to the server.
///
/// Encodes as a JSON object with a constant `type` of `"input_audio.append"`
/// and the chunk in `audio`.
struct AudioAppendMessage: Encodable {
    /// Message discriminator; constant for this message kind.
    let type: String = "input_audio.append"

    /// Base64-encoded PCM audio for this chunk.
    let audio: String
}
|
||||||
|
|
||||||
|
/// Outbound message with no payload other than its discriminator.
///
/// Encodes as `{"type": "input_audio.flush"}`.
struct AudioFlushMessage: Encodable {
    /// Message discriminator; constant for this message kind.
    let type: String = "input_audio.flush"
}
|
||||||
|
|
||||||
|
/// Outbound message with no payload other than its discriminator.
///
/// Encodes as `{"type": "input_audio.end"}`.
struct AudioEndMessage: Encodable {
    /// Message discriminator; constant for this message kind.
    let type: String = "input_audio.end"
}
|
||||||
|
|
||||||
|
/// Outbound message that sends a session configuration to the server.
///
/// Encodes as a JSON object with a constant `type` of `"session.update"`
/// and the configuration nested under `session`.
struct SessionUpdateMessage: Encodable {
    /// Message discriminator; constant for this message kind.
    let type: String = "session.update"

    /// Configuration payload carried by this message.
    let session: SessionConfig
}
|
||||||
|
|
||||||
|
/// Session settings carried inside a `session.update` message.
///
/// Property names are mapped to snake_case JSON keys through `CodingKeys`.
struct SessionConfig: Encodable {
    /// Audio format description; encoded under `audio_format`.
    let audioFormat: AudioFormatConfig

    /// Encoded under `target_streaming_delay_ms`.
    /// NOTE(review): presumably a delay in milliseconds, going by the name — confirm against the server docs.
    let targetStreamingDelayMs: Int

    enum CodingKeys: String, CodingKey {
        case audioFormat = "audio_format"
        case targetStreamingDelayMs = "target_streaming_delay_ms"
    }
}
|
||||||
|
|
||||||
|
/// Describes the audio format announced to the server.
///
/// Encodes as `{"encoding": ..., "sample_rate": ...}`. Both values default to
/// the format this file previously hard-coded (`pcm_s16le` at 16000), so
/// `AudioFormatConfig()` produces exactly the same JSON as before.
struct AudioFormatConfig: Encodable {
    /// Encoding identifier sent under `encoding`.
    let encoding: String

    /// Sample rate sent under `sample_rate`.
    let sampleRate: Int

    enum CodingKeys: String, CodingKey {
        case encoding
        case sampleRate = "sample_rate"
    }

    /// Creates a format description.
    ///
    /// - Parameters:
    ///   - encoding: Encoding identifier; defaults to `"pcm_s16le"`.
    ///   - sampleRate: Sample rate; defaults to `16000`.
    init(encoding: String = "pcm_s16le", sampleRate: Int = 16000) {
        self.encoding = encoding
        self.sampleRate = sampleRate
    }
}
|
||||||
|
|
||||||
|
// MARK: - Inbound Messages (Server → Client)
|
||||||
|
|
||||||
|
/// Typed representation of an inbound (server → client) message, as produced
/// by `parseVoxtralEvent(from:)`.
enum VoxtralEvent {
    /// A `session.created` message was received.
    case sessionCreated

    /// Text carried by a `transcription.text.delta` message.
    case textDelta(String)

    /// Contents of a `transcription.segment` message: its text plus `start` and `end` values.
    case segment(text: String, start: Double, end: Double)

    /// The `audio_language` value of a `transcription.language` message.
    case language(String)

    /// Text carried by a `transcription.done` message.
    case done(text: String)

    /// An `error` message; the associated value is the extracted description.
    case error(String)

    /// A message that could not be mapped to any other case; the associated
    /// string is diagnostic text chosen by the parser.
    case unknown(String)
}
|
||||||
|
|
||||||
|
/// Minimal envelope decoded from every inbound message to read its
/// discriminator; all other keys in the payload are ignored.
struct IncomingEvent: Decodable {
    /// Value of the message's `type` key.
    let type: String
}
|
||||||
|
|
||||||
|
/// Payload of a `transcription.text.delta` message.
struct TextDeltaEvent: Decodable {
    /// Value of the required `text` key.
    let text: String
}
|
||||||
|
|
||||||
|
/// Payload of a `transcription.language` message.
struct LanguageEvent: Decodable {
    /// Value of the required `audio_language` key.
    let audioLanguage: String

    enum CodingKeys: String, CodingKey {
        case audioLanguage = "audio_language"
    }
}
|
||||||
|
|
||||||
|
/// Payload of a `transcription.segment` message.
struct SegmentEvent: Decodable {
    /// Value of the required `text` key.
    let text: String

    /// Value of the required `start` key.
    /// NOTE(review): presumably a time offset for the segment; the unit is not visible here — confirm.
    let start: Double

    /// Value of the required `end` key (same caveat as `start`).
    let end: Double
}
|
||||||
|
|
||||||
|
/// Payload of a `transcription.done` message.
struct DoneEvent: Decodable {
    /// Value of the required `text` key.
    let text: String
}
|
||||||
|
|
||||||
|
/// Payload of an `error` message.
struct ErrorEvent: Decodable {
    /// Value of the optional `error` key; `nil` when the key is absent or null.
    let error: ErrorDetail?
}
|
||||||
|
|
||||||
|
/// The object found under the `error` key of an `error` message.
struct ErrorDetail: Decodable {
    /// Value of the optional `message` key; `nil` when the key is absent or null.
    let message: ErrorMessage?
}
|
||||||
|
|
||||||
|
/// The value found under `error.message` of an `error` message.
///
/// Decodes from either a JSON object with an optional `detail` string, or a
/// bare JSON string (which is stored in `detail` as-is).
struct ErrorMessage: Decodable {
    /// Human-readable description, when one could be extracted.
    let detail: String?
}

extension ErrorMessage {
    private enum CodingKeys: String, CodingKey {
        case detail
    }

    // Declared in an extension so the struct keeps its memberwise initializer.
    init(from decoder: Decoder) throws {
        // Accept a plain string defensively: with a keyed-only decode, a string
        // `message` fails the whole enclosing decode and the text is lost.
        // NOTE(review): confirm against the server's error schema which shapes occur.
        if let plainText = try? decoder.singleValueContainer().decode(String.self) {
            self.init(detail: plainText)
            return
        }

        let container = try decoder.container(keyedBy: CodingKeys.self)
        self.init(detail: try container.decodeIfPresent(String.self, forKey: .detail))
    }
}
|
||||||
|
|
||||||
|
// MARK: - Event Parsing
|
||||||
|
|
||||||
|
/// Converts one inbound JSON frame into a `VoxtralEvent`.
///
/// The frame is first decoded as an `IncomingEvent` to read its `type`, then
/// decoded again as the payload struct matching that type.
///
/// - Parameter data: Raw JSON bytes of a single message.
/// - Returns:
///   - the matching typed case when `type` is recognised and the payload decodes;
///   - `.unknown(<raw payload text>)` when the envelope cannot be decoded, or
///     when a recognised type carries a payload that does not decode (the raw
///     text is kept so the frame can still be logged or inspected);
///   - `.unknown(<type>)` for a `type` this parser does not handle;
///   - `.error(...)` for `error` frames, with `"Unknown error"` when no
///     `error.message.detail` can be extracted.
func parseVoxtralEvent(from data: Data) -> VoxtralEvent {
    // One decoder serves both the envelope pass and the payload pass.
    let decoder = JSONDecoder()

    /// Decodes the whole frame as `T`, or returns `nil` if it does not match.
    func payload<T: Decodable>(_ type: T.Type) -> T? {
        return try? decoder.decode(type, from: data)
    }

    /// Fallback event that preserves the frame's text (empty if not valid UTF-8).
    func undecodable() -> VoxtralEvent {
        return .unknown(String(data: data, encoding: .utf8) ?? "")
    }

    guard let envelope = payload(IncomingEvent.self) else {
        return undecodable()
    }

    switch envelope.type {
    case "session.created":
        return .sessionCreated

    case "transcription.text.delta":
        guard let delta = payload(TextDeltaEvent.self) else { return undecodable() }
        return .textDelta(delta.text)

    case "transcription.segment":
        guard let segment = payload(SegmentEvent.self) else { return undecodable() }
        return .segment(text: segment.text, start: segment.start, end: segment.end)

    case "transcription.language":
        guard let language = payload(LanguageEvent.self) else { return undecodable() }
        return .language(language.audioLanguage)

    case "transcription.done":
        guard let done = payload(DoneEvent.self) else { return undecodable() }
        return .done(text: done.text)

    case "error":
        // An error frame always yields `.error`, even if its body is malformed.
        let detail = payload(ErrorEvent.self)?.error?.message?.detail
        return .error(detail ?? "Unknown error")

    default:
        return .unknown(envelope.type)
    }
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue