From 3c2cf277bbd26625cf6240aa58e6e8cc52fbd0b8 Mon Sep 17 00:00:00 2001 From: Carsten Abele Date: Tue, 7 Apr 2026 21:38:49 +0200 Subject: [PATCH] feat: add usage cost display in menu bar dropdown Shows session and cumulative audio duration and cost ($0.006/min) from the transcription.done event's usage.prompt_audio_seconds field. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../Models/TranscriptionManager.swift | 16 ++++++++++++++- .../MyVoxtral/Network/VoxtralMessages.swift | 19 ++++++++++++++++-- MyVoxtral/MyVoxtral/Views/MenuBarView.swift | 20 +++++++++++++++++++ 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/MyVoxtral/MyVoxtral/Models/TranscriptionManager.swift b/MyVoxtral/MyVoxtral/Models/TranscriptionManager.swift index 76f9312..0b483f9 100644 --- a/MyVoxtral/MyVoxtral/Models/TranscriptionManager.swift +++ b/MyVoxtral/MyVoxtral/Models/TranscriptionManager.swift @@ -8,8 +8,14 @@ enum RecordingState: Equatable { @MainActor final class TranscriptionManager: ObservableObject { + static let costPerSecond: Double = 0.0001 // $0.006/min for voxtral-mini-transcribe-realtime + @Published var state: RecordingState = .idle @Published var currentText: String = "" + @Published var sessionAudioSeconds: Int = 0 + @Published var totalAudioSeconds: Int = 0 + @Published var sessionCost: Double = 0 + @Published var totalCost: Double = 0 private let audioCapture = AudioCapture() private let wsClient = VoxtralWebSocketClient() @@ -33,6 +39,8 @@ final class TranscriptionManager: ObservableObject { } currentText = "" + sessionAudioSeconds = 0 + sessionCost = 0 hasRetried = false wsClient.onEvent = { [weak self] event in @@ -79,10 +87,16 @@ final class TranscriptionManager: ObservableObject { break case .language: break - case .done(let text): + case .done(let text, let audioSeconds): if currentText.isEmpty { currentText = text } + if let secs = audioSeconds { + sessionAudioSeconds = secs + sessionCost = Double(secs) * Self.costPerSecond + totalAudioSeconds += secs + totalCost += sessionCost + } case .error(let message): if !hasRetried && state == .recording { hasRetried = true diff --git a/MyVoxtral/MyVoxtral/Network/VoxtralMessages.swift b/MyVoxtral/MyVoxtral/Network/VoxtralMessages.swift index 7f9edbb..832d154 100644 --- a/MyVoxtral/MyVoxtral/Network/VoxtralMessages.swift +++ b/MyVoxtral/MyVoxtral/Network/VoxtralMessages.swift @@ -47,7 +47,7 @@ enum VoxtralEvent { case textDelta(String) case segment(text: String, start: Double, end: Double) case language(String) - case done(text: String) + case done(text: String, audioSeconds: Int?) case error(String) case unknown(String) } @@ -76,6 +76,21 @@ struct SegmentEvent: Decodable { struct DoneEvent: Decodable { let text: String + let usage: UsageInfo? +} + +struct UsageInfo: Decodable { + let promptTokens: Int? + let completionTokens: Int? + let totalTokens: Int? + let promptAudioSeconds: Int? + + enum CodingKeys: String, CodingKey { + case promptTokens = "prompt_tokens" + case completionTokens = "completion_tokens" + case totalTokens = "total_tokens" + case promptAudioSeconds = "prompt_audio_seconds" + } } struct ErrorEvent: Decodable { @@ -111,7 +126,7 @@ func parseVoxtralEvent(from data: Data) -> VoxtralEvent { return .language(e.audioLanguage) case "transcription.done": guard let e = try? JSONDecoder().decode(DoneEvent.self, from: data) else { return .unknown("") } - return .done(text: e.text) + return .done(text: e.text, audioSeconds: e.usage?.promptAudioSeconds) case "error": if let e = try? JSONDecoder().decode(ErrorEvent.self, from: data) { return .error(e.error?.message?.detail ?? "Unknown error") diff --git a/MyVoxtral/MyVoxtral/Views/MenuBarView.swift b/MyVoxtral/MyVoxtral/Views/MenuBarView.swift index 0ce2ba0..ac56a71 100644 --- a/MyVoxtral/MyVoxtral/Views/MenuBarView.swift +++ b/MyVoxtral/MyVoxtral/Views/MenuBarView.swift @@ -20,6 +20,19 @@ struct MenuBarView: View { .padding(.horizontal, 8) } + if manager.sessionCost > 0 || manager.totalCost > 0 { + Divider() + VStack(alignment: .leading, spacing: 2) { + if manager.sessionCost > 0 { + Text("Session: \(manager.sessionAudioSeconds)s — $\(manager.sessionCost, specifier: "%.4f")") + } + Text("Total: \(formatDuration(manager.totalAudioSeconds)) — $\(manager.totalCost, specifier: "%.4f")") + } + .font(.caption) + .foregroundStyle(.secondary) + .padding(.horizontal, 8) + } + Divider() Button("Show Transcription") { @@ -39,4 +52,11 @@ struct MenuBarView: View { } .padding(.vertical, 4) } + + private func formatDuration(_ seconds: Int) -> String { + if seconds < 60 { return "\(seconds)s" } + let min = seconds / 60 + let sec = seconds % 60 + return "\(min)m \(sec)s" + } }