feat: add usage cost display in menu bar dropdown

Shows session and cumulative audio duration and cost ($0.006/min)
from the transcription.done event's usage.prompt_audio_seconds field.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Carsten Abele 2026-04-07 21:38:49 +02:00
parent b57bec4273
commit 3c2cf277bb
3 changed files with 52 additions and 3 deletions

View file

@ -8,8 +8,14 @@ enum RecordingState: Equatable {
@MainActor @MainActor
final class TranscriptionManager: ObservableObject { final class TranscriptionManager: ObservableObject {
static let costPerSecond: Double = 0.0001 // $0.006/min for voxtral-mini-transcribe-realtime
@Published var state: RecordingState = .idle @Published var state: RecordingState = .idle
@Published var currentText: String = "" @Published var currentText: String = ""
@Published var sessionAudioSeconds: Int = 0
@Published var totalAudioSeconds: Int = 0
@Published var sessionCost: Double = 0
@Published var totalCost: Double = 0
private let audioCapture = AudioCapture() private let audioCapture = AudioCapture()
private let wsClient = VoxtralWebSocketClient() private let wsClient = VoxtralWebSocketClient()
@ -33,6 +39,8 @@ final class TranscriptionManager: ObservableObject {
} }
currentText = "" currentText = ""
sessionAudioSeconds = 0
sessionCost = 0
hasRetried = false hasRetried = false
wsClient.onEvent = { [weak self] event in wsClient.onEvent = { [weak self] event in
@ -79,10 +87,16 @@ final class TranscriptionManager: ObservableObject {
break break
case .language: case .language:
break break
case .done(let text): case .done(let text, let audioSeconds):
if currentText.isEmpty { if currentText.isEmpty {
currentText = text currentText = text
} }
if let secs = audioSeconds {
sessionAudioSeconds = secs
sessionCost = Double(secs) * Self.costPerSecond
totalAudioSeconds += secs
totalCost += sessionCost
}
case .error(let message): case .error(let message):
if !hasRetried && state == .recording { if !hasRetried && state == .recording {
hasRetried = true hasRetried = true

View file

@ -47,7 +47,7 @@ enum VoxtralEvent {
case textDelta(String) case textDelta(String)
case segment(text: String, start: Double, end: Double) case segment(text: String, start: Double, end: Double)
case language(String) case language(String)
case done(text: String) case done(text: String, audioSeconds: Int?)
case error(String) case error(String)
case unknown(String) case unknown(String)
} }
@ -76,6 +76,21 @@ struct SegmentEvent: Decodable {
struct DoneEvent: Decodable { struct DoneEvent: Decodable {
let text: String let text: String
let usage: UsageInfo?
}
/// Usage counters decoded from the optional `usage` object of a `transcription.done` payload
/// (see `DoneEvent.usage`).
///
/// Every field is optional, so a payload that omits any of these keys still decodes.
struct UsageInfo: Decodable {
let promptTokens: Int?
let completionTokens: Int?
let totalTokens: Int?
/// Forwarded by `parseVoxtralEvent` as the `audioSeconds` value of `VoxtralEvent.done`.
// NOTE(review): decoded as Int — presumably the service always sends a whole number here; confirm.
// If this field fails to decode, the `try?` in `parseVoxtralEvent` yields `.unknown("")` for the whole event.
let promptAudioSeconds: Int?
/// Maps the camelCase property names to the snake_case keys used in the JSON payload.
enum CodingKeys: String, CodingKey {
case promptTokens = "prompt_tokens"
case completionTokens = "completion_tokens"
case totalTokens = "total_tokens"
case promptAudioSeconds = "prompt_audio_seconds"
}
} }
struct ErrorEvent: Decodable { struct ErrorEvent: Decodable {
@ -111,7 +126,7 @@ func parseVoxtralEvent(from data: Data) -> VoxtralEvent {
return .language(e.audioLanguage) return .language(e.audioLanguage)
case "transcription.done": case "transcription.done":
guard let e = try? JSONDecoder().decode(DoneEvent.self, from: data) else { return .unknown("") } guard let e = try? JSONDecoder().decode(DoneEvent.self, from: data) else { return .unknown("") }
return .done(text: e.text) return .done(text: e.text, audioSeconds: e.usage?.promptAudioSeconds)
case "error": case "error":
if let e = try? JSONDecoder().decode(ErrorEvent.self, from: data) { if let e = try? JSONDecoder().decode(ErrorEvent.self, from: data) {
return .error(e.error?.message?.detail ?? "Unknown error") return .error(e.error?.message?.detail ?? "Unknown error")

View file

@ -20,6 +20,19 @@ struct MenuBarView: View {
.padding(.horizontal, 8) .padding(.horizontal, 8)
} }
if manager.sessionCost > 0 || manager.totalCost > 0 {
Divider()
VStack(alignment: .leading, spacing: 2) {
if manager.sessionCost > 0 {
Text("Session: \(manager.sessionAudioSeconds)s — $\(manager.sessionCost, specifier: "%.4f")")
}
Text("Total: \(formatDuration(manager.totalAudioSeconds)) — $\(manager.totalCost, specifier: "%.4f")")
}
.font(.caption)
.foregroundStyle(.secondary)
.padding(.horizontal, 8)
}
Divider() Divider()
Button("Show Transcription") { Button("Show Transcription") {
@ -39,4 +52,11 @@ struct MenuBarView: View {
} }
.padding(.vertical, 4) .padding(.vertical, 4)
} }
/// Renders a whole-second duration as a short label for the usage summary.
///
/// Durations under one minute are shown as `"<n>s"`; anything longer is split into whole
/// minutes and leftover seconds, e.g. `"2m 5s"`. Minutes are never rolled up into hours.
///
/// - Parameter seconds: The duration in whole seconds.
/// - Returns: The formatted label.
private func formatDuration(_ seconds: Int) -> String {
    // Short durations keep the plain seconds form.
    guard seconds >= 60 else { return "\(seconds)s" }
    let (minutes, leftover) = seconds.quotientAndRemainder(dividingBy: 60)
    return "\(minutes)m \(leftover)s"
}
} }