feat: add usage cost display in menu bar dropdown
Shows the session and cumulative audio duration and the corresponding cost (at $0.006/min), both derived from the usage.prompt_audio_seconds field of the transcription.done event.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b57bec4273
commit
3c2cf277bb
3 changed files with 52 additions and 3 deletions
|
|
@ -8,8 +8,14 @@ enum RecordingState: Equatable {
|
||||||
|
|
||||||
@MainActor
|
@MainActor
|
||||||
final class TranscriptionManager: ObservableObject {
|
final class TranscriptionManager: ObservableObject {
|
||||||
|
static let costPerSecond: Double = 0.0001 // $0.006/min for voxtral-mini-transcribe-realtime
|
||||||
|
|
||||||
@Published var state: RecordingState = .idle
|
@Published var state: RecordingState = .idle
|
||||||
@Published var currentText: String = ""
|
@Published var currentText: String = ""
|
||||||
|
@Published var sessionAudioSeconds: Int = 0
|
||||||
|
@Published var totalAudioSeconds: Int = 0
|
||||||
|
@Published var sessionCost: Double = 0
|
||||||
|
@Published var totalCost: Double = 0
|
||||||
|
|
||||||
private let audioCapture = AudioCapture()
|
private let audioCapture = AudioCapture()
|
||||||
private let wsClient = VoxtralWebSocketClient()
|
private let wsClient = VoxtralWebSocketClient()
|
||||||
|
|
@ -33,6 +39,8 @@ final class TranscriptionManager: ObservableObject {
|
||||||
}
|
}
|
||||||
|
|
||||||
currentText = ""
|
currentText = ""
|
||||||
|
sessionAudioSeconds = 0
|
||||||
|
sessionCost = 0
|
||||||
hasRetried = false
|
hasRetried = false
|
||||||
|
|
||||||
wsClient.onEvent = { [weak self] event in
|
wsClient.onEvent = { [weak self] event in
|
||||||
|
|
@ -79,10 +87,16 @@ final class TranscriptionManager: ObservableObject {
|
||||||
break
|
break
|
||||||
case .language:
|
case .language:
|
||||||
break
|
break
|
||||||
case .done(let text):
|
case .done(let text, let audioSeconds):
|
||||||
if currentText.isEmpty {
|
if currentText.isEmpty {
|
||||||
currentText = text
|
currentText = text
|
||||||
}
|
}
|
||||||
|
if let secs = audioSeconds {
|
||||||
|
sessionAudioSeconds = secs
|
||||||
|
sessionCost = Double(secs) * Self.costPerSecond
|
||||||
|
totalAudioSeconds += secs
|
||||||
|
totalCost += sessionCost
|
||||||
|
}
|
||||||
case .error(let message):
|
case .error(let message):
|
||||||
if !hasRetried && state == .recording {
|
if !hasRetried && state == .recording {
|
||||||
hasRetried = true
|
hasRetried = true
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,7 @@ enum VoxtralEvent {
|
||||||
case textDelta(String)
|
case textDelta(String)
|
||||||
case segment(text: String, start: Double, end: Double)
|
case segment(text: String, start: Double, end: Double)
|
||||||
case language(String)
|
case language(String)
|
||||||
case done(text: String)
|
case done(text: String, audioSeconds: Int?)
|
||||||
case error(String)
|
case error(String)
|
||||||
case unknown(String)
|
case unknown(String)
|
||||||
}
|
}
|
||||||
|
|
@ -76,6 +76,21 @@ struct SegmentEvent: Decodable {
|
||||||
|
|
||||||
/// Decoded payload of a "transcription.done" event: the text plus an optional usage summary.
struct DoneEvent: Decodable {
|
struct DoneEvent: Decodable {
|
||||||
let text: String
|
let text: String
|
||||||
|
/// Usage figures attached to the event; declared optional, so it is `nil` when the key is absent.
let usage: UsageInfo?
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Usage figures decoded from the optional `usage` object of a "transcription.done" event.
///
/// Every field is optional, so a key that is missing from the payload decodes as `nil`.
struct UsageInfo: Decodable {
|
||||||
|
let promptTokens: Int?
|
||||||
|
let completionTokens: Int?
|
||||||
|
let totalTokens: Int?
|
||||||
|
/// Audio duration in whole seconds (JSON key "prompt_audio_seconds"); this is the value
/// forwarded as `audioSeconds` in the `.done` event and used to compute the displayed cost.
let promptAudioSeconds: Int?
|
||||||
|
|
||||||
|
// Maps the camelCase property names to the snake_case keys used in the JSON payload.
enum CodingKeys: String, CodingKey {
|
||||||
|
case promptTokens = "prompt_tokens"
|
||||||
|
case completionTokens = "completion_tokens"
|
||||||
|
case totalTokens = "total_tokens"
|
||||||
|
case promptAudioSeconds = "prompt_audio_seconds"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ErrorEvent: Decodable {
|
struct ErrorEvent: Decodable {
|
||||||
|
|
@ -111,7 +126,7 @@ func parseVoxtralEvent(from data: Data) -> VoxtralEvent {
|
||||||
return .language(e.audioLanguage)
|
return .language(e.audioLanguage)
|
||||||
case "transcription.done":
|
case "transcription.done":
|
||||||
guard let e = try? JSONDecoder().decode(DoneEvent.self, from: data) else { return .unknown("") }
|
guard let e = try? JSONDecoder().decode(DoneEvent.self, from: data) else { return .unknown("") }
|
||||||
return .done(text: e.text)
|
return .done(text: e.text, audioSeconds: e.usage?.promptAudioSeconds)
|
||||||
case "error":
|
case "error":
|
||||||
if let e = try? JSONDecoder().decode(ErrorEvent.self, from: data) {
|
if let e = try? JSONDecoder().decode(ErrorEvent.self, from: data) {
|
||||||
return .error(e.error?.message?.detail ?? "Unknown error")
|
return .error(e.error?.message?.detail ?? "Unknown error")
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,19 @@ struct MenuBarView: View {
|
||||||
.padding(.horizontal, 8)
|
.padding(.horizontal, 8)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if manager.sessionCost > 0 || manager.totalCost > 0 {
|
||||||
|
Divider()
|
||||||
|
VStack(alignment: .leading, spacing: 2) {
|
||||||
|
if manager.sessionCost > 0 {
|
||||||
|
Text("Session: \(manager.sessionAudioSeconds)s — $\(manager.sessionCost, specifier: "%.4f")")
|
||||||
|
}
|
||||||
|
Text("Total: \(formatDuration(manager.totalAudioSeconds)) — $\(manager.totalCost, specifier: "%.4f")")
|
||||||
|
}
|
||||||
|
.font(.caption)
|
||||||
|
.foregroundStyle(.secondary)
|
||||||
|
.padding(.horizontal, 8)
|
||||||
|
}
|
||||||
|
|
||||||
Divider()
|
Divider()
|
||||||
|
|
||||||
Button("Show Transcription") {
|
Button("Show Transcription") {
|
||||||
|
|
@ -39,4 +52,11 @@ struct MenuBarView: View {
|
||||||
}
|
}
|
||||||
.padding(.vertical, 4)
|
.padding(.vertical, 4)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Formats a duration given in whole seconds as a compact, human-readable string.
///
/// - Under one minute: `"45s"`
/// - Under one hour: `"2m 5s"`
/// - One hour or more: `"1h 15m 3s"`
///
/// - Parameter seconds: Duration in whole seconds.
/// - Returns: The formatted duration string.
private func formatDuration(_ seconds: Int) -> String {
    // Anything below one minute is shown as raw seconds.
    guard seconds >= 60 else { return "\(seconds)s" }

    // Descriptive local names avoid shadowing the standard library's global `min(_:_:)`.
    let (totalMinutes, remainingSeconds) = seconds.quotientAndRemainder(dividingBy: 60)
    guard totalMinutes >= 60 else { return "\(totalMinutes)m \(remainingSeconds)s" }

    // The cumulative total can exceed an hour; break out hours instead of showing e.g. "75m 3s".
    let (hours, minutes) = totalMinutes.quotientAndRemainder(dividingBy: 60)
    return "\(hours)h \(minutes)m \(remainingSeconds)s"
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue