fix: WebSocket connection, global shortcut, and accessibility

- Add model query param to WebSocket URL (was causing handshake failure)
- Suppress "connection lost" error on intentional disconnect
- Fix shortcut recording with NSEvent local monitor
- Add proper keycode-to-string mapping for shortcut display
- Move app lifecycle to NSApplicationDelegate for reliable window management
- Prompt for Accessibility permission on first launch
- Add build-app.sh for proper .app bundle creation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Carsten Abele 2026-04-07 20:20:12 +02:00
parent 602f97253c
commit b57bec4273
7 changed files with 184 additions and 50 deletions

View file

@ -37,9 +37,24 @@ final class AppSettings: ObservableObject {
if mods.contains(.option) { parts.append("") } if mods.contains(.option) { parts.append("") }
if mods.contains(.shift) { parts.append("") } if mods.contains(.shift) { parts.append("") }
if mods.contains(.command) { parts.append("") } if mods.contains(.command) { parts.append("") }
if let scalar = Unicode.Scalar(shortcutKeyCode) { parts.append(Self.keyCodeToString(shortcutKeyCode))
parts.append(String(Character(scalar)).uppercased())
}
return parts.joined() return parts.joined()
} }
/// Maps a macOS virtual key code to a short label for shortcut display.
///
/// The table follows the ANSI (US) virtual key code layout, so the label names the
/// physical key position rather than the character the active keyboard layout produces.
///
/// - Parameter keyCode: Virtual key code as reported by `NSEvent.keyCode`.
/// - Returns: The label for the key, or `"Key<code>"` when the code is not in the table.
private static func keyCodeToString(_ keyCode: UInt16) -> String {
    let mapping: [UInt16: String] = [
        // Letters, digits and punctuation
        0: "A", 1: "S", 2: "D", 3: "F", 4: "H", 5: "G", 6: "Z", 7: "X",
        8: "C", 9: "V", 11: "B", 12: "Q", 13: "W", 14: "E", 15: "R",
        16: "Y", 17: "T", 18: "1", 19: "2", 20: "3", 21: "4", 22: "6",
        23: "5", 24: "=", 25: "9", 26: "7", 27: "-", 28: "8", 29: "0",
        30: "]", 31: "O", 32: "U", 33: "[", 34: "I", 35: "P", 37: "L",
        38: "J", 39: "'", 40: "K", 41: ";", 42: "\\", 43: ",", 44: "/",
        45: "N", 46: "M", 47: ".", 49: "Space", 50: "`",
        // Editing keys
        36: "Return", 48: "Tab", 51: "Delete", 53: "Esc",
        // Function keys
        96: "F5", 97: "F6", 98: "F7", 99: "F3", 100: "F8",
        101: "F9", 103: "F11", 105: "F13", 109: "F10", 111: "F12",
        118: "F4", 120: "F2", 122: "F1",
        107: "F14", 113: "F15", 106: "F16", 64: "F17", 79: "F18",
        80: "F19", 90: "F20",
        // Navigation keys: a shortcut may combine any key with a modifier,
        // so these need labels too instead of falling back to "Key123" etc.
        115: "Home", 116: "PageUp", 117: "FwdDelete", 119: "End", 121: "PageDown",
        123: "Left", 124: "Right", 125: "Down", 126: "Up",
    ]
    return mapping[keyCode] ?? "Key\(keyCode)"
}
} }

View file

@ -2,62 +2,89 @@ import SwiftUI
@main @main
struct MyVoxtralApp: App { struct MyVoxtralApp: App {
@StateObject private var manager = TranscriptionManager() @NSApplicationDelegateAdaptor(AppDelegate.self) var appDelegate
@StateObject private var settings = AppSettings.shared
@State private var transcriptionPanel: TranscriptionPanel?
@State private var settingsWindow: NSWindow?
private let globalShortcut = GlobalShortcut()
var body: some Scene { var body: some Scene {
MenuBarExtra { MenuBarExtra {
MenuBarView( MenuBarView(
manager: manager, manager: appDelegate.manager,
onShowTranscription: { showTranscriptionWindow() }, onShowTranscription: { appDelegate.showTranscriptionWindow() },
onShowSettings: { showSettingsWindow() } onShowSettings: { appDelegate.showSettingsWindow() }
) )
.task { .onChange(of: appDelegate.manager.isRecording) {
if !settings.hasAPIKey { if appDelegate.manager.isRecording && AppSettings.shared.outputMode == .textBox {
showSettingsWindow() appDelegate.showTranscriptionWindow()
}
registerShortcut()
}
.onChange(of: manager.isRecording) {
if manager.isRecording && settings.outputMode == .textBox {
showTranscriptionWindow()
} }
} }
} label: { } label: {
Image(systemName: manager.isRecording ? "mic.fill" : "mic") Image(systemName: appDelegate.manager.isRecording ? "mic.fill" : "mic")
.symbolRenderingMode(.palette) .symbolRenderingMode(.palette)
.foregroundStyle(manager.isRecording ? .red : .primary) .foregroundStyle(appDelegate.manager.isRecording ? .red : .primary)
}
}
}
@MainActor
final class AppDelegate: NSObject, NSApplicationDelegate, ObservableObject {
let manager = TranscriptionManager()
private let globalShortcut = GlobalShortcut()
private var settingsWindow: NSWindow?
private var transcriptionPanel: TranscriptionPanel?
private var defaultsObserver: NSObjectProtocol?
private var lastKeyCode: UInt16 = 0
private var lastModifiers: UInt = 0
func applicationDidFinishLaunching(_ notification: Notification) {
// Prompt for Accessibility permission (needed for global shortcut + cursor injection)
if !CursorInjector.isAccessibilityGranted {
CursorInjector.promptAccessibilityPermission()
}
if !AppSettings.shared.hasAPIKey {
showSettingsWindow()
}
registerShortcut()
// Re-register shortcut when settings change (only if shortcut actually changed)
defaultsObserver = NotificationCenter.default.addObserver(
forName: UserDefaults.didChangeNotification, object: nil, queue: .main
) { [weak self] _ in
Task { @MainActor in
let settings = AppSettings.shared
guard let self,
settings.shortcutKeyCode != self.lastKeyCode ||
settings.shortcutModifiers != self.lastModifiers else { return }
self.registerShortcut()
}
} }
.onChange(of: settings.shortcutKeyCode) { registerShortcut() }
.onChange(of: settings.shortcutModifiers) { registerShortcut() }
} }
private func registerShortcut() { func registerShortcut() {
let settings = AppSettings.shared
lastKeyCode = settings.shortcutKeyCode
lastModifiers = settings.shortcutModifiers
globalShortcut.register( globalShortcut.register(
keyCode: settings.shortcutKeyCode, keyCode: settings.shortcutKeyCode,
modifiers: settings.shortcutModifiers modifiers: settings.shortcutModifiers
) )
globalShortcut.onTrigger = { [weak manager] in globalShortcut.onTrigger = { [weak self] in
Task { @MainActor in Task { @MainActor in
manager?.toggle() self?.manager.toggle()
} }
} }
} }
private func showTranscriptionWindow() { func showTranscriptionWindow() {
if transcriptionPanel == nil { if transcriptionPanel == nil {
transcriptionPanel = TranscriptionPanel(manager: manager) transcriptionPanel = TranscriptionPanel(manager: manager)
} }
transcriptionPanel?.show() transcriptionPanel?.show()
} }
private func showSettingsWindow() { func showSettingsWindow() {
if let settingsWindow, settingsWindow.isVisible { if let settingsWindow, settingsWindow.isVisible {
settingsWindow.makeKeyAndOrderFront(nil) settingsWindow.makeKeyAndOrderFront(nil)
NSApp.activate(ignoringOtherApps: true)
return return
} }
let window = NSWindow( let window = NSWindow(

View file

@ -5,30 +5,36 @@ final class VoxtralWebSocketClient {
private var webSocketTask: URLSessionWebSocketTask? private var webSocketTask: URLSessionWebSocketTask?
private var session: URLSession? private var session: URLSession?
private let encoder = JSONEncoder() private let encoder = JSONEncoder()
private var intentionalDisconnect = false
var onEvent: ((VoxtralEvent) -> Void)? var onEvent: ((VoxtralEvent) -> Void)?
func connect(apiKey: String, delayMs: Int) { func connect(apiKey: String, model: String = "voxtral-mini-transcribe-realtime-2602", delayMs: Int) {
guard let url = URL(string: "wss://api.mistral.ai/v1/audio/transcriptions/realtime") else { return } var components = URLComponents(string: "wss://api.mistral.ai/v1/audio/transcriptions/realtime")!
components.queryItems = [URLQueryItem(name: "model", value: model)]
guard let url = components.url else { return }
var request = URLRequest(url: url) var request = URLRequest(url: url)
request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization") request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
session = URLSession(configuration: .default) let config = URLSessionConfiguration.default
config.httpAdditionalHeaders = ["Authorization": "Bearer \(apiKey)"]
session = URLSession(configuration: config)
webSocketTask = session?.webSocketTask(with: request) webSocketTask = session?.webSocketTask(with: request)
webSocketTask?.resume() webSocketTask?.resume()
// Send session config intentionalDisconnect = false
let config = SessionUpdateMessage( // Start receiving first, then send session config
receiveLoop()
// Send session config after connection
let sessionConfig = SessionUpdateMessage(
session: SessionConfig( session: SessionConfig(
audioFormat: AudioFormatConfig(), audioFormat: AudioFormatConfig(),
targetStreamingDelayMs: delayMs targetStreamingDelayMs: delayMs
) )
) )
sendJSON(config) sendJSON(sessionConfig)
// Start receiving
receiveLoop()
} }
func sendAudio(_ pcmData: Data) { func sendAudio(_ pcmData: Data) {
@ -42,6 +48,7 @@ final class VoxtralWebSocketClient {
} }
func disconnect() { func disconnect() {
intentionalDisconnect = true
sendJSON(AudioEndMessage()) sendJSON(AudioEndMessage())
webSocketTask?.cancel(with: .normalClosure, reason: nil) webSocketTask?.cancel(with: .normalClosure, reason: nil)
webSocketTask = nil webSocketTask = nil
@ -79,10 +86,11 @@ final class VoxtralWebSocketClient {
@unknown default: @unknown default:
break break
} }
self?.receiveLoop() Task { @MainActor in self?.receiveLoop() }
case .failure(let error): case .failure(let error):
Task { @MainActor in Task { @MainActor in
self?.onEvent?(.error("Connection lost: \(error.localizedDescription)")) guard let self, !self.intentionalDisconnect else { return }
self.onEvent?(.error("Connection lost: \(error.localizedDescription)"))
} }
} }
} }

View file

@ -1,7 +1,8 @@
import Cocoa import Cocoa
final class GlobalShortcut { final class GlobalShortcut {
private var monitor: Any? private var globalMonitor: Any?
private var localMonitor: Any?
var onTrigger: (() -> Void)? var onTrigger: (() -> Void)?
func register(keyCode: UInt16, modifiers: UInt) { func register(keyCode: UInt16, modifiers: UInt) {
@ -9,20 +10,31 @@ final class GlobalShortcut {
guard keyCode != 0 || modifiers != 0 else { return } guard keyCode != 0 || modifiers != 0 else { return }
let requiredFlags = NSEvent.ModifierFlags(rawValue: modifiers) let requiredFlags = NSEvent.ModifierFlags(rawValue: modifiers)
let mask: NSEvent.ModifierFlags = [.command, .option, .control, .shift]
monitor = NSEvent.addGlobalMonitorForEvents(matching: .keyDown) { [weak self] event in globalMonitor = NSEvent.addGlobalMonitorForEvents(matching: .keyDown) { [weak self] event in
let mask: NSEvent.ModifierFlags = [.command, .option, .control, .shift]
if event.keyCode == keyCode && event.modifierFlags.intersection(mask) == requiredFlags { if event.keyCode == keyCode && event.modifierFlags.intersection(mask) == requiredFlags {
self?.onTrigger?() self?.onTrigger?()
} }
} }
localMonitor = NSEvent.addLocalMonitorForEvents(matching: .keyDown) { [weak self] event in
if event.keyCode == keyCode && event.modifierFlags.intersection(mask) == requiredFlags {
self?.onTrigger?()
return nil
}
return event
}
} }
func unregister() { func unregister() {
if let monitor { if let globalMonitor {
NSEvent.removeMonitor(monitor) NSEvent.removeMonitor(globalMonitor)
} }
monitor = nil globalMonitor = nil
if let localMonitor {
NSEvent.removeMonitor(localMonitor)
}
localMonitor = nil
} }
deinit { deinit {

View file

@ -11,7 +11,6 @@ struct MenuBarView: View {
Button(manager.isRecording ? "Stop Recording" : "Start Recording") { Button(manager.isRecording ? "Stop Recording" : "Start Recording") {
manager.toggle() manager.toggle()
} }
.keyboardShortcut("r")
if case .error(let msg) = manager.state { if case .error(let msg) = manager.state {
Text(msg) Text(msg)

View file

@ -3,6 +3,7 @@ import SwiftUI
struct SettingsView: View { struct SettingsView: View {
@ObservedObject var settings = AppSettings.shared @ObservedObject var settings = AppSettings.shared
@State private var isRecordingShortcut = false @State private var isRecordingShortcut = false
@State private var keyMonitor: Any?
var body: some View { var body: some View {
Form { Form {
@ -36,8 +37,18 @@ struct SettingsView: View {
HStack { HStack {
Text("Toggle Recording:") Text("Toggle Recording:")
Spacer() Spacer()
Button(isRecordingShortcut ? "Press keys..." : settings.shortcutDisplayString) { Button(isRecordingShortcut ? "Press a key combo..." : settings.shortcutDisplayString) {
isRecordingShortcut = true startRecordingShortcut()
}
.buttonStyle(.bordered)
if settings.hasShortcut {
Button("Clear") {
settings.shortcutKeyCode = 0
settings.shortcutModifiers = 0
}
.buttonStyle(.borderless)
.foregroundStyle(.secondary)
} }
} }
} }
@ -64,5 +75,38 @@ struct SettingsView: View {
} }
.formStyle(.grouped) .formStyle(.grouped)
.frame(width: 360, height: 340) .frame(width: 360, height: 340)
.onDisappear {
stopRecordingShortcut()
}
}
/// Begins capturing the next key combination as the new toggle shortcut.
///
/// Installs a local `keyDown` monitor that swallows every key press while recording:
/// - a press with at least one of Command, Option, Control or Shift is stored in
///   `settings` and ends recording;
/// - Escape without modifiers cancels recording;
/// - any other unmodified key is ignored and recording continues.
///
/// Calling this while a recording session is already active does nothing.
private func startRecordingShortcut() {
    // Pressing the button again mid-recording would otherwise install a second
    // monitor and overwrite `keyMonitor`, leaving the first one registered with
    // no handle left to remove it.
    guard keyMonitor == nil else { return }

    isRecordingShortcut = true
    keyMonitor = NSEvent.addLocalMonitorForEvents(matching: .keyDown) { event in
        let escapeKeyCode: UInt16 = 53
        let mask: NSEvent.ModifierFlags = [.command, .option, .control, .shift]
        let mods = event.modifierFlags.intersection(mask)

        // Require at least one modifier key
        guard !mods.isEmpty else {
            // Escape cancels
            if event.keyCode == escapeKeyCode {
                stopRecordingShortcut()
            }
            return nil
        }

        settings.shortcutKeyCode = event.keyCode
        settings.shortcutModifiers = mods.rawValue
        stopRecordingShortcut()
        return nil // swallow the event
    }
}
private func stopRecordingShortcut() {
isRecordingShortcut = false
if let keyMonitor {
NSEvent.removeMonitor(keyMonitor)
}
keyMonitor = nil
} }
} }

29
MyVoxtral/build-app.sh Executable file
View file

@ -0,0 +1,29 @@
#!/bin/bash
# Builds the Swift package and wraps the resulting binary in a minimal .app bundle.
#
# Usage:
#   ./build-app.sh                        # debug build
#   CONFIGURATION=release ./build-app.sh  # any SwiftPM configuration
#
# Paths are relative: run this from the directory where `swift build` works and
# where "$APP_NAME/Info.plist" resolves.
set -euo pipefail

APP_NAME="MyVoxtral"
CONFIGURATION="${CONFIGURATION:-debug}"

# Build
swift build -c "$CONFIGURATION"

# Ask SwiftPM where the products are instead of hard-coding
# ".build/arm64-apple-macosx/debug", which does not exist on Intel Macs
# or for non-debug configurations.
BUILD_DIR="$(swift build -c "$CONFIGURATION" --show-bin-path)"
APP_BUNDLE="$BUILD_DIR/$APP_NAME.app"
INFO_PLIST="$APP_BUNDLE/Contents/Info.plist"

# Adds a string entry to the bundle's Info.plist only when the key is absent,
# so values already present in the source Info.plist win. Unlike a blanket
# "|| true", a genuine PlistBuddy failure (e.g. malformed plist) aborts the script.
add_plist_string_if_missing() {
    local key="$1"
    local value="$2"
    if ! /usr/libexec/PlistBuddy -c "Print :$key" "$INFO_PLIST" >/dev/null 2>&1; then
        /usr/libexec/PlistBuddy -c "Add :$key string $value" "$INFO_PLIST"
    fi
}

# Create .app bundle structure
rm -rf "$APP_BUNDLE"
mkdir -p "$APP_BUNDLE/Contents/MacOS"
mkdir -p "$APP_BUNDLE/Contents/Resources"

# Copy binary
cp "$BUILD_DIR/$APP_NAME" "$APP_BUNDLE/Contents/MacOS/$APP_NAME"

# Copy Info.plist
cp "$APP_NAME/Info.plist" "$INFO_PLIST"

# Fill in the bundle keys a launchable .app needs when Info.plist lacks them
add_plist_string_if_missing CFBundleExecutable "$APP_NAME"
add_plist_string_if_missing CFBundleIdentifier "com.myvoxtral.app"
add_plist_string_if_missing CFBundleName "$APP_NAME"
add_plist_string_if_missing CFBundlePackageType "APPL"

echo "Built: $APP_BUNDLE"
echo "Run with: open \"$APP_BUNDLE\""