fix: WebSocket connection, global shortcut, and accessibility
- Add model query param to WebSocket URL (its absence was causing handshake failure)
- Suppress "connection lost" error on intentional disconnect
- Fix shortcut recording with NSEvent local monitor
- Add proper keycode-to-string mapping for shortcut display
- Move app lifecycle to NSApplicationDelegate for reliable window management
- Prompt for Accessibility permission on first launch
- Add build-app.sh for proper .app bundle creation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
602f97253c
commit
b57bec4273
7 changed files with 184 additions and 50 deletions
|
|
@ -37,9 +37,24 @@ final class AppSettings: ObservableObject {
|
||||||
if mods.contains(.option) { parts.append("⌥") }
|
if mods.contains(.option) { parts.append("⌥") }
|
||||||
if mods.contains(.shift) { parts.append("⇧") }
|
if mods.contains(.shift) { parts.append("⇧") }
|
||||||
if mods.contains(.command) { parts.append("⌘") }
|
if mods.contains(.command) { parts.append("⌘") }
|
||||||
if let scalar = Unicode.Scalar(shortcutKeyCode) {
|
parts.append(Self.keyCodeToString(shortcutKeyCode))
|
||||||
parts.append(String(Character(scalar)).uppercased())
|
|
||||||
}
|
|
||||||
return parts.joined()
|
return parts.joined()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a human-readable label for a macOS virtual key code.
///
/// Virtual key codes identify physical key positions (the `kVK_*` constants in
/// Carbon's `Events.h`), not characters, so the labels assume an ANSI US layout.
/// A `switch` is used instead of a dictionary literal so that no lookup table
/// is rebuilt on every call.
///
/// - Parameter keyCode: The hardware key code, as reported by `NSEvent.keyCode`.
/// - Returns: The key's label (`"A"`, `"Space"`, `"F5"`, `"←"`, …), or
///   `"Key<code>"` when the code has no known label.
private static func keyCodeToString(_ keyCode: UInt16) -> String {
    switch keyCode {
    // Letters, digits and punctuation (ANSI US positions).
    case 0: return "A"
    case 1: return "S"
    case 2: return "D"
    case 3: return "F"
    case 4: return "H"
    case 5: return "G"
    case 6: return "Z"
    case 7: return "X"
    case 8: return "C"
    case 9: return "V"
    case 11: return "B"
    case 12: return "Q"
    case 13: return "W"
    case 14: return "E"
    case 15: return "R"
    case 16: return "Y"
    case 17: return "T"
    case 18: return "1"
    case 19: return "2"
    case 20: return "3"
    case 21: return "4"
    case 22: return "6"  // 6 and 5 really are swapped in the key-code table
    case 23: return "5"
    case 24: return "="
    case 25: return "9"
    case 26: return "7"
    case 27: return "-"
    case 28: return "8"
    case 29: return "0"
    case 30: return "]"
    case 31: return "O"
    case 32: return "U"
    case 33: return "["
    case 34: return "I"
    case 35: return "P"
    case 37: return "L"
    case 38: return "J"
    case 39: return "'"
    case 40: return "K"
    case 41: return ";"
    case 42: return "\\"
    case 43: return ","
    case 44: return "/"
    case 45: return "N"
    case 46: return "M"
    case 47: return "."
    case 50: return "`"

    // Whitespace and editing keys.
    case 36: return "Return"
    case 48: return "Tab"
    case 49: return "Space"
    case 51: return "Delete"
    case 53: return "Esc"
    case 117: return "Forward Delete"

    // Navigation keys.
    case 115: return "Home"
    case 116: return "Page Up"
    case 119: return "End"
    case 121: return "Page Down"
    case 123: return "←"
    case 124: return "→"
    case 125: return "↓"
    case 126: return "↑"

    // Function keys (codes are not in numeric order).
    case 122: return "F1"
    case 120: return "F2"
    case 99: return "F3"
    case 118: return "F4"
    case 96: return "F5"
    case 97: return "F6"
    case 98: return "F7"
    case 100: return "F8"
    case 101: return "F9"
    case 109: return "F10"
    case 103: return "F11"
    case 111: return "F12"
    case 105: return "F13"
    case 107: return "F14"
    case 113: return "F15"
    case 106: return "F16"
    case 64: return "F17"
    case 79: return "F18"
    case 80: return "F19"
    case 90: return "F20"

    // Unknown key: fall back to the raw code so the shortcut is still displayable.
    default: return "Key\(keyCode)"
    }
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,62 +2,89 @@ import SwiftUI
|
||||||
|
|
||||||
@main
|
@main
|
||||||
struct MyVoxtralApp: App {
|
struct MyVoxtralApp: App {
|
||||||
@StateObject private var manager = TranscriptionManager()
|
@NSApplicationDelegateAdaptor(AppDelegate.self) var appDelegate
|
||||||
@StateObject private var settings = AppSettings.shared
|
|
||||||
@State private var transcriptionPanel: TranscriptionPanel?
|
|
||||||
@State private var settingsWindow: NSWindow?
|
|
||||||
|
|
||||||
private let globalShortcut = GlobalShortcut()
|
|
||||||
|
|
||||||
var body: some Scene {
|
var body: some Scene {
|
||||||
MenuBarExtra {
|
MenuBarExtra {
|
||||||
MenuBarView(
|
MenuBarView(
|
||||||
manager: manager,
|
manager: appDelegate.manager,
|
||||||
onShowTranscription: { showTranscriptionWindow() },
|
onShowTranscription: { appDelegate.showTranscriptionWindow() },
|
||||||
onShowSettings: { showSettingsWindow() }
|
onShowSettings: { appDelegate.showSettingsWindow() }
|
||||||
)
|
)
|
||||||
.task {
|
.onChange(of: appDelegate.manager.isRecording) {
|
||||||
if !settings.hasAPIKey {
|
if appDelegate.manager.isRecording && AppSettings.shared.outputMode == .textBox {
|
||||||
showSettingsWindow()
|
appDelegate.showTranscriptionWindow()
|
||||||
}
|
|
||||||
registerShortcut()
|
|
||||||
}
|
|
||||||
.onChange(of: manager.isRecording) {
|
|
||||||
if manager.isRecording && settings.outputMode == .textBox {
|
|
||||||
showTranscriptionWindow()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} label: {
|
} label: {
|
||||||
Image(systemName: manager.isRecording ? "mic.fill" : "mic")
|
Image(systemName: appDelegate.manager.isRecording ? "mic.fill" : "mic")
|
||||||
.symbolRenderingMode(.palette)
|
.symbolRenderingMode(.palette)
|
||||||
.foregroundStyle(manager.isRecording ? .red : .primary)
|
.foregroundStyle(appDelegate.manager.isRecording ? .red : .primary)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
.onChange(of: settings.shortcutKeyCode) { registerShortcut() }
|
|
||||||
.onChange(of: settings.shortcutModifiers) { registerShortcut() }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private func registerShortcut() {
|
@MainActor
|
||||||
|
final class AppDelegate: NSObject, NSApplicationDelegate, ObservableObject {
|
||||||
|
let manager = TranscriptionManager()
|
||||||
|
private let globalShortcut = GlobalShortcut()
|
||||||
|
private var settingsWindow: NSWindow?
|
||||||
|
private var transcriptionPanel: TranscriptionPanel?
|
||||||
|
private var defaultsObserver: NSObjectProtocol?
|
||||||
|
private var lastKeyCode: UInt16 = 0
|
||||||
|
private var lastModifiers: UInt = 0
|
||||||
|
|
||||||
|
/// Performs one-time startup work after the application has launched.
///
/// In order: prompts for Accessibility permission when it has not been
/// granted, opens the settings window when no API key is stored, registers
/// the global shortcut, and starts observing `UserDefaults` changes so the
/// shortcut is re-registered whenever its key code or modifiers change.
///
/// - Parameter notification: The launch notification (unused).
func applicationDidFinishLaunching(_ notification: Notification) {
    // Accessibility access is needed for the global shortcut and cursor injection.
    let accessibilityGranted = CursorInjector.isAccessibilityGranted
    if accessibilityGranted == false {
        CursorInjector.promptAccessibilityPermission()
    }

    // No API key stored yet: bring up Settings.
    let apiKeyStored = AppSettings.shared.hasAPIKey
    if apiKeyStored == false {
        showSettingsWindow()
    }

    registerShortcut()

    // The notification fires for every defaults write, so compare against the
    // last registered values and only re-register on an actual shortcut change.
    let center = NotificationCenter.default
    defaultsObserver = center.addObserver(
        forName: UserDefaults.didChangeNotification,
        object: nil,
        queue: .main
    ) { [weak self] _ in
        Task { @MainActor in
            let current = AppSettings.shared
            guard let self else { return }
            let shortcutChanged = current.shortcutKeyCode != self.lastKeyCode
                || current.shortcutModifiers != self.lastModifiers
            guard shortcutChanged else { return }
            self.registerShortcut()
        }
    }
}
|
||||||
|
|
||||||
|
func registerShortcut() {
|
||||||
|
let settings = AppSettings.shared
|
||||||
|
lastKeyCode = settings.shortcutKeyCode
|
||||||
|
lastModifiers = settings.shortcutModifiers
|
||||||
globalShortcut.register(
|
globalShortcut.register(
|
||||||
keyCode: settings.shortcutKeyCode,
|
keyCode: settings.shortcutKeyCode,
|
||||||
modifiers: settings.shortcutModifiers
|
modifiers: settings.shortcutModifiers
|
||||||
)
|
)
|
||||||
globalShortcut.onTrigger = { [weak manager] in
|
globalShortcut.onTrigger = { [weak self] in
|
||||||
Task { @MainActor in
|
Task { @MainActor in
|
||||||
manager?.toggle()
|
self?.manager.toggle()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private func showTranscriptionWindow() {
|
func showTranscriptionWindow() {
|
||||||
if transcriptionPanel == nil {
|
if transcriptionPanel == nil {
|
||||||
transcriptionPanel = TranscriptionPanel(manager: manager)
|
transcriptionPanel = TranscriptionPanel(manager: manager)
|
||||||
}
|
}
|
||||||
transcriptionPanel?.show()
|
transcriptionPanel?.show()
|
||||||
}
|
}
|
||||||
|
|
||||||
private func showSettingsWindow() {
|
func showSettingsWindow() {
|
||||||
if let settingsWindow, settingsWindow.isVisible {
|
if let settingsWindow, settingsWindow.isVisible {
|
||||||
settingsWindow.makeKeyAndOrderFront(nil)
|
settingsWindow.makeKeyAndOrderFront(nil)
|
||||||
|
NSApp.activate(ignoringOtherApps: true)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
let window = NSWindow(
|
let window = NSWindow(
|
||||||
|
|
|
||||||
|
|
@ -5,30 +5,36 @@ final class VoxtralWebSocketClient {
|
||||||
private var webSocketTask: URLSessionWebSocketTask?
|
private var webSocketTask: URLSessionWebSocketTask?
|
||||||
private var session: URLSession?
|
private var session: URLSession?
|
||||||
private let encoder = JSONEncoder()
|
private let encoder = JSONEncoder()
|
||||||
|
private var intentionalDisconnect = false
|
||||||
|
|
||||||
var onEvent: ((VoxtralEvent) -> Void)?
|
var onEvent: ((VoxtralEvent) -> Void)?
|
||||||
|
|
||||||
func connect(apiKey: String, delayMs: Int) {
|
func connect(apiKey: String, model: String = "voxtral-mini-transcribe-realtime-2602", delayMs: Int) {
|
||||||
guard let url = URL(string: "wss://api.mistral.ai/v1/audio/transcriptions/realtime") else { return }
|
var components = URLComponents(string: "wss://api.mistral.ai/v1/audio/transcriptions/realtime")!
|
||||||
|
components.queryItems = [URLQueryItem(name: "model", value: model)]
|
||||||
|
guard let url = components.url else { return }
|
||||||
|
|
||||||
var request = URLRequest(url: url)
|
var request = URLRequest(url: url)
|
||||||
request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
|
request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
|
||||||
|
|
||||||
session = URLSession(configuration: .default)
|
let config = URLSessionConfiguration.default
|
||||||
|
config.httpAdditionalHeaders = ["Authorization": "Bearer \(apiKey)"]
|
||||||
|
session = URLSession(configuration: config)
|
||||||
webSocketTask = session?.webSocketTask(with: request)
|
webSocketTask = session?.webSocketTask(with: request)
|
||||||
webSocketTask?.resume()
|
webSocketTask?.resume()
|
||||||
|
|
||||||
// Send session config
|
intentionalDisconnect = false
|
||||||
let config = SessionUpdateMessage(
|
// Start receiving first, then send session config
|
||||||
|
receiveLoop()
|
||||||
|
|
||||||
|
// Send session config after connection
|
||||||
|
let sessionConfig = SessionUpdateMessage(
|
||||||
session: SessionConfig(
|
session: SessionConfig(
|
||||||
audioFormat: AudioFormatConfig(),
|
audioFormat: AudioFormatConfig(),
|
||||||
targetStreamingDelayMs: delayMs
|
targetStreamingDelayMs: delayMs
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
sendJSON(config)
|
sendJSON(sessionConfig)
|
||||||
|
|
||||||
// Start receiving
|
|
||||||
receiveLoop()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func sendAudio(_ pcmData: Data) {
|
func sendAudio(_ pcmData: Data) {
|
||||||
|
|
@ -42,6 +48,7 @@ final class VoxtralWebSocketClient {
|
||||||
}
|
}
|
||||||
|
|
||||||
func disconnect() {
|
func disconnect() {
|
||||||
|
intentionalDisconnect = true
|
||||||
sendJSON(AudioEndMessage())
|
sendJSON(AudioEndMessage())
|
||||||
webSocketTask?.cancel(with: .normalClosure, reason: nil)
|
webSocketTask?.cancel(with: .normalClosure, reason: nil)
|
||||||
webSocketTask = nil
|
webSocketTask = nil
|
||||||
|
|
@ -79,10 +86,11 @@ final class VoxtralWebSocketClient {
|
||||||
@unknown default:
|
@unknown default:
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
self?.receiveLoop()
|
Task { @MainActor in self?.receiveLoop() }
|
||||||
case .failure(let error):
|
case .failure(let error):
|
||||||
Task { @MainActor in
|
Task { @MainActor in
|
||||||
self?.onEvent?(.error("Connection lost: \(error.localizedDescription)"))
|
guard let self, !self.intentionalDisconnect else { return }
|
||||||
|
self.onEvent?(.error("Connection lost: \(error.localizedDescription)"))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
import Cocoa
|
import Cocoa
|
||||||
|
|
||||||
final class GlobalShortcut {
|
final class GlobalShortcut {
|
||||||
private var monitor: Any?
|
private var globalMonitor: Any?
|
||||||
|
private var localMonitor: Any?
|
||||||
var onTrigger: (() -> Void)?
|
var onTrigger: (() -> Void)?
|
||||||
|
|
||||||
func register(keyCode: UInt16, modifiers: UInt) {
|
func register(keyCode: UInt16, modifiers: UInt) {
|
||||||
|
|
@ -9,20 +10,31 @@ final class GlobalShortcut {
|
||||||
guard keyCode != 0 || modifiers != 0 else { return }
|
guard keyCode != 0 || modifiers != 0 else { return }
|
||||||
|
|
||||||
let requiredFlags = NSEvent.ModifierFlags(rawValue: modifiers)
|
let requiredFlags = NSEvent.ModifierFlags(rawValue: modifiers)
|
||||||
|
|
||||||
monitor = NSEvent.addGlobalMonitorForEvents(matching: .keyDown) { [weak self] event in
|
|
||||||
let mask: NSEvent.ModifierFlags = [.command, .option, .control, .shift]
|
let mask: NSEvent.ModifierFlags = [.command, .option, .control, .shift]
|
||||||
|
|
||||||
|
globalMonitor = NSEvent.addGlobalMonitorForEvents(matching: .keyDown) { [weak self] event in
|
||||||
if event.keyCode == keyCode && event.modifierFlags.intersection(mask) == requiredFlags {
|
if event.keyCode == keyCode && event.modifierFlags.intersection(mask) == requiredFlags {
|
||||||
self?.onTrigger?()
|
self?.onTrigger?()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
localMonitor = NSEvent.addLocalMonitorForEvents(matching: .keyDown) { [weak self] event in
|
||||||
|
if event.keyCode == keyCode && event.modifierFlags.intersection(mask) == requiredFlags {
|
||||||
|
self?.onTrigger?()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return event
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func unregister() {
|
func unregister() {
|
||||||
if let monitor {
|
if let globalMonitor {
|
||||||
NSEvent.removeMonitor(monitor)
|
NSEvent.removeMonitor(globalMonitor)
|
||||||
}
|
}
|
||||||
monitor = nil
|
globalMonitor = nil
|
||||||
|
if let localMonitor {
|
||||||
|
NSEvent.removeMonitor(localMonitor)
|
||||||
|
}
|
||||||
|
localMonitor = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
deinit {
|
deinit {
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,6 @@ struct MenuBarView: View {
|
||||||
Button(manager.isRecording ? "Stop Recording" : "Start Recording") {
|
Button(manager.isRecording ? "Stop Recording" : "Start Recording") {
|
||||||
manager.toggle()
|
manager.toggle()
|
||||||
}
|
}
|
||||||
.keyboardShortcut("r")
|
|
||||||
|
|
||||||
if case .error(let msg) = manager.state {
|
if case .error(let msg) = manager.state {
|
||||||
Text(msg)
|
Text(msg)
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ import SwiftUI
|
||||||
struct SettingsView: View {
|
struct SettingsView: View {
|
||||||
@ObservedObject var settings = AppSettings.shared
|
@ObservedObject var settings = AppSettings.shared
|
||||||
@State private var isRecordingShortcut = false
|
@State private var isRecordingShortcut = false
|
||||||
|
@State private var keyMonitor: Any?
|
||||||
|
|
||||||
var body: some View {
|
var body: some View {
|
||||||
Form {
|
Form {
|
||||||
|
|
@ -36,8 +37,18 @@ struct SettingsView: View {
|
||||||
HStack {
|
HStack {
|
||||||
Text("Toggle Recording:")
|
Text("Toggle Recording:")
|
||||||
Spacer()
|
Spacer()
|
||||||
Button(isRecordingShortcut ? "Press keys..." : settings.shortcutDisplayString) {
|
Button(isRecordingShortcut ? "Press a key combo..." : settings.shortcutDisplayString) {
|
||||||
isRecordingShortcut = true
|
startRecordingShortcut()
|
||||||
|
}
|
||||||
|
.buttonStyle(.bordered)
|
||||||
|
|
||||||
|
if settings.hasShortcut {
|
||||||
|
Button("Clear") {
|
||||||
|
settings.shortcutKeyCode = 0
|
||||||
|
settings.shortcutModifiers = 0
|
||||||
|
}
|
||||||
|
.buttonStyle(.borderless)
|
||||||
|
.foregroundStyle(.secondary)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -64,5 +75,38 @@ struct SettingsView: View {
|
||||||
}
|
}
|
||||||
.formStyle(.grouped)
|
.formStyle(.grouped)
|
||||||
.frame(width: 360, height: 340)
|
.frame(width: 360, height: 340)
|
||||||
|
.onDisappear {
|
||||||
|
stopRecordingShortcut()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Begins capturing the next key combination as the new toggle shortcut.
///
/// Installs a local `keyDown` monitor that swallows every key press while
/// recording is active:
/// - a press with at least one of ⌘/⌥/⌃/⇧ held is stored as the new shortcut
///   and ends recording;
/// - Escape without modifiers cancels recording;
/// - any other unmodified key is ignored.
private func startRecordingShortcut() {
    // The button stays clickable while recording. Remove any monitor left over
    // from a previous call first; otherwise it would be overwritten below,
    // could never be removed, and would keep swallowing key presses.
    if let staleMonitor = keyMonitor {
        NSEvent.removeMonitor(staleMonitor)
        keyMonitor = nil
    }

    isRecordingShortcut = true

    let escapeKeyCode: UInt16 = 53  // kVK_Escape
    let modifierMask: NSEvent.ModifierFlags = [.command, .option, .control, .shift]

    keyMonitor = NSEvent.addLocalMonitorForEvents(matching: .keyDown) { event in
        let mods = event.modifierFlags.intersection(modifierMask)

        // Require at least one modifier key.
        guard !mods.isEmpty else {
            // Escape cancels; every other bare key is swallowed and ignored.
            if event.keyCode == escapeKeyCode {
                stopRecordingShortcut()
            }
            return nil
        }

        settings.shortcutKeyCode = event.keyCode
        settings.shortcutModifiers = mods.rawValue
        stopRecordingShortcut()
        return nil // swallow the event
    }
}
|
||||||
|
|
||||||
|
/// Ends shortcut recording and removes the key monitor if one is installed.
///
/// Always clears `isRecordingShortcut` and resets `keyMonitor` to `nil`, so it
/// can be called whether or not a recording is in progress.
private func stopRecordingShortcut() {
    isRecordingShortcut = false
    // Reset the stored token on every exit path, after any removal below.
    defer { keyMonitor = nil }

    guard let installedMonitor = keyMonitor else { return }
    NSEvent.removeMonitor(installedMonitor)
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
29
MyVoxtral/build-app.sh
Executable file
29
MyVoxtral/build-app.sh
Executable file
|
|
@ -0,0 +1,29 @@
|
||||||
|
#!/bin/bash
# Builds MyVoxtral with SwiftPM and wraps the resulting binary in a minimal
# .app bundle.
#
# Paths are relative to the current working directory: run this where
# `swift build` works and where "$APP_NAME/Info.plist" exists.

set -euo pipefail

APP_NAME="MyVoxtral"

# Build
swift build

# Ask SwiftPM where it placed the products instead of hard-coding the
# arm64 debug directory, so the script also works on Intel Macs.
BUILD_DIR="$(swift build --show-bin-path)"
APP_BUNDLE="$BUILD_DIR/$APP_NAME.app"
INFO_PLIST="$APP_BUNDLE/Contents/Info.plist"

# Adds a string key to the bundle's Info.plist. PlistBuddy's "Add" fails when
# the key already exists; that is expected here, so the failure is ignored and
# any value already present in the source plist is kept.
add_plist_string() {
    /usr/libexec/PlistBuddy -c "Add :$1 string $2" "$INFO_PLIST" 2>/dev/null || true
}

# Create .app bundle structure (start clean so stale files never linger)
rm -rf "$APP_BUNDLE"
mkdir -p "$APP_BUNDLE/Contents/MacOS"
mkdir -p "$APP_BUNDLE/Contents/Resources"

# Copy binary
cp "$BUILD_DIR/$APP_NAME" "$APP_BUNDLE/Contents/MacOS/$APP_NAME"

# Copy Info.plist
cp "$APP_NAME/Info.plist" "$INFO_PLIST"

# Add the keys a bundle needs, unless the plist already defines them
add_plist_string CFBundleExecutable "$APP_NAME"
add_plist_string CFBundleIdentifier com.myvoxtral.app
add_plist_string CFBundleName "$APP_NAME"
add_plist_string CFBundlePackageType APPL

echo "Built: $APP_BUNDLE"
# Quote the path so the printed command works when it contains spaces.
echo "Run with: open \"$APP_BUNDLE\""
|
||||||
Loading…
Add table
Add a link
Reference in a new issue