fix: WebSocket connection, global shortcut, and accessibility
- Add model query param to WebSocket URL (was causing handshake failure)
- Suppress "connection lost" error on intentional disconnect
- Fix shortcut recording with NSEvent local monitor
- Add proper keycode-to-string mapping for shortcut display
- Move app lifecycle to NSApplicationDelegate for reliable window management
- Prompt for Accessibility permission on first launch
- Add build-app.sh for proper .app bundle creation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
602f97253c
commit
b57bec4273
7 changed files with 184 additions and 50 deletions
|
|
@ -37,9 +37,24 @@ final class AppSettings: ObservableObject {
|
|||
if mods.contains(.option) { parts.append("⌥") }
|
||||
if mods.contains(.shift) { parts.append("⇧") }
|
||||
if mods.contains(.command) { parts.append("⌘") }
|
||||
if let scalar = Unicode.Scalar(shortcutKeyCode) {
|
||||
parts.append(String(Character(scalar)).uppercased())
|
||||
}
|
||||
parts.append(Self.keyCodeToString(shortcutKeyCode))
|
||||
return parts.joined()
|
||||
}
|
||||
|
||||
/// Display labels for virtual key codes (the values carried by `NSEvent.keyCode`).
///
/// Built once instead of on every lookup. The codes identify physical key
/// positions; the labels below are the US ANSI legends for those positions.
/// NOTE(review): on other keyboard layouts the printed glyph may differ — confirm
/// whether layout-aware labels are needed.
private static let keyCodeNames: [UInt16: String] = [
    // Letters, digits and punctuation
    0: "A", 1: "S", 2: "D", 3: "F", 4: "H", 5: "G", 6: "Z", 7: "X",
    8: "C", 9: "V", 11: "B", 12: "Q", 13: "W", 14: "E", 15: "R",
    16: "Y", 17: "T", 18: "1", 19: "2", 20: "3", 21: "4", 22: "6",
    23: "5", 24: "=", 25: "9", 26: "7", 27: "-", 28: "8", 29: "0",
    30: "]", 31: "O", 32: "U", 33: "[", 34: "I", 35: "P", 37: "L",
    38: "J", 39: "'", 40: "K", 41: ";", 42: "\\", 43: ",", 44: "/",
    45: "N", 46: "M", 47: ".", 49: "Space", 50: "`",
    // Editing keys
    36: "Return", 48: "Tab", 51: "Delete", 53: "Esc", 117: "⌦",
    // Navigation keys
    115: "Home", 116: "Page Up", 119: "End", 121: "Page Down",
    123: "←", 124: "→", 125: "↓", 126: "↑",
    // Function keys
    122: "F1", 120: "F2", 99: "F3", 118: "F4", 96: "F5", 97: "F6",
    98: "F7", 100: "F8", 101: "F9", 109: "F10", 103: "F11", 111: "F12",
    105: "F13", 107: "F14", 113: "F15", 106: "F16", 64: "F17", 79: "F18",
    80: "F19", 90: "F20",
]

/// Returns a human-readable label for a virtual key code.
///
/// - Parameter keyCode: The key code to describe.
/// - Returns: The label from `keyCodeNames`, or `"Key<code>"` when the code
///   has no entry so the shortcut display never ends up empty.
private static func keyCodeToString(_ keyCode: UInt16) -> String {
    keyCodeNames[keyCode] ?? "Key\(keyCode)"
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,62 +2,89 @@ import SwiftUI
|
|||
|
||||
@main
|
||||
struct MyVoxtralApp: App {
|
||||
@StateObject private var manager = TranscriptionManager()
|
||||
@StateObject private var settings = AppSettings.shared
|
||||
@State private var transcriptionPanel: TranscriptionPanel?
|
||||
@State private var settingsWindow: NSWindow?
|
||||
|
||||
private let globalShortcut = GlobalShortcut()
|
||||
@NSApplicationDelegateAdaptor(AppDelegate.self) var appDelegate
|
||||
|
||||
var body: some Scene {
|
||||
MenuBarExtra {
|
||||
MenuBarView(
|
||||
manager: manager,
|
||||
onShowTranscription: { showTranscriptionWindow() },
|
||||
onShowSettings: { showSettingsWindow() }
|
||||
manager: appDelegate.manager,
|
||||
onShowTranscription: { appDelegate.showTranscriptionWindow() },
|
||||
onShowSettings: { appDelegate.showSettingsWindow() }
|
||||
)
|
||||
.task {
|
||||
if !settings.hasAPIKey {
|
||||
showSettingsWindow()
|
||||
}
|
||||
registerShortcut()
|
||||
}
|
||||
.onChange(of: manager.isRecording) {
|
||||
if manager.isRecording && settings.outputMode == .textBox {
|
||||
showTranscriptionWindow()
|
||||
.onChange(of: appDelegate.manager.isRecording) {
|
||||
if appDelegate.manager.isRecording && AppSettings.shared.outputMode == .textBox {
|
||||
appDelegate.showTranscriptionWindow()
|
||||
}
|
||||
}
|
||||
} label: {
|
||||
Image(systemName: manager.isRecording ? "mic.fill" : "mic")
|
||||
Image(systemName: appDelegate.manager.isRecording ? "mic.fill" : "mic")
|
||||
.symbolRenderingMode(.palette)
|
||||
.foregroundStyle(manager.isRecording ? .red : .primary)
|
||||
.foregroundStyle(appDelegate.manager.isRecording ? .red : .primary)
|
||||
}
|
||||
}
|
||||
.onChange(of: settings.shortcutKeyCode) { registerShortcut() }
|
||||
.onChange(of: settings.shortcutModifiers) { registerShortcut() }
|
||||
}
|
||||
|
||||
private func registerShortcut() {
|
||||
@MainActor
|
||||
final class AppDelegate: NSObject, NSApplicationDelegate, ObservableObject {
|
||||
let manager = TranscriptionManager()
|
||||
private let globalShortcut = GlobalShortcut()
|
||||
private var settingsWindow: NSWindow?
|
||||
private var transcriptionPanel: TranscriptionPanel?
|
||||
private var defaultsObserver: NSObjectProtocol?
|
||||
private var lastKeyCode: UInt16 = 0
|
||||
private var lastModifiers: UInt = 0
|
||||
|
||||
func applicationDidFinishLaunching(_ notification: Notification) {
|
||||
// Prompt for Accessibility permission (needed for global shortcut + cursor injection)
|
||||
if !CursorInjector.isAccessibilityGranted {
|
||||
CursorInjector.promptAccessibilityPermission()
|
||||
}
|
||||
|
||||
if !AppSettings.shared.hasAPIKey {
|
||||
showSettingsWindow()
|
||||
}
|
||||
registerShortcut()
|
||||
|
||||
// Re-register shortcut when settings change (only if shortcut actually changed)
|
||||
defaultsObserver = NotificationCenter.default.addObserver(
|
||||
forName: UserDefaults.didChangeNotification, object: nil, queue: .main
|
||||
) { [weak self] _ in
|
||||
Task { @MainActor in
|
||||
let settings = AppSettings.shared
|
||||
guard let self,
|
||||
settings.shortcutKeyCode != self.lastKeyCode ||
|
||||
settings.shortcutModifiers != self.lastModifiers else { return }
|
||||
self.registerShortcut()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func registerShortcut() {
|
||||
let settings = AppSettings.shared
|
||||
lastKeyCode = settings.shortcutKeyCode
|
||||
lastModifiers = settings.shortcutModifiers
|
||||
globalShortcut.register(
|
||||
keyCode: settings.shortcutKeyCode,
|
||||
modifiers: settings.shortcutModifiers
|
||||
)
|
||||
globalShortcut.onTrigger = { [weak manager] in
|
||||
globalShortcut.onTrigger = { [weak self] in
|
||||
Task { @MainActor in
|
||||
manager?.toggle()
|
||||
self?.manager.toggle()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private func showTranscriptionWindow() {
|
||||
func showTranscriptionWindow() {
|
||||
if transcriptionPanel == nil {
|
||||
transcriptionPanel = TranscriptionPanel(manager: manager)
|
||||
}
|
||||
transcriptionPanel?.show()
|
||||
}
|
||||
|
||||
private func showSettingsWindow() {
|
||||
func showSettingsWindow() {
|
||||
if let settingsWindow, settingsWindow.isVisible {
|
||||
settingsWindow.makeKeyAndOrderFront(nil)
|
||||
NSApp.activate(ignoringOtherApps: true)
|
||||
return
|
||||
}
|
||||
let window = NSWindow(
|
||||
|
|
|
|||
|
|
@ -5,30 +5,36 @@ final class VoxtralWebSocketClient {
|
|||
private var webSocketTask: URLSessionWebSocketTask?
|
||||
private var session: URLSession?
|
||||
private let encoder = JSONEncoder()
|
||||
private var intentionalDisconnect = false
|
||||
|
||||
var onEvent: ((VoxtralEvent) -> Void)?
|
||||
|
||||
func connect(apiKey: String, delayMs: Int) {
|
||||
guard let url = URL(string: "wss://api.mistral.ai/v1/audio/transcriptions/realtime") else { return }
|
||||
func connect(apiKey: String, model: String = "voxtral-mini-transcribe-realtime-2602", delayMs: Int) {
|
||||
var components = URLComponents(string: "wss://api.mistral.ai/v1/audio/transcriptions/realtime")!
|
||||
components.queryItems = [URLQueryItem(name: "model", value: model)]
|
||||
guard let url = components.url else { return }
|
||||
|
||||
var request = URLRequest(url: url)
|
||||
request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
|
||||
|
||||
session = URLSession(configuration: .default)
|
||||
let config = URLSessionConfiguration.default
|
||||
config.httpAdditionalHeaders = ["Authorization": "Bearer \(apiKey)"]
|
||||
session = URLSession(configuration: config)
|
||||
webSocketTask = session?.webSocketTask(with: request)
|
||||
webSocketTask?.resume()
|
||||
|
||||
// Send session config
|
||||
let config = SessionUpdateMessage(
|
||||
intentionalDisconnect = false
|
||||
// Start receiving first, then send session config
|
||||
receiveLoop()
|
||||
|
||||
// Send session config after connection
|
||||
let sessionConfig = SessionUpdateMessage(
|
||||
session: SessionConfig(
|
||||
audioFormat: AudioFormatConfig(),
|
||||
targetStreamingDelayMs: delayMs
|
||||
)
|
||||
)
|
||||
sendJSON(config)
|
||||
|
||||
// Start receiving
|
||||
receiveLoop()
|
||||
sendJSON(sessionConfig)
|
||||
}
|
||||
|
||||
func sendAudio(_ pcmData: Data) {
|
||||
|
|
@ -42,6 +48,7 @@ final class VoxtralWebSocketClient {
|
|||
}
|
||||
|
||||
func disconnect() {
|
||||
intentionalDisconnect = true
|
||||
sendJSON(AudioEndMessage())
|
||||
webSocketTask?.cancel(with: .normalClosure, reason: nil)
|
||||
webSocketTask = nil
|
||||
|
|
@ -79,10 +86,11 @@ final class VoxtralWebSocketClient {
|
|||
@unknown default:
|
||||
break
|
||||
}
|
||||
self?.receiveLoop()
|
||||
Task { @MainActor in self?.receiveLoop() }
|
||||
case .failure(let error):
|
||||
Task { @MainActor in
|
||||
self?.onEvent?(.error("Connection lost: \(error.localizedDescription)"))
|
||||
guard let self, !self.intentionalDisconnect else { return }
|
||||
self.onEvent?(.error("Connection lost: \(error.localizedDescription)"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
import Cocoa
|
||||
|
||||
final class GlobalShortcut {
|
||||
private var monitor: Any?
|
||||
private var globalMonitor: Any?
|
||||
private var localMonitor: Any?
|
||||
var onTrigger: (() -> Void)?
|
||||
|
||||
func register(keyCode: UInt16, modifiers: UInt) {
|
||||
|
|
@ -9,20 +10,31 @@ final class GlobalShortcut {
|
|||
guard keyCode != 0 || modifiers != 0 else { return }
|
||||
|
||||
let requiredFlags = NSEvent.ModifierFlags(rawValue: modifiers)
|
||||
|
||||
monitor = NSEvent.addGlobalMonitorForEvents(matching: .keyDown) { [weak self] event in
|
||||
let mask: NSEvent.ModifierFlags = [.command, .option, .control, .shift]
|
||||
|
||||
globalMonitor = NSEvent.addGlobalMonitorForEvents(matching: .keyDown) { [weak self] event in
|
||||
if event.keyCode == keyCode && event.modifierFlags.intersection(mask) == requiredFlags {
|
||||
self?.onTrigger?()
|
||||
}
|
||||
}
|
||||
localMonitor = NSEvent.addLocalMonitorForEvents(matching: .keyDown) { [weak self] event in
|
||||
if event.keyCode == keyCode && event.modifierFlags.intersection(mask) == requiredFlags {
|
||||
self?.onTrigger?()
|
||||
return nil
|
||||
}
|
||||
return event
|
||||
}
|
||||
}
|
||||
|
||||
func unregister() {
|
||||
if let monitor {
|
||||
NSEvent.removeMonitor(monitor)
|
||||
if let globalMonitor {
|
||||
NSEvent.removeMonitor(globalMonitor)
|
||||
}
|
||||
monitor = nil
|
||||
globalMonitor = nil
|
||||
if let localMonitor {
|
||||
NSEvent.removeMonitor(localMonitor)
|
||||
}
|
||||
localMonitor = nil
|
||||
}
|
||||
|
||||
deinit {
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ struct MenuBarView: View {
|
|||
Button(manager.isRecording ? "Stop Recording" : "Start Recording") {
|
||||
manager.toggle()
|
||||
}
|
||||
.keyboardShortcut("r")
|
||||
|
||||
if case .error(let msg) = manager.state {
|
||||
Text(msg)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ import SwiftUI
|
|||
struct SettingsView: View {
|
||||
@ObservedObject var settings = AppSettings.shared
|
||||
@State private var isRecordingShortcut = false
|
||||
@State private var keyMonitor: Any?
|
||||
|
||||
var body: some View {
|
||||
Form {
|
||||
|
|
@ -36,8 +37,18 @@ struct SettingsView: View {
|
|||
HStack {
|
||||
Text("Toggle Recording:")
|
||||
Spacer()
|
||||
Button(isRecordingShortcut ? "Press keys..." : settings.shortcutDisplayString) {
|
||||
isRecordingShortcut = true
|
||||
Button(isRecordingShortcut ? "Press a key combo..." : settings.shortcutDisplayString) {
|
||||
startRecordingShortcut()
|
||||
}
|
||||
.buttonStyle(.bordered)
|
||||
|
||||
if settings.hasShortcut {
|
||||
Button("Clear") {
|
||||
settings.shortcutKeyCode = 0
|
||||
settings.shortcutModifiers = 0
|
||||
}
|
||||
.buttonStyle(.borderless)
|
||||
.foregroundStyle(.secondary)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -64,5 +75,38 @@ struct SettingsView: View {
|
|||
}
|
||||
.formStyle(.grouped)
|
||||
.frame(width: 360, height: 340)
|
||||
.onDisappear {
|
||||
stopRecordingShortcut()
|
||||
}
|
||||
}
|
||||
|
||||
/// Starts capturing the next key combination as the toggle-recording shortcut.
///
/// Installs a local key-down monitor that swallows every key press while it is
/// active. A press with at least one of ⌘/⌥/⌃/⇧ held is stored in `settings`
/// and ends the capture; a bare Escape cancels; any other bare key is ignored.
private func startRecordingShortcut() {
    // Clicking the button again while a capture is in progress used to overwrite
    // `keyMonitor`, leaving the previous monitor installed with no handle to
    // remove it. Tear the old one down before installing a new one.
    if let keyMonitor {
        NSEvent.removeMonitor(keyMonitor)
    }

    isRecordingShortcut = true
    keyMonitor = NSEvent.addLocalMonitorForEvents(matching: .keyDown) { event in
        // Ignore caps lock, function and other device-dependent flag bits.
        let mask: NSEvent.ModifierFlags = [.command, .option, .control, .shift]
        let mods = event.modifierFlags.intersection(mask)

        // Require at least one modifier key
        guard !mods.isEmpty else {
            // Escape cancels
            if event.keyCode == 53 {
                stopRecordingShortcut()
            }
            return nil
        }

        settings.shortcutKeyCode = event.keyCode
        settings.shortcutModifiers = mods.rawValue
        stopRecordingShortcut()
        return nil // swallow the event
    }
}
|
||||
|
||||
/// Ends shortcut capture: clears the recording flag and removes the key
/// monitor if one is installed. Safe to call when no capture is active.
private func stopRecordingShortcut() {
    // Runs last on every exit path, so the stored handle is always cleared.
    defer { keyMonitor = nil }

    isRecordingShortcut = false
    guard let activeMonitor = keyMonitor else { return }
    NSEvent.removeMonitor(activeMonitor)
}
|
||||
}
|
||||
|
|
|
|||
29
MyVoxtral/build-app.sh
Executable file
29
MyVoxtral/build-app.sh
Executable file
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash
# Builds the MyVoxtral executable with SwiftPM and wraps it in a minimal .app
# bundle (Contents/MacOS, Contents/Resources, Contents/Info.plist).
# Run from the directory that `swift build` and "$APP_NAME/Info.plist" resolve against.
set -euo pipefail

APP_NAME="MyVoxtral"
PLIST_BUDDY="/usr/libexec/PlistBuddy"

# Build
swift build

# Ask SwiftPM where the products were written instead of hard-coding
# ".build/arm64-apple-macosx/debug", which is wrong on Intel Macs.
BUILD_DIR="$(swift build --show-bin-path)"
APP_BUNDLE="$BUILD_DIR/$APP_NAME.app"
INFO_PLIST="$APP_BUNDLE/Contents/Info.plist"

# Adds a string entry to the bundle's Info.plist only when the key is absent,
# so values already present in the source plist win and real PlistBuddy
# failures are no longer silently discarded.
plist_add_if_missing() {
    local key="$1" value="$2"
    if ! "$PLIST_BUDDY" -c "Print :$key" "$INFO_PLIST" >/dev/null 2>&1; then
        "$PLIST_BUDDY" -c "Add :$key string $value" "$INFO_PLIST"
    fi
}

# Create .app bundle structure
rm -rf "$APP_BUNDLE"
mkdir -p "$APP_BUNDLE/Contents/MacOS"
mkdir -p "$APP_BUNDLE/Contents/Resources"

# Copy binary
cp "$BUILD_DIR/$APP_NAME" "$APP_BUNDLE/Contents/MacOS/$APP_NAME"

# Copy Info.plist
cp "$APP_NAME/Info.plist" "$INFO_PLIST"

# Fill in the bundle keys the system needs to launch the app
plist_add_if_missing CFBundleExecutable "$APP_NAME"
plist_add_if_missing CFBundleIdentifier "com.myvoxtral.app"
plist_add_if_missing CFBundleName "$APP_NAME"
plist_add_if_missing CFBundlePackageType "APPL"

echo "Built: $APP_BUNDLE"
# Quoted because the bin path is now absolute and may contain spaces.
echo "Run with: open \"$APP_BUNDLE\""
|
||||
Loading…
Add table
Add a link
Reference in a new issue