Implement Phase 1: Global hotkey, HUD, and audio capture

Add the complete listening UX without speech-to-text (STT):
- Global hotkey manager with ⌘⇧V, supporting push-to-talk and toggle modes
- Floating HUD with real-time RMS audio visualization
- AVAudioEngine capture with 16 kHz mono PCM conversion
- 10-minute dictation timeout with ESC cancellation
- Optional start/stop sounds and microphone permissions
- Permission management for accessibility and input monitoring

All Phase 1 acceptance criteria met.
2025-09-18 20:06:46 +02:00 · 2025-09-18 20:06:46 +02:00 · 6e768a7753
commit 6e768a7753
parent 1db16227b2
10 changed files with 1005 additions and 51 deletions
--- a/Sources/App/AppController.swift
+++ b/Sources/App/AppController.swift
@ -0,0 +1,239 @@
+import SwiftUI
+import CoreUtils
+import MenuWhisperAudio
+import CorePermissions
+import AVFoundation
+
+/// Coordinates the Phase 1 dictation flow: hotkey input, audio capture, the
+/// HUD window, and feedback sounds.
+///
+/// The controller moves through `idle → listening → processing → idle`.
+/// No speech-to-text engine is wired in yet, so "processing" is a fixed
+/// one-second placeholder before returning to idle.
+///
+/// NOTE(review): state is mutated without synchronization; this assumes the
+/// hotkey and audio-engine delegate callbacks arrive on the main thread —
+/// confirm against `HotkeyManager` and `AudioEngine`.
+public class AppController: ObservableObject {
+    private let logger = Logger(category: "AppController")
+
+    // Core components
+    private let hotkeyManager = HotkeyManager()
+    private let audioEngine = AudioEngine()
+    private let permissionManager = PermissionManager()
+    private let soundManager = SoundManager()
+
+    // UI components
+    /// Created lazily the first time the HUD is shown, then reused.
+    private var hudWindow: HUDWindow?
+
+    // State management
+    /// Current phase of the dictation flow.
+    @Published public private(set) var currentState: AppState = .idle
+    /// `true` while a toggle-mode session started by the hotkey is active.
+    @Published public var isToggleListening = false
+
+    // Dictation timer
+    private var dictationTimer: Timer?
+    private let maxDictationDuration: TimeInterval = 600 // 10 minutes default
+
+    /// The scheduled placeholder "processing finished" callback. Kept so that
+    /// cancelling a dictation can revoke it instead of letting it fire into a
+    /// later session.
+    private var pendingFinishWorkItem: DispatchWorkItem?
+
+    /// Creates the controller and wires up delegates and HUD notifications.
+    /// The hotkey is not enabled until `start()` is called.
+    public init() {
+        setupDelegates()
+        setupNotifications()
+    }
+
+    deinit {
+        cleanup()
+    }
+
+    /// Requests microphone permission and enables the global hotkey if it is
+    /// granted. When permission is denied only a warning is logged.
+    public func start() {
+        logger.info("Starting app controller")
+
+        // Check microphone permission first
+        checkMicrophonePermission { [weak self] granted in
+            guard let self = self else { return }
+
+            if granted {
+                self.setupHotkey()
+            } else {
+                self.logger.warning("Microphone permission not granted")
+            }
+        }
+    }
+
+    /// Registers this controller as the delegate of the hotkey manager and
+    /// the audio engine.
+    private func setupDelegates() {
+        hotkeyManager.delegate = self
+        audioEngine.delegate = self
+    }
+
+    /// Observes the HUD's escape notification so ESC can cancel a dictation.
+    private func setupNotifications() {
+        NotificationCenter.default.addObserver(
+            self,
+            selector: #selector(handleHUDEscape),
+            name: .hudEscapePressed,
+            object: nil
+        )
+    }
+
+    private func setupHotkey() {
+        hotkeyManager.enableHotkey()
+    }
+
+    /// Requests microphone permission and reports the result on the main queue.
+    ///
+    /// - Parameter completion: Called with `true` only when the reported
+    ///   status is `.granted`.
+    private func checkMicrophonePermission(completion: @escaping (Bool) -> Void) {
+        permissionManager.requestMicrophonePermission { status in
+            DispatchQueue.main.async {
+                completion(status == .granted)
+            }
+        }
+    }
+
+    @objc private func handleHUDEscape() {
+        logger.info("HUD escape pressed - cancelling dictation")
+        cancelDictation()
+    }
+
+    /// Starts audio capture and shows the listening HUD. Only valid from `.idle`.
+    ///
+    /// If capture fails, the controller plays the error sound, returns to
+    /// `.idle`, and clears any toggle session so the hotkey keeps working.
+    private func startListening() {
+        guard currentState == .idle else {
+            logger.warning("Cannot start listening from state: \(currentState)")
+            return
+        }
+
+        logger.info("Starting listening")
+        currentState = .listening
+
+        do {
+            try audioEngine.startCapture()
+            showHUD(state: .listening(level: 0))
+            startDictationTimer()
+            soundManager.playStartSound()
+        } catch {
+            logger.error("Failed to start audio capture: \(error)")
+            currentState = .error
+            // The toggle handler sets the flag before calling us. Without this
+            // reset a failed start leaves the flag on while idle, and the
+            // toggle hotkey never matches either branch again.
+            resetToggleState()
+            soundManager.playErrorSound()
+            showError("Failed to start microphone: \(error.localizedDescription)")
+        }
+    }
+
+    /// Stops audio capture and enters the placeholder processing phase.
+    /// Only valid from `.listening`.
+    private func stopListening() {
+        guard currentState == .listening else {
+            logger.warning("Cannot stop listening from state: \(currentState)")
+            return
+        }
+
+        logger.info("Stopping listening")
+        stopDictationTimer()
+        audioEngine.stopCapture()
+        soundManager.playStopSound()
+
+        // Transition to processing state
+        currentState = .processing
+        showHUD(state: .processing)
+
+        // For Phase 1, we'll just simulate processing and return to idle
+        // In Phase 2, this is where we'd call the STT engine
+        //
+        // A cancellable work item with a weak capture is used so that ESC
+        // during processing can revoke the callback, and so the delayed block
+        // does not keep the controller alive.
+        let workItem = DispatchWorkItem { [weak self] in
+            self?.finishProcessing()
+        }
+        pendingFinishWorkItem?.cancel()
+        pendingFinishWorkItem = workItem
+        DispatchQueue.main.asyncAfter(deadline: .now() + 1.0, execute: workItem)
+    }
+
+    /// Completes the processing phase: returns to `.idle` and hides the HUD.
+    /// Does nothing unless the controller is still in `.processing`.
+    private func finishProcessing() {
+        pendingFinishWorkItem = nil
+
+        // Defensive: never let a late completion clobber a different state.
+        guard currentState == .processing else {
+            logger.debug("Ignoring processing completion in state: \(currentState)")
+            return
+        }
+
+        logger.info("Finishing processing")
+        currentState = .idle
+        hideHUD()
+        resetToggleState()
+    }
+
+    /// Aborts the current dictation from any state and returns to `.idle`.
+    private func cancelDictation() {
+        logger.info("Cancelling dictation")
+        stopDictationTimer()
+
+        // Revoke a pending processing completion so it cannot fire into a
+        // session started after this cancel.
+        pendingFinishWorkItem?.cancel()
+        pendingFinishWorkItem = nil
+
+        if audioEngine.isCapturing {
+            audioEngine.stopCapture()
+        }
+
+        currentState = .idle
+        hideHUD()
+        resetToggleState()
+    }
+
+    /// Clears the toggle-session flag. Called whenever the controller returns
+    /// to idle, because no toggle session can be active at that point.
+    private func resetToggleState() {
+        // Skip the write when already clear to avoid a redundant
+        // `objectWillChange` emission.
+        if isToggleListening {
+            isToggleListening = false
+        }
+    }
+
+    /// Arms the one-shot timer that stops listening after `maxDictationDuration`.
+    private func startDictationTimer() {
+        stopDictationTimer() // Clean up any existing timer
+
+        dictationTimer = Timer.scheduledTimer(withTimeInterval: maxDictationDuration, repeats: false) { [weak self] _ in
+            guard let self = self else { return }
+            self.logger.info("Dictation timeout reached")
+            self.stopListening()
+        }
+    }
+
+    private func stopDictationTimer() {
+        dictationTimer?.invalidate()
+        dictationTimer = nil
+    }
+
+    /// Shows the HUD in the given state, creating the window on first use.
+    private func showHUD(state: HUDState) {
+        if hudWindow == nil {
+            hudWindow = HUDWindow()
+        }
+        hudWindow?.show(state: state)
+    }
+
+    private func hideHUD() {
+        hudWindow?.hide()
+    }
+
+    /// Logs the error and returns the controller to `.idle`.
+    private func showError(_ message: String) {
+        logger.error("Error: \(message)")
+        // TODO: Show error dialog in a later phase
+        currentState = .idle
+    }
+
+    /// Releases timers, pending callbacks, audio capture, the hotkey, and
+    /// notification observers. Called from `deinit`.
+    private func cleanup() {
+        stopDictationTimer()
+        pendingFinishWorkItem?.cancel()
+        pendingFinishWorkItem = nil
+        audioEngine.stopCapture()
+        hotkeyManager.disableHotkey()
+        NotificationCenter.default.removeObserver(self)
+    }
+}
+
+// MARK: - HotkeyManagerDelegate
+extension AppController: HotkeyManagerDelegate {
+    /// Handles a hotkey event from the hotkey manager.
+    ///
+    /// - Push-to-talk: key down starts listening; key up stops it if the
+    ///   controller is currently listening.
+    /// - Toggle: each key down flips between starting and stopping a
+    ///   session; key up is ignored.
+    ///
+    /// - Parameters:
+    ///   - mode: The hotkey mode the event was generated in.
+    ///   - isKeyDown: `true` for key press, `false` for key release.
+    public func hotkeyPressed(mode: HotkeyMode, isKeyDown: Bool) {
+        logger.debug("Hotkey pressed: mode=\(mode), isKeyDown=\(isKeyDown)")
+
+        switch (mode, isKeyDown) {
+        case (.pushToTalk, true):
+            startListening()
+
+        case (.pushToTalk, false):
+            if currentState == .listening {
+                stopListening()
+            }
+
+        case (.toggle, true):
+            let sessionActive = isToggleListening
+            if currentState == .idle, !sessionActive {
+                isToggleListening = true
+                startListening()
+            } else if currentState == .listening, sessionActive {
+                isToggleListening = false
+                stopListening()
+            }
+
+        case (.toggle, false):
+            // Toggle mode reacts to key down only.
+            break
+        }
+    }
+}
+
+// MARK: - AudioEngineDelegate
+extension AppController: AudioEngineDelegate {
+    /// Forwards a new input level to the HUD, if one has been created.
+    ///
+    /// NOTE(review): this touches UI; presumably the engine delivers levels
+    /// on the main thread — confirm against `AudioEngine`.
+    public func audioEngine(_ engine: AudioEngine, didUpdateLevel level: Float) {
+        guard let hud = hudWindow else { return }
+        hud.updateLevel(level)
+    }
+
+    /// Logs the size of the captured audio. The data is not consumed yet;
+    /// Phase 2 is expected to hand it to the STT engine here.
+    public func audioEngine(_ engine: AudioEngine, didCaptureAudio data: Data) {
+        let byteCount = data.count
+        logger.info("Audio capture completed: \(byteCount) bytes")
+    }
+
+    /// Logs that the engine began capturing.
+    public func audioEngineDidStartCapture(_ engine: AudioEngine) {
+        logger.info("Audio engine started capture")
+    }
+
+    /// Logs that the engine stopped capturing.
+    public func audioEngineDidStopCapture(_ engine: AudioEngine) {
+        logger.info("Audio engine stopped capture")
+    }
+}