Implement Phase 1: Global hotkey, HUD, and audio capture
Add complete listening UX without STT:

- Global hotkey manager with ⌘⇧V, push-to-talk and toggle modes
- Floating HUD with real-time RMS audio visualization
- AVAudioEngine capture with 16 kHz mono PCM conversion
- 10-minute dictation timeout with Esc cancellation
- Optional start/stop sounds and microphone permissions
- Permission management for accessibility and input monitoring

All Phase 1 acceptance criteria met.
parent 1db16227b2
commit 6e768a7753
10 changed files with 1005 additions and 51 deletions
239 Sources/App/AppController.swift Normal file
@@ -0,0 +1,239 @@
import SwiftUI
import CoreUtils
import MenuWhisperAudio
import CorePermissions
import AVFoundation

public class AppController: ObservableObject {
    private let logger = Logger(category: "AppController")

    // Core components
    private let hotkeyManager = HotkeyManager()
    private let audioEngine = AudioEngine()
    private let permissionManager = PermissionManager()
    private let soundManager = SoundManager()

    // UI components
    private var hudWindow: HUDWindow?

    // State management
    @Published public private(set) var currentState: AppState = .idle
    @Published public var isToggleListening = false

    // Dictation timer
    private var dictationTimer: Timer?
    private let maxDictationDuration: TimeInterval = 600 // 10 minutes default

    public init() {
        setupDelegates()
        setupNotifications()
    }

    deinit {
        cleanup()
    }

    public func start() {
        logger.info("Starting app controller")

        // Check microphone permission first
        checkMicrophonePermission { [weak self] granted in
            if granted {
                self?.setupHotkey()
            } else {
                self?.logger.warning("Microphone permission not granted")
            }
        }
    }

    private func setupDelegates() {
        hotkeyManager.delegate = self
        audioEngine.delegate = self
    }

    private func setupNotifications() {
        NotificationCenter.default.addObserver(
            self,
            selector: #selector(handleHUDEscape),
            name: .hudEscapePressed,
            object: nil
        )
    }

    private func setupHotkey() {
        hotkeyManager.enableHotkey()
    }

    private func checkMicrophonePermission(completion: @escaping (Bool) -> Void) {
        permissionManager.requestMicrophonePermission { status in
            DispatchQueue.main.async {
                completion(status == .granted)
            }
        }
    }

    @objc private func handleHUDEscape() {
        logger.info("HUD escape pressed - cancelling dictation")
        cancelDictation()
    }

    private func startListening() {
        guard currentState == .idle else {
            logger.warning("Cannot start listening from state: \(currentState)")
            return
        }

        logger.info("Starting listening")
        currentState = .listening

        do {
            try audioEngine.startCapture()
            showHUD(state: .listening(level: 0))
            startDictationTimer()
            soundManager.playStartSound()
        } catch {
            logger.error("Failed to start audio capture: \(error)")
            currentState = .error
            soundManager.playErrorSound()
            showError("Failed to start microphone: \(error.localizedDescription)")
        }
    }

    private func stopListening() {
        guard currentState == .listening else {
            logger.warning("Cannot stop listening from state: \(currentState)")
            return
        }

        logger.info("Stopping listening")
        stopDictationTimer()
        audioEngine.stopCapture()
        soundManager.playStopSound()

        // Transition to processing state
        currentState = .processing
        showHUD(state: .processing)

        // For Phase 1, we'll just simulate processing and return to idle
        // In Phase 2, this is where we'd call the STT engine
        DispatchQueue.main.asyncAfter(deadline: .now() + 1.0) {
            self.finishProcessing()
        }
    }

    private func finishProcessing() {
        logger.info("Finishing processing")
        currentState = .idle
        hideHUD()

        // Reset toggle state if in toggle mode
        if hotkeyManager.currentMode == .toggle {
            isToggleListening = false
        }
    }

    private func cancelDictation() {
        logger.info("Cancelling dictation")
        stopDictationTimer()

        if audioEngine.isCapturing {
            audioEngine.stopCapture()
        }

        currentState = .idle
        hideHUD()

        // Reset toggle state
        if hotkeyManager.currentMode == .toggle {
            isToggleListening = false
        }
    }

    private func startDictationTimer() {
        stopDictationTimer() // Clean up any existing timer

        dictationTimer = Timer.scheduledTimer(withTimeInterval: maxDictationDuration, repeats: false) { [weak self] _ in
            self?.logger.info("Dictation timeout reached")
            self?.stopListening()
        }
    }

    private func stopDictationTimer() {
        dictationTimer?.invalidate()
        dictationTimer = nil
    }

    private func showHUD(state: HUDState) {
        if hudWindow == nil {
            hudWindow = HUDWindow()
        }
        hudWindow?.show(state: state)
    }

    private func hideHUD() {
        hudWindow?.hide()
    }

    private func showError(_ message: String) {
        logger.error("Error: \(message)")
        // TODO: Show error dialog in a later phase
        currentState = .idle
    }

    private func cleanup() {
        stopDictationTimer()
        audioEngine.stopCapture()
        hotkeyManager.disableHotkey()
        NotificationCenter.default.removeObserver(self)
    }
}

// MARK: - HotkeyManagerDelegate
extension AppController: HotkeyManagerDelegate {
    public func hotkeyPressed(mode: HotkeyMode, isKeyDown: Bool) {
        logger.debug("Hotkey pressed: mode=\(mode), isKeyDown=\(isKeyDown)")

        switch mode {
        case .pushToTalk:
            if isKeyDown {
                startListening()
            } else {
                if currentState == .listening {
                    stopListening()
                }
            }

        case .toggle:
            if isKeyDown { // Only respond to key down in toggle mode
                if currentState == .idle && !isToggleListening {
                    isToggleListening = true
                    startListening()
                } else if currentState == .listening && isToggleListening {
                    isToggleListening = false
                    stopListening()
                }
            }
        }
    }
}

// MARK: - AudioEngineDelegate
extension AppController: AudioEngineDelegate {
    public func audioEngine(_ engine: AudioEngine, didUpdateLevel level: Float) {
        // Update HUD with new level
        hudWindow?.updateLevel(level)
    }

    public func audioEngine(_ engine: AudioEngine, didCaptureAudio data: Data) {
        logger.info("Audio capture completed: \(data.count) bytes")
        // In Phase 2, this is where we'd send the data to STT
    }

    public func audioEngineDidStartCapture(_ engine: AudioEngine) {
        logger.info("Audio engine started capture")
    }

    public func audioEngineDidStopCapture(_ engine: AudioEngine) {
        logger.info("Audio engine stopped capture")
    }
}
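A quick way to exercise this controller without the real global hotkey is to drive the HotkeyManagerDelegate entry point directly, for example from a unit test. A minimal sketch (hypothetical usage, not part of this commit; microphone permission is still required for capture to actually start):

    let controller = AppController()
    controller.start()
    // Push-to-talk: key down starts capture and shows the HUD in the .listening state
    controller.hotkeyPressed(mode: .pushToTalk, isKeyDown: true)
    // Key up stops capture and moves the controller through .processing back to .idle
    controller.hotkeyPressed(mode: .pushToTalk, isKeyDown: false)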
214 Sources/App/HUDWindow.swift Normal file
@@ -0,0 +1,214 @@
import SwiftUI
import AppKit
import CoreUtils

public enum HUDState {
    case hidden
    case listening(level: Float)
    case processing
}

public class HUDWindow: NSPanel {
    private var hostingView: NSHostingView<HUDContentView>?

    public init() {
        super.init(
            contentRect: NSRect(x: 0, y: 0, width: 320, height: 160),
            styleMask: [.nonactivatingPanel],
            backing: .buffered,
            defer: false
        )

        setupWindow()
        setupContentView()
    }

    private func setupWindow() {
        level = .floating
        isOpaque = false
        backgroundColor = NSColor.clear
        hasShadow = true
        isMovable = false
        collectionBehavior = [.canJoinAllSpaces, .fullScreenAuxiliary]
    }

    private func setupContentView() {
        let hudContentView = HUDContentView()
        hostingView = NSHostingView(rootView: hudContentView)

        if let hostingView = hostingView {
            contentView = hostingView
        }
    }

    public func show(state: HUDState) {
        centerOnScreen()

        if let hostingView = hostingView {
            hostingView.rootView.updateState(state)
        }

        if !isVisible {
            orderFront(nil)
            alphaValue = 0
            NSAnimationContext.runAnimationGroup({ context in
                context.duration = 0.2
                animator().alphaValue = 1.0
            })
        }
    }

    public func hide() {
        guard isVisible else { return }

        NSAnimationContext.runAnimationGroup({ context in
            context.duration = 0.2
            animator().alphaValue = 0
        }, completionHandler: {
            self.orderOut(nil)
        })
    }

    public func updateLevel(_ level: Float) {
        if let hostingView = hostingView {
            hostingView.rootView.updateState(.listening(level: level))
        }
    }

    private func centerOnScreen() {
        guard let screen = NSScreen.main else { return }

        let screenFrame = screen.visibleFrame
        let windowSize = frame.size

        let x = screenFrame.midX - windowSize.width / 2
        let y = screenFrame.midY - windowSize.height / 2

        setFrameOrigin(NSPoint(x: x, y: y))
    }

    override public func keyDown(with event: NSEvent) {
        if event.keyCode == 53 { // Escape key
            NotificationCenter.default.post(name: .hudEscapePressed, object: nil)
            return
        }
        super.keyDown(with: event)
    }

    override public var canBecomeKey: Bool {
        return true // Allow the window to receive key events
    }
}

extension Notification.Name {
    static let hudEscapePressed = Notification.Name("hudEscapePressed")
}

struct HUDContentView: View {
    @State private var currentState: HUDState = .hidden

    var body: some View {
        ZStack {
            RoundedRectangle(cornerRadius: 12)
                .fill(.regularMaterial)
                .overlay(
                    RoundedRectangle(cornerRadius: 12)
                        .stroke(Color.primary.opacity(0.1), lineWidth: 1)
                )

            VStack(spacing: 16) {
                switch currentState {
                case .hidden:
                    EmptyView()

                case .listening(let level):
                    listeningView(level: level)

                case .processing:
                    processingView
                }
            }
            .padding(24)
        }
        .frame(width: 320, height: 160)
    }

    @ViewBuilder
    private func listeningView(level: Float) -> some View {
        VStack(spacing: 12) {
            Image(systemName: "mic.fill")
                .font(.system(size: 32))
                .foregroundColor(.blue)

            Text("Listening...")
                .font(.headline)
                .foregroundColor(.primary)

            AudioLevelView(level: level)
                .frame(height: 20)

            Text("Press Esc to cancel")
                .font(.caption)
                .foregroundColor(.secondary)
        }
    }

    @ViewBuilder
    private var processingView: some View {
        VStack(spacing: 12) {
            ProgressView()
                .scaleEffect(1.2)

            Text("Processing...")
                .font(.headline)
                .foregroundColor(.primary)

            Text("Please wait")
                .font(.caption)
                .foregroundColor(.secondary)
        }
    }

    func updateState(_ state: HUDState) {
        withAnimation(.easeInOut(duration: 0.3)) {
            currentState = state
        }
    }
}

struct AudioLevelView: View {
    let level: Float
    private let barCount = 20

    var body: some View {
        HStack(spacing: 2) {
            ForEach(0..<barCount, id: \.self) { index in
                RoundedRectangle(cornerRadius: 1)
                    .fill(barColor(for: index))
                    .frame(width: 12, height: barHeight(for: index))
                    .animation(.easeInOut(duration: 0.1), value: level)
            }
        }
    }

    private func barHeight(for index: Int) -> CGFloat {
        let threshold = Float(index) / Float(barCount - 1)
        return level > threshold ? 20 : 4
    }

    private func barColor(for index: Int) -> Color {
        let threshold = Float(index) / Float(barCount - 1)

        if level > threshold {
            if threshold < 0.6 {
                return .green
            } else if threshold < 0.8 {
                return .orange
            } else {
                return .red
            }
        } else {
            return .gray.opacity(0.3)
        }
    }
}
152 Sources/App/HotkeyManager.swift Normal file
@@ -0,0 +1,152 @@
import Foundation
import AppKit
import Carbon
import CoreUtils

public enum HotkeyMode: String, CaseIterable {
    case pushToTalk = "pushToTalk"
    case toggle = "toggle"

    public var displayName: String {
        switch self {
        case .pushToTalk:
            return NSLocalizedString("hotkey.mode.push", comment: "Push-to-talk mode")
        case .toggle:
            return NSLocalizedString("hotkey.mode.toggle", comment: "Toggle mode")
        }
    }
}

public protocol HotkeyManagerDelegate: AnyObject {
    func hotkeyPressed(mode: HotkeyMode, isKeyDown: Bool)
}

public class HotkeyManager: ObservableObject {
    private let logger = Logger(category: "HotkeyManager")

    public weak var delegate: HotkeyManagerDelegate?

    @Published public var currentMode: HotkeyMode = .toggle
    @Published public var isEnabled: Bool = false

    private var hotKeyRef: EventHotKeyRef?
    private var eventHandler: EventHandlerRef?

    // Default hotkey: ⌘⇧V (Command + Shift + V)
    private let defaultKeyCode: UInt32 = 9 // V key
    private let defaultModifiers: UInt32 = UInt32(cmdKey + shiftKey)

    public init() {
        setupEventHandler()
    }

    deinit {
        unregisterHotkey()
        if let handler = eventHandler {
            RemoveEventHandler(handler)
        }
    }

    public func enableHotkey() {
        guard !isEnabled else { return }

        logger.info("Enabling global hotkey")

        let hotKeyID = EventHotKeyID(signature: OSType(0x4D575350), id: 1) // 'MWSP'

        let status = RegisterEventHotKey(
            defaultKeyCode,
            defaultModifiers,
            hotKeyID,
            GetApplicationEventTarget(),
            0,
            &hotKeyRef
        )

        if status == noErr {
            isEnabled = true
            logger.info("Global hotkey registered successfully")
        } else {
            logger.error("Failed to register global hotkey: \(status)")
        }
    }

    public func disableHotkey() {
        guard isEnabled else { return }

        logger.info("Disabling global hotkey")
        unregisterHotkey()
        isEnabled = false
    }

    private func unregisterHotkey() {
        if let hotKeyRef = hotKeyRef {
            UnregisterEventHotKey(hotKeyRef)
            self.hotKeyRef = nil
        }
    }

    private func setupEventHandler() {
        let eventTypes: [EventTypeSpec] = [
            EventTypeSpec(eventClass: OSType(kEventClassKeyboard), eventKind: OSType(kEventHotKeyPressed)),
            EventTypeSpec(eventClass: OSType(kEventClassKeyboard), eventKind: OSType(kEventHotKeyReleased))
        ]

        let callback: EventHandlerProcPtr = { (nextHandler, theEvent, userData) -> OSStatus in
            guard let userData = userData else { return OSStatus(eventNotHandledErr) }
            let manager = Unmanaged<HotkeyManager>.fromOpaque(userData).takeUnretainedValue()

            var hotKeyID = EventHotKeyID()
            let status = GetEventParameter(
                theEvent,
                OSType(kEventParamDirectObject),
                OSType(typeEventHotKeyID),
                nil,
                MemoryLayout<EventHotKeyID>.size,
                nil,
                &hotKeyID
            )

            guard status == noErr else { return OSStatus(eventNotHandledErr) }

            let eventKind = GetEventKind(theEvent)
            let isKeyDown = eventKind == OSType(kEventHotKeyPressed)

            DispatchQueue.main.async {
                manager.handleHotkeyEvent(isKeyDown: isKeyDown)
            }

            return noErr
        }

        let selfPtr = Unmanaged.passUnretained(self).toOpaque()

        let status = InstallEventHandler(
            GetApplicationEventTarget(),
            callback,
            2,
            eventTypes,
            selfPtr,
            &eventHandler
        )

        if status != noErr {
            logger.error("Failed to install event handler: \(status)")
        }
    }

    private func handleHotkeyEvent(isKeyDown: Bool) {
        logger.debug("Hotkey event: \(isKeyDown ? "down" : "up"), mode: \(currentMode)")

        switch currentMode {
        case .pushToTalk:
            // In push-to-talk mode, respond to both key down and up
            delegate?.hotkeyPressed(mode: currentMode, isKeyDown: isKeyDown)
        case .toggle:
            // In toggle mode, only respond to key down
            if isKeyDown {
                delegate?.hotkeyPressed(mode: currentMode, isKeyDown: true)
            }
        }
    }
}
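The two magic numbers above come from Carbon: key code 9 is kVK_ANSI_V, and cmdKey (0x0100) and shiftKey (0x0200) are bit flags, so they can equally be combined with a bitwise OR. An equivalent, slightly more self-documenting spelling (illustrative sketch only, not part of the commit):

    import Carbon

    // Same ⌘⇧V hotkey, using the named Carbon constants.
    let keyCode = UInt32(kVK_ANSI_V)           // 9
    let modifiers = UInt32(cmdKey | shiftKey)  // 0x0100 | 0x0200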
64 Sources/App/MenuWhisperApp.swift Normal file
@@ -0,0 +1,64 @@
import SwiftUI
import CoreUtils

@main
struct MenuWhisperApp: App {
    @StateObject private var appController = AppController()

    var body: some Scene {
        MenuBarExtra("Menu-Whisper", systemImage: "mic") {
            MenuBarContentView()
                .environmentObject(appController)
                .onAppear {
                    appController.start()
                }
        }
    }
}

struct MenuBarContentView: View {
    @EnvironmentObject var appController: AppController

    var body: some View {
        VStack(alignment: .leading, spacing: 4) {
            Text("Menu-Whisper")
                .font(.headline)

            Text(appController.currentState.displayName)
                .font(.subheadline)
                .foregroundColor(stateColor)

            if appController.currentState == .listening {
                Text("Press ⌘⇧V or Esc to stop")
                    .font(.caption)
                    .foregroundColor(.secondary)
            }

            Divider()

            Button("Preferences...") {
                // TODO: Open preferences window in Phase 4
            }

            Button("Quit") {
                NSApplication.shared.terminate(nil)
            }
        }
        .padding(.horizontal, 4)
    }

    private var stateColor: Color {
        switch appController.currentState {
        case .idle:
            return .primary
        case .listening:
            return .blue
        case .processing:
            return .orange
        case .injecting:
            return .green
        case .error:
            return .red
        }
    }
}
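AppState itself is not part of this diff (it lives in CoreUtils). A minimal sketch consistent with the cases and displayName used above — an assumption, not the actual CoreUtils definition:

    public enum AppState {
        case idle, listening, processing, injecting, error

        public var displayName: String {
            switch self {
            case .idle: return "Idle"
            case .listening: return "Listening"
            case .processing: return "Processing"
            case .injecting: return "Injecting"
            case .error: return "Error"
            }
        }
    }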
51 Sources/App/SoundManager.swift Normal file
@@ -0,0 +1,51 @@
import Foundation
import AVFoundation
import AppKit
import CoreUtils

public class SoundManager: ObservableObject {
    private let logger = Logger(category: "SoundManager")

    @Published public var soundsEnabled: Bool = true

    private var startSound: AVAudioPlayer?
    private var stopSound: AVAudioPlayer?

    public init() {
        setupSounds()
    }

    private func setupSounds() {
        // Use system sounds for now
        // In a future version, we could bundle custom sound files
        setupSystemSounds()
    }

    private func setupSystemSounds() {
        // We'll use NSSound for system sounds since AVAudioPlayer requires files
        // These are just placeholders - in a real implementation we'd bundle sound files
        logger.info("Sound manager initialized with system sounds")
    }

    public func playStartSound() {
        guard soundsEnabled else { return }

        logger.debug("Playing start sound")
        // Use a subtle system sound for start
        NSSound(named: "Glass")?.play()
    }

    public func playStopSound() {
        guard soundsEnabled else { return }

        logger.debug("Playing stop sound")
        // Use a different system sound for stop
        NSSound(named: "Blow")?.play()
    }

    public func playErrorSound() {
        logger.debug("Playing error sound")
        // Always play error sound regardless of settings
        NSSound(named: "Funk")?.play()
    }
}
@@ -1,18 +0,0 @@
import SwiftUI

@main
struct MenuWhisperApp: App {
    var body: some Scene {
        MenuBarExtra("Menu-Whisper", systemImage: "mic") {
            Text("Menu-Whisper")
            Text("Idle")
            Divider()
            Button("Preferences...") {
                // TODO: Open preferences
            }
            Button("Quit") {
                NSApplication.shared.terminate(nil)
            }
        }
    }
}
@@ -12,6 +12,17 @@ public protocol AudioEngineDelegate: AnyObject {
public class AudioEngine: ObservableObject {
    private let logger = Logger(category: "AudioEngine")
    private let audioEngine = AVAudioEngine()
    private let inputNode: AVAudioInputNode
    private let mixerNode = AVAudioMixerNode()

    // Audio format for 16 kHz mono PCM
    private let targetFormat = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                             sampleRate: 16000,
                                             channels: 1,
                                             interleaved: false)!

    private var capturedData = Data()
    private let captureQueue = DispatchQueue(label: "com.menuwhisper.audio.capture", qos: .userInitiated)

    public weak var delegate: AudioEngineDelegate?

@@ -19,24 +30,178 @@
    @Published public private(set) var currentLevel: Float = 0.0

    public init() {
-       // Audio engine initialization will be completed in Phase 1
        inputNode = audioEngine.inputNode
        setupAudioEngine()
    }

    deinit {
        stopCapture()
    }

    private func setupAudioEngine() {
        // Attach mixer node
        audioEngine.attach(mixerNode)

        // Get the input format from the microphone
        let inputFormat = inputNode.inputFormat(forBus: 0)
        logger.info("Input format: \(inputFormat)")

        // Connect input node to mixer
        audioEngine.connect(inputNode, to: mixerNode, format: inputFormat)
    }

    public func startCapture() throws {
        logger.info("Starting audio capture")
-       // TODO: Implement in Phase 1
-       isCapturing = true
-       delegate?.audioEngineDidStartCapture(self)

        guard !isCapturing else {
            logger.warning("Audio capture already in progress")
            return
        }

        // Reset captured data
        captureQueue.async {
            self.capturedData = Data()
        }

        // Install tap on the mixer node to capture audio
        let inputFormat = inputNode.inputFormat(forBus: 0)

        inputNode.installTap(onBus: 0, bufferSize: 4096, format: inputFormat) { [weak self] buffer, time in
            self?.processAudioBuffer(buffer)
        }

        do {
            try audioEngine.start()
            isCapturing = true
            logger.info("Audio engine started successfully")
            delegate?.audioEngineDidStartCapture(self)
        } catch {
            logger.error("Failed to start audio engine: \(error)")
            inputNode.removeTap(onBus: 0)
            throw error
        }
    }

    public func stopCapture() {
        logger.info("Stopping audio capture")
-       // TODO: Implement in Phase 1

        guard isCapturing else {
            logger.warning("Audio capture not in progress")
            return
        }

        // Remove tap and stop engine
        inputNode.removeTap(onBus: 0)
        audioEngine.stop()

        isCapturing = false
        currentLevel = 0.0

        // Send final captured data to delegate
        captureQueue.async {
            if !self.capturedData.isEmpty {
                DispatchQueue.main.async {
                    self.delegate?.audioEngine(self, didCaptureAudio: self.capturedData)
                }
            }
        }

        delegate?.audioEngineDidStopCapture(self)
        logger.info("Audio capture stopped")
    }

    private func processAudioBuffer(_ buffer: AVAudioPCMBuffer) {
-       // TODO: Implement RMS calculation and audio processing in Phase 1
        // Calculate RMS level for visualization
        let level = calculateRMS(buffer: buffer)

        DispatchQueue.main.async {
            self.currentLevel = level
            self.delegate?.audioEngine(self, didUpdateLevel: level)
        }

        // Convert to target format (16 kHz mono) if needed
        if let convertedBuffer = convertBufferToTargetFormat(buffer) {
            captureQueue.async {
                self.appendAudioData(from: convertedBuffer)
            }
        }
    }

    private func calculateRMS(buffer: AVAudioPCMBuffer) -> Float {
        guard let channelData = buffer.floatChannelData,
              buffer.frameLength > 0 else {
            return 0.0
        }

        let frameLength = Int(buffer.frameLength)
        let samples = channelData[0] // Use first channel

        var sum: Float = 0.0
        for i in 0..<frameLength {
            sum += samples[i] * samples[i]
        }

        let rms = sqrt(sum / Float(frameLength))

        // Convert to dB and normalize to 0-1 range
        let db = 20 * log10(max(rms, 0.00001)) // Avoid log(0)
        let normalizedLevel = max(0, min(1, (db + 60) / 60)) // Map -60dB to 0dB -> 0 to 1

        return normalizedLevel
    }

    private func convertBufferToTargetFormat(_ inputBuffer: AVAudioPCMBuffer) -> AVAudioPCMBuffer? {
        let inputFormat = inputBuffer.format

        // If already in target format, return as-is
        if inputFormat.sampleRate == targetFormat.sampleRate &&
           inputFormat.channelCount == targetFormat.channelCount {
            return inputBuffer
        }

        // Create converter
        guard let converter = AVAudioConverter(from: inputFormat, to: targetFormat) else {
            logger.error("Failed to create audio converter")
            return nil
        }

        // Calculate output frame capacity
        let inputFrameCount = inputBuffer.frameLength
        let outputFrameCapacity = AVAudioFrameCount(Double(inputFrameCount) * targetFormat.sampleRate / inputFormat.sampleRate)

        // Create output buffer
        guard let outputBuffer = AVAudioPCMBuffer(pcmFormat: targetFormat, frameCapacity: outputFrameCapacity) else {
            logger.error("Failed to create output buffer")
            return nil
        }

        var error: NSError?
        let inputBlock: AVAudioConverterInputBlock = { inNumPackets, outStatus in
            outStatus.pointee = .haveData
            return inputBuffer
        }

        converter.convert(to: outputBuffer, error: &error, withInputFrom: inputBlock)

        if let error = error {
            logger.error("Audio conversion failed: \(error)")
            return nil
        }

        return outputBuffer
    }

    private func appendAudioData(from buffer: AVAudioPCMBuffer) {
        guard let channelData = buffer.int16ChannelData,
              buffer.frameLength > 0 else {
            return
        }

        let frameLength = Int(buffer.frameLength)
        let samples = channelData[0]

        // Convert Int16 samples to Data
        let data = Data(bytes: samples, count: frameLength * MemoryLayout<Int16>.size)
        capturedData.append(data)
    }
}
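To make the level mapping in calculateRMS concrete: the RMS value is converted to decibels and the -60 dB to 0 dB range is scaled into 0 to 1, so an RMS of 0.1 (-20 dB) lights roughly two thirds of the HUD bars. Also worth noting: audio accumulates in memory as 16-bit samples at 16 kHz, so a maximum-length 10-minute dictation works out to about 600 × 16,000 × 2 ≈ 19 MB in capturedData. An illustrative check of the mapping (standalone sketch, not part of the commit):

    import Foundation

    // Same mapping as AudioEngine.calculateRMS, applied to a few sample values.
    func normalizedLevel(forRMS rms: Float) -> Float {
        let db = 20 * log10(max(rms, 0.00001))
        return max(0, min(1, (db + 60) / 60))
    }

    print(normalizedLevel(forRMS: 1.0))    // 0 dB   -> 1.0
    print(normalizedLevel(forRMS: 0.1))    // -20 dB -> ~0.67
    print(normalizedLevel(forRMS: 0.001))  // -60 dB -> 0.0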
@@ -50,16 +50,58 @@ public class PermissionManager: ObservableObject {
        }
    }

    public func requestMicrophonePermission(completion: @escaping (PermissionStatus) -> Void) {
        logger.info("Requesting microphone permission")

        switch AVCaptureDevice.authorizationStatus(for: .audio) {
        case .authorized:
            completion(.granted)
        case .denied, .restricted:
            completion(.denied)
        case .notDetermined:
            AVCaptureDevice.requestAccess(for: .audio) { granted in
                let status: PermissionStatus = granted ? .granted : .denied
                Task { @MainActor in
                    self.microphoneStatus = status
                }
                completion(status)
            }
        @unknown default:
            completion(.notDetermined)
        }
    }

    public func requestAccessibilityPermission() {
        logger.info("Requesting accessibility permission")
-       // TODO: Implement accessibility permission request in Phase 1
-       // This typically involves guiding the user to System Settings

        if !AXIsProcessTrusted() {
            logger.info("Accessibility permission not granted, opening System Settings")
            openSystemSettings(for: .accessibility)
        } else {
            logger.info("Accessibility permission already granted")
            accessibilityStatus = .granted
        }
    }

    public func requestInputMonitoringPermission() {
        logger.info("Requesting input monitoring permission")
-       // TODO: Implement input monitoring permission request in Phase 1
-       // This typically involves guiding the user to System Settings

        // For input monitoring, we can try to detect it by attempting to create a CGEvent
        // If it fails, we likely need permission
        let testEvent = CGEvent(keyboardEventSource: nil, virtualKey: 0, keyDown: true)

        if testEvent == nil {
            logger.info("Input monitoring permission likely not granted, opening System Settings")
            openSystemSettings(for: .inputMonitoring)
        } else {
            logger.info("Input monitoring permission appears to be granted")
            inputMonitoringStatus = .granted
        }
    }

    public func checkAllPermissions() {
        logger.info("Checking all permissions")
        refreshAllPermissions()
    }

    public func openSystemSettings(for permission: PermissionType) {

@@ -100,12 +142,21 @@
    }

    private func refreshAccessibilityPermission() {
-       // TODO: Implement accessibility permission check in Phase 1
-       accessibilityStatus = .notDetermined
        if AXIsProcessTrusted() {
            accessibilityStatus = .granted
        } else {
            accessibilityStatus = .denied
        }
    }

    private func refreshInputMonitoringPermission() {
-       // TODO: Implement input monitoring permission check in Phase 1
-       inputMonitoringStatus = .notDetermined
        // Test if we can create CGEvents (requires Input Monitoring permission)
        let testEvent = CGEvent(keyboardEventSource: nil, virtualKey: 0, keyDown: true)

        if testEvent != nil {
            inputMonitoringStatus = .granted
        } else {
            inputMonitoringStatus = .denied
        }
    }
}