Initial commit

This commit is contained in:
Felipe M 2025-09-18 19:56:06 +02:00
commit 1db16227b2
Signed by: fmartingr
GPG key ID: CCFBC5637D4000A8
31 changed files with 2175 additions and 0 deletions

18
Sources/App/main.swift Normal file
View file

@ -0,0 +1,18 @@
import SwiftUI
/// Application entry point: a menu-bar app that exposes a single `MenuBarExtra`.
///
/// NOTE(review): the commit listing names this file `Sources/App/main.swift`. Swift treats a
/// file called `main.swift` as top-level code, which conflicts with the `@main` attribute —
/// confirm the file name (for example `MenuWhisperApp.swift`) before building.
@main
struct MenuWhisperApp: App {
    var body: some Scene {
        MenuBarExtra("Menu-Whisper", systemImage: "mic") {
            menuContent
        }
    }

    /// Items shown when the menu bar icon is opened: title, status line, and the two actions.
    @ViewBuilder
    private var menuContent: some View {
        Text("Menu-Whisper")
        Text("Idle")
        Divider()
        Button("Preferences...", action: openPreferences)
        Button("Quit", action: quit)
    }

    /// Placeholder action for the "Preferences..." item; currently does nothing.
    private func openPreferences() {
        // TODO: Open preferences
    }

    /// Terminates the application.
    private func quit() {
        NSApplication.shared.terminate(nil)
    }
}

View file

@ -0,0 +1,42 @@
import Foundation
import AVFoundation
import CoreUtils
/// Callbacks an `AudioEngine` sends to its delegate.
///
/// Restricted to class types (`AnyObject`) so that `AudioEngine` can hold the delegate weakly.
public protocol AudioEngineDelegate: AnyObject {
/// Reports a new input level value for `engine`.
/// NOTE(review): the scale of `level` is not defined here (an RMS calculation is only planned) — confirm once implemented.
func audioEngine(_ engine: AudioEngine, didUpdateLevel level: Float)
/// Delivers captured audio as raw `Data`.
/// NOTE(review): the sample format and rate of `data` are not specified in this file — confirm.
func audioEngine(_ engine: AudioEngine, didCaptureAudio data: Data)
/// Sent by `AudioEngine.startCapture()` after it sets `isCapturing` to `true`.
func audioEngineDidStartCapture(_ engine: AudioEngine)
/// Sent by `AudioEngine.stopCapture()` after it sets `isCapturing` to `false`.
func audioEngineDidStopCapture(_ engine: AudioEngine)
}
/// Microphone capture front end (scaffold).
///
/// At this stage `startCapture()` and `stopCapture()` only flip `isCapturing` and notify the
/// delegate; the wrapped `AVAudioEngine` is created but not yet configured or started.
public class AudioEngine: ObservableObject {
    private let logger = Logger(category: "AudioEngine")
    private let audioEngine = AVAudioEngine()

    /// Receiver of capture callbacks. Held weakly, so the owner must keep it alive.
    public weak var delegate: AudioEngineDelegate?

    /// `true` after `startCapture()` and until the next `stopCapture()`.
    @Published public private(set) var isCapturing = false

    /// Most recent input level. Nothing writes this yet, so it stays at `0.0`.
    @Published public private(set) var currentLevel: Float = 0.0

    /// Creates an idle engine.
    public init() {
        // Audio engine initialization will be completed in Phase 1
    }

    /// Marks the engine as capturing and notifies the delegate.
    ///
    /// - Throws: Nothing in the current scaffold; the signature is `throws` so the real
    ///   implementation can report failures.
    public func startCapture() throws {
        logger.info("Starting audio capture")
        // TODO: Implement in Phase 1
        transition(toCapturing: true)
    }

    /// Marks the engine as not capturing and notifies the delegate.
    public func stopCapture() {
        logger.info("Stopping audio capture")
        // TODO: Implement in Phase 1
        transition(toCapturing: false)
    }

    /// Updates `isCapturing` first, then sends the matching delegate callback.
    private func transition(toCapturing capturing: Bool) {
        isCapturing = capturing
        guard let receiver = delegate else { return }
        if capturing {
            receiver.audioEngineDidStartCapture(self)
        } else {
            receiver.audioEngineDidStopCapture(self)
        }
    }

    /// Hook for per-buffer processing; intentionally empty for now.
    private func processAudioBuffer(_ buffer: AVAudioPCMBuffer) {
        // TODO: Implement RMS calculation and audio processing in Phase 1
    }
}

View file

@ -0,0 +1,73 @@
import Foundation
import AppKit
import CoreUtils
/// Strategy `TextInjector` uses to deliver text to the focused application.
public enum InjectionMethod {
/// Deliver the text through the general pasteboard.
case paste
/// Deliver the text by simulated typing.
case typing
}
/// Failures reported by `TextInjector`.
public enum InjectionError: Error, LocalizedError {
    /// Secure input was detected, so the text was not injected.
    case secureInputActive
    /// Accessibility permission is needed before text can be injected.
    case accessibilityPermissionRequired
    /// Injection failed; the associated value carries the reason.
    case injectionFailed(String)

    /// User-facing description. The first two cases are looked up in the localization tables;
    /// `injectionFailed` uses a fixed English prefix followed by the reason.
    public var errorDescription: String? {
        let description: String
        switch self {
        case .secureInputActive:
            description = NSLocalizedString("preferences.insertion.secure_input.message", comment: "Secure input message")
        case .accessibilityPermissionRequired:
            description = NSLocalizedString("permissions.accessibility.message", comment: "Accessibility permission message")
        case let .injectionFailed(reason):
            description = "Text injection failed: \(reason)"
        }
        return description
    }
}
/// Delivers transcribed text to the focused application.
///
/// Scaffold status: the paste path only copies to the pasteboard, the typing path is a
/// no-op, and the secure-input check always reports `false`.
public class TextInjector {
    private let logger = Logger(category: "TextInjector")

    /// Creates an injector with no configuration.
    public init() {}

    /// Injects `text` using `method`.
    ///
    /// - Parameters:
    ///   - text: The text to deliver.
    ///   - method: Delivery strategy; defaults to `.paste`.
    /// - Throws: `InjectionError.secureInputActive` when secure input is detected. In that
    ///   case the text is still placed on the pasteboard before the error is thrown.
    public func injectText(_ text: String, method: InjectionMethod = .paste) throws {
        logger.info("Injecting text using method: \(method)")

        // Secure input blocks injection: leave the text on the pasteboard and bail out.
        guard !isSecureInputActive() else {
            copyToClipboard(text)
            throw InjectionError.secureInputActive
        }

        switch method {
        case .typing:
            try injectViaTyping(text)
        case .paste:
            try injectViaPaste(text)
        }
    }

    /// Paste strategy: currently only copies `text` to the pasteboard.
    private func injectViaPaste(_ text: String) throws {
        logger.debug("Injecting text via paste method")
        // TODO: Implement paste injection (clipboard + Cmd+V) in Phase 3
        copyToClipboard(text)
        // TODO: Send Cmd+V via CGEvent
    }

    /// Typing strategy: not implemented yet, only logs.
    private func injectViaTyping(_ text: String) throws {
        logger.debug("Injecting text via typing method")
        // TODO: Implement character-by-character typing via CGEvent in Phase 3
    }

    /// Replaces the contents of the general pasteboard with `text`.
    private func copyToClipboard(_ text: String) {
        let generalPasteboard = NSPasteboard.general
        generalPasteboard.clearContents()
        generalPasteboard.setString(text, forType: .string)
        logger.debug("Text copied to clipboard")
    }

    /// Reports whether secure input is active. Always `false` until the real check lands.
    private func isSecureInputActive() -> Bool {
        // TODO: Implement IsSecureEventInputEnabled() check in Phase 3
        return false
    }
}

View file

@ -0,0 +1,70 @@
import Foundation
import CoreUtils
/// Catalog entry describing one speech-to-text model.
///
/// Decoded from and encoded to keyed data using the names in `CodingKeys` (snake_case for the
/// multi-word fields).
public struct ModelInfo: Codable, Identifiable {
// Generated per instance and absent from `CodingKeys`, so it is neither encoded nor decoded:
// every decode of the same entry yields a different `id`.
// NOTE(review): if a stable identity across loads is needed (list diffing, comparing against a
// persisted selection), derive the identity from `name` or `sha256` instead — confirm with callers.
public let id = UUID()
public let name: String
public let family: String
public let format: String
// Size in megabytes (coded as "size_mb").
public let sizeMB: Int
public let languages: [String]
public let recommendedBackend: String
public let qualityTier: String
public let license: String
// NOTE(review): presumably the hex digest of the model file, used for download verification — confirm.
public let sha256: String
// Stored as a plain string, not a `URL`; no validation happens at decode time.
public let downloadURL: String
public let notes: String
// Maps property names to their coded keys; `id` is deliberately not listed.
enum CodingKeys: String, CodingKey {
case name, family, format, languages, license, sha256, notes
case sizeMB = "size_mb"
case recommendedBackend = "recommended_backend"
case qualityTier = "quality_tier"
case downloadURL = "download_url"
}
}
/// Tracks the model catalog, the models present on disk, and the active model.
///
/// Scaffold status: catalog loading, directory scanning, download, and deletion are stubs
/// that only log; `setActiveModel(_:)` updates in-memory state without persisting it.
public class ModelManager: ObservableObject {
    private let logger = Logger(category: "ModelManager")

    /// Models listed in the catalog. Empty until catalog loading is implemented.
    @Published public private(set) var availableModels: [ModelInfo] = []

    /// Models found on disk. Empty until directory scanning is implemented.
    @Published public private(set) var downloadedModels: [ModelInfo] = []

    /// The model selected via `setActiveModel(_:)`, if any.
    @Published public private(set) var activeModel: ModelInfo?

    /// Directory where model files are stored (`MenuWhisper/Models` under Application Support).
    private let modelsDirectory: URL

    /// Resolves and creates the models directory, then runs the (stubbed) catalog load and
    /// downloaded-model scan.
    ///
    /// Failures are logged rather than crashing or being swallowed silently: if the
    /// Application Support directory cannot be resolved the temporary directory is used
    /// instead, and a directory-creation error is reported through the logger.
    public init() {
        let fileManager = FileManager.default
        let appSupport = fileManager.urls(for: .applicationSupportDirectory, in: .userDomainMask).first
        let baseDirectory = appSupport ?? fileManager.temporaryDirectory
        modelsDirectory = baseDirectory.appendingPathComponent("MenuWhisper/Models")

        // All stored properties are initialized from here on, so the logger may be used.
        if appSupport == nil {
            logger.warning("Application Support directory unavailable; using temporary directory for models")
        }

        do {
            try fileManager.createDirectory(at: modelsDirectory, withIntermediateDirectories: true)
        } catch {
            logger.error("Failed to create models directory at \(modelsDirectory.path): \(error.localizedDescription)")
        }

        loadModelCatalog()
        refreshDownloadedModels()
    }

    /// Downloads `model`. Currently only logs.
    public func downloadModel(_ model: ModelInfo) async throws {
        logger.info("Starting download for model: \(model.name)")
        // TODO: Implement model download with progress tracking and SHA256 verification in Phase 2
    }

    /// Deletes `model` from disk. Currently only logs.
    public func deleteModel(_ model: ModelInfo) throws {
        logger.info("Deleting model: \(model.name)")
        // TODO: Implement model deletion in Phase 2
    }

    /// Makes `model` the active model (in memory only).
    public func setActiveModel(_ model: ModelInfo) {
        logger.info("Setting active model: \(model.name)")
        activeModel = model
        // TODO: Persist active model selection in Phase 2
    }

    /// Loads the model catalog. Currently only logs.
    private func loadModelCatalog() {
        // TODO: Load curated model catalog from bundled JSON in Phase 2
        logger.info("Loading model catalog")
    }

    /// Rescans the models directory. Currently only logs.
    private func refreshDownloadedModels() {
        // TODO: Scan models directory and populate downloadedModels in Phase 2
        logger.info("Refreshing downloaded models")
    }
}

View file

@ -0,0 +1,111 @@
import Foundation
import AVFoundation
import AppKit
import CoreUtils
/// System permissions the app deals with. `CaseIterable` so callers can enumerate all of them.
public enum PermissionType: CaseIterable {
/// Microphone access.
case microphone
/// Accessibility access.
case accessibility
/// Input Monitoring access.
case inputMonitoring
}
/// Authorization state of a single permission as tracked by `PermissionManager`.
public enum PermissionStatus {
/// No decision has been recorded yet; also the initial value of every published status.
case notDetermined
/// Access is allowed.
case granted
/// Access was refused.
case denied
/// NOTE(review): presumably mirrors `AVAuthorizationStatus.restricted` (access blocked by a restriction the user cannot lift) — confirm intended use.
case restricted
}
/// Tracks and requests the system permissions the app needs.
///
/// Microphone status is read from AVFoundation. Accessibility and Input Monitoring checks and
/// requests are still stubs: their statuses always read `.notDetermined`.
public class PermissionManager: ObservableObject {
    private let logger = Logger(category: "PermissionManager")

    /// Last known microphone authorization.
    @Published public private(set) var microphoneStatus: PermissionStatus = .notDetermined

    /// Last known accessibility authorization (always `.notDetermined` for now).
    @Published public private(set) var accessibilityStatus: PermissionStatus = .notDetermined

    /// Last known input-monitoring authorization (always `.notDetermined` for now).
    @Published public private(set) var inputMonitoringStatus: PermissionStatus = .notDetermined

    /// Creates the manager and takes an initial snapshot of all permission states.
    public init() {
        refreshAllPermissions()
    }

    /// Returns the microphone permission, prompting the user when no decision exists yet.
    ///
    /// `microphoneStatus` is updated on the main actor with the returned value on every path,
    /// not only after a prompt, so the published state cannot lag behind the result.
    ///
    /// - Returns: The resulting status. A restricted device is reported as `.restricted`;
    ///   a prompt the user declines is reported as `.denied`.
    public func requestMicrophonePermission() async -> PermissionStatus {
        logger.info("Requesting microphone permission")

        let authorization = AVCaptureDevice.authorizationStatus(for: .audio)
        let resolved: PermissionStatus
        if case .notDetermined = authorization {
            // Only an undetermined state can trigger the system prompt.
            let granted = await AVCaptureDevice.requestAccess(for: .audio)
            resolved = granted ? .granted : .denied
        } else {
            resolved = Self.permissionStatus(from: authorization)
        }

        await MainActor.run {
            self.microphoneStatus = resolved
        }
        return resolved
    }

    /// Requests accessibility permission. Currently only logs.
    public func requestAccessibilityPermission() {
        logger.info("Requesting accessibility permission")
        // TODO: Implement accessibility permission request in Phase 1
        // This typically involves guiding the user to System Settings
    }

    /// Requests input monitoring permission. Currently only logs.
    public func requestInputMonitoringPermission() {
        logger.info("Requesting input monitoring permission")
        // TODO: Implement input monitoring permission request in Phase 1
        // This typically involves guiding the user to System Settings
    }

    /// Opens the privacy pane of System Settings that corresponds to `permission`.
    ///
    /// A URL that cannot be built or opened is logged instead of being ignored.
    public func openSystemSettings(for permission: PermissionType) {
        logger.info("Opening system settings for permission: \(permission)")

        let urlString: String
        switch permission {
        case .microphone:
            urlString = "x-apple.systempreferences:com.apple.preference.security?Privacy_Microphone"
        case .accessibility:
            urlString = "x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility"
        case .inputMonitoring:
            urlString = "x-apple.systempreferences:com.apple.preference.security?Privacy_ListenEvent"
        }

        guard let url = URL(string: urlString) else {
            logger.error("Invalid system settings URL: \(urlString)")
            return
        }
        if !NSWorkspace.shared.open(url) {
            logger.warning("Could not open system settings for permission: \(permission)")
        }
    }

    /// Re-reads every permission state.
    private func refreshAllPermissions() {
        refreshMicrophonePermission()
        refreshAccessibilityPermission()
        refreshInputMonitoringPermission()
    }

    /// Synchronously mirrors the current AVFoundation microphone authorization.
    private func refreshMicrophonePermission() {
        microphoneStatus = Self.permissionStatus(from: AVCaptureDevice.authorizationStatus(for: .audio))
    }

    /// Accessibility check placeholder.
    private func refreshAccessibilityPermission() {
        // TODO: Implement accessibility permission check in Phase 1
        accessibilityStatus = .notDetermined
    }

    /// Input monitoring check placeholder.
    private func refreshInputMonitoringPermission() {
        // TODO: Implement input monitoring permission check in Phase 1
        inputMonitoringStatus = .notDetermined
    }

    /// Single mapping from AVFoundation's authorization to `PermissionStatus`, shared by the
    /// refresh and request paths so they cannot disagree. Unknown future values are treated
    /// as `.notDetermined`.
    private static func permissionStatus(from authorization: AVAuthorizationStatus) -> PermissionStatus {
        switch authorization {
        case .notDetermined:
            return .notDetermined
        case .authorized:
            return .granted
        case .denied:
            return .denied
        case .restricted:
            return .restricted
        @unknown default:
            return .notDetermined
        }
    }
}

View file

@ -0,0 +1,32 @@
import Foundation
import CoreUtils
/// Interface every speech-to-text backend implements.
public protocol STTEngine {
/// Transcribes `audioData` and returns the recognized text.
/// `language` is optional. NOTE(review): presumably `nil` means auto-detect, and the expected audio encoding is not specified here — confirm.
func transcribe(audioData: Data, language: String?) async throws -> String
/// Reports whether a model is currently loaded.
func isModelLoaded() -> Bool
/// Loads the model file located at `path`.
func loadModel(at path: URL) async throws
/// Releases the currently loaded model, if any.
func unloadModel()
}
/// Errors produced by speech-to-text engines.
public enum STTError: Error, LocalizedError {
    /// The requested model could not be located.
    case modelNotFound
    /// Loading the model failed; the associated value carries the reason.
    case modelLoadFailed(String)
    /// Transcription failed; the associated value carries the reason.
    case transcriptionFailed(String)
    /// The audio is in a format the engine does not support.
    case unsupportedFormat
    /// The audio payload could not be interpreted.
    case invalidAudioData

    /// User-facing description. Model and transcription errors use localized text (with the
    /// reason appended after ": " where one exists); the two audio errors use fixed English text.
    public var errorDescription: String? {
        let description: String
        switch self {
        case .modelNotFound:
            description = NSLocalizedString("error.model.not_found", comment: "Model not found error")
        case let .modelLoadFailed(reason):
            let prefix = NSLocalizedString("error.model.load_failed", comment: "Model load failed error")
            description = prefix + ": \(reason)"
        case let .transcriptionFailed(reason):
            let prefix = NSLocalizedString("error.transcription.failed", comment: "Transcription failed error")
            description = prefix + ": \(reason)"
        case .unsupportedFormat:
            description = "Unsupported audio format"
        case .invalidAudioData:
            description = "Invalid audio data"
        }
        return description
    }
}

View file

@ -0,0 +1,35 @@
import Foundation
import CoreUtils
/// `STTEngine` backed by whisper.cpp (scaffold).
///
/// Nothing is actually loaded or transcribed yet: `loadModel(at:)` only records the path and
/// flips the loaded flag, and `transcribe(audioData:language:)` always throws.
public class WhisperCPPEngine: STTEngine {
    private let logger = Logger(category: "WhisperCPPEngine")

    /// Location passed to the most recent `loadModel(at:)`; `nil` after `unloadModel()`.
    private var modelPath: URL?

    /// Backing flag for `isModelLoaded()`.
    private var isLoaded = false

    /// Creates an engine with no model loaded.
    public init() {
        // WhisperCPP integration will be implemented in Phase 2
    }

    /// Not implemented yet.
    ///
    /// - Throws: Always `STTError.transcriptionFailed("Not implemented yet")`.
    public func transcribe(audioData: Data, language: String?) async throws -> String {
        logger.info("Transcribing audio data")
        // TODO: Implement whisper.cpp integration in Phase 2
        throw STTError.transcriptionFailed("Not implemented yet")
    }

    /// Whether `loadModel(at:)` has been called without a later `unloadModel()`.
    public func isModelLoaded() -> Bool {
        isLoaded
    }

    /// Records `path` as the current model and marks the engine as loaded.
    public func loadModel(at path: URL) async throws {
        logger.info("Loading model at path: \(path.path)")
        modelPath = path
        // TODO: Implement model loading in Phase 2
        isLoaded = true
    }

    /// Forgets the current model path and marks the engine as not loaded.
    public func unloadModel() {
        logger.info("Unloading model")
        modelPath = nil
        isLoaded = false
    }
}

View file

@ -0,0 +1,149 @@
import Foundation
import CoreUtils
/// How the global hotkey controls dictation. Raw values are the persisted identifiers.
public enum HotkeyMode: String, CaseIterable, Codable {
    /// Persisted as "push_to_talk".
    case pushToTalk = "push_to_talk"
    /// Persisted as "toggle" (the implicit raw value equals the case name).
    case toggle

    /// Localized label for this mode.
    public var displayName: String {
        let label: String
        switch self {
        case .toggle:
            label = NSLocalizedString("preferences.general.mode.toggle", comment: "Toggle mode")
        case .pushToTalk:
            label = NSLocalizedString("preferences.general.mode.push_to_talk", comment: "Push to talk mode")
        }
        return label
    }
}
/// A keyboard shortcut expressed as a raw key code plus a modifier bit mask.
public struct HotkeyConfig: Codable {
    /// Hardware key code of the trigger key.
    public let keyCode: UInt32

    /// Modifier bit mask that must accompany the key.
    public let modifiers: UInt32

    /// Creates a shortcut from a raw key code and modifier mask.
    public init(keyCode: UInt32, modifiers: UInt32) {
        self.keyCode = keyCode
        self.modifiers = modifiers
    }

    /// Default shortcut: Cmd+Shift+V, i.e. key code 9 with modifier mask 768.
    /// NOTE(review): 768 equals 256 + 512, which matches the Carbon `cmdKey | shiftKey`
    /// values — confirm the hotkey registration expects Carbon-style modifiers.
    public static let `default`: HotkeyConfig = {
        let vKeyCode: UInt32 = 0x09
        let commandShiftMask: UInt32 = 0x0100 | 0x0200
        return HotkeyConfig(keyCode: vKeyCode, modifiers: commandShiftMask)
    }()
}
/// User preferences backed by `UserDefaults.standard`.
///
/// Every property writes itself back to `UserDefaults` when changed. `exportSettings()` and
/// `importSettings(from:)` move the same values through a JSON object whose keys are the
/// `UserDefaults` keys.
public class Settings: ObservableObject {
    /// Storage keys, shared by `UserDefaults` persistence and JSON import/export.
    private enum Key {
        static let hotkeyKeyCode = "hotkeyKeyCode"
        static let hotkeyModifiers = "hotkeyModifiers"
        static let hotkeyMode = "hotkeyMode"
        static let playSounds = "playSounds"
        static let dictationTimeLimit = "dictationTimeLimit"
        static let activeModelName = "activeModelName"
        static let forcedLanguage = "forcedLanguage"
        static let insertionMethod = "insertionMethod"
        static let showPreview = "showPreview"
    }

    private let logger = Logger(category: "Settings")
    private let userDefaults = UserDefaults.standard

    // MARK: - General settings

    /// Global shortcut; defaults to `HotkeyConfig.default`.
    @Published public var hotkey: HotkeyConfig {
        didSet { saveHotkey() }
    }

    /// Hotkey behavior; defaults to `.pushToTalk`.
    @Published public var hotkeyMode: HotkeyMode {
        didSet { saveHotkeyMode() }
    }

    /// Whether feedback sounds are played; defaults to `false`.
    @Published public var playSounds: Bool {
        didSet { userDefaults.set(playSounds, forKey: Key.playSounds) }
    }

    /// Maximum dictation length in seconds; defaults to 600 (10 minutes).
    @Published public var dictationTimeLimit: TimeInterval {
        didSet { userDefaults.set(dictationTimeLimit, forKey: Key.dictationTimeLimit) }
    }

    // MARK: - Model settings

    /// Name of the selected model, or `nil` when none is selected.
    @Published public var activeModelName: String? {
        didSet { userDefaults.set(activeModelName, forKey: Key.activeModelName) }
    }

    /// Language forced for transcription, or `nil` when none is forced.
    @Published public var forcedLanguage: String? {
        didSet { userDefaults.set(forcedLanguage, forKey: Key.forcedLanguage) }
    }

    // MARK: - Insertion settings

    /// Insertion method identifier; defaults to "paste".
    @Published public var insertionMethod: String {
        didSet { userDefaults.set(insertionMethod, forKey: Key.insertionMethod) }
    }

    /// Whether a preview is shown before insertion; defaults to `false`.
    @Published public var showPreview: Bool {
        didSet { userDefaults.set(showPreview, forKey: Key.showPreview) }
    }

    // MARK: - Lifecycle

    /// Loads every setting from `UserDefaults`, falling back to the defaults documented on
    /// each property. Property observers do not run during initialization, so nothing is
    /// written back here.
    public init() {
        self.hotkey = Settings.loadHotkey()
        self.hotkeyMode = HotkeyMode(rawValue: userDefaults.string(forKey: Key.hotkeyMode) ?? "") ?? .pushToTalk
        self.playSounds = userDefaults.object(forKey: Key.playSounds) as? Bool ?? false
        self.dictationTimeLimit = userDefaults.object(forKey: Key.dictationTimeLimit) as? TimeInterval ?? 600 // 10 minutes
        self.activeModelName = userDefaults.string(forKey: Key.activeModelName)
        self.forcedLanguage = userDefaults.string(forKey: Key.forcedLanguage)
        self.insertionMethod = userDefaults.string(forKey: Key.insertionMethod) ?? "paste"
        self.showPreview = userDefaults.object(forKey: Key.showPreview) as? Bool ?? false
        logger.info("Settings initialized")
    }

    // MARK: - Import / export

    /// Serializes all settings to pretty-printed JSON with sorted keys.
    ///
    /// Unset optional values are written as explicit JSON `null` (via `NSNull`) rather than
    /// relying on how an `Optional` stored in `Any` happens to be bridged.
    ///
    /// - Returns: UTF-8 JSON data for a single object.
    /// - Throws: Any error raised by `JSONSerialization`.
    public func exportSettings() throws -> Data {
        let settingsDict: [String: Any] = [
            Key.hotkeyKeyCode: hotkey.keyCode,
            Key.hotkeyModifiers: hotkey.modifiers,
            Key.hotkeyMode: hotkeyMode.rawValue,
            Key.playSounds: playSounds,
            Key.dictationTimeLimit: dictationTimeLimit,
            Key.activeModelName: Settings.jsonValue(activeModelName),
            Key.forcedLanguage: Settings.jsonValue(forcedLanguage),
            Key.insertionMethod: insertionMethod,
            Key.showPreview: showPreview
        ]
        return try JSONSerialization.data(withJSONObject: settingsDict, options: [.prettyPrinted, .sortedKeys])
    }

    /// Applies settings from JSON produced by `exportSettings()`.
    ///
    /// Only keys present in the payload are applied; absent keys leave the current value
    /// untouched. For the optional settings (`activeModelName`, `forcedLanguage`) a present
    /// key whose value is `null` or not a string clears the setting.
    ///
    /// - Parameter data: JSON data whose top-level value must be an object.
    /// - Throws: Any `JSONSerialization` parsing error, or `CocoaError(.propertyListReadCorrupt)`
    ///   when the top-level value is not an object. In both cases no setting is modified.
    public func importSettings(from data: Data) throws {
        guard let settingsDict = try JSONSerialization.jsonObject(with: data) as? [String: Any] else {
            // Treating a non-object payload as "empty" would silently wipe settings and
            // report success, so reject it instead.
            logger.error("Settings import failed: top-level JSON value is not an object")
            throw CocoaError(.propertyListReadCorrupt)
        }

        // The hotkey is applied only when both halves are present and valid.
        if let keyCode = settingsDict[Key.hotkeyKeyCode] as? UInt32,
           let modifiers = settingsDict[Key.hotkeyModifiers] as? UInt32 {
            hotkey = HotkeyConfig(keyCode: keyCode, modifiers: modifiers)
        }
        if let modeString = settingsDict[Key.hotkeyMode] as? String,
           let mode = HotkeyMode(rawValue: modeString) {
            hotkeyMode = mode
        }
        if let sounds = settingsDict[Key.playSounds] as? Bool {
            playSounds = sounds
        }
        if let timeLimit = settingsDict[Key.dictationTimeLimit] as? TimeInterval {
            dictationTimeLimit = timeLimit
        }
        if let rawModelName = settingsDict[Key.activeModelName] {
            activeModelName = rawModelName as? String
        }
        if let rawLanguage = settingsDict[Key.forcedLanguage] {
            forcedLanguage = rawLanguage as? String
        }
        if let method = settingsDict[Key.insertionMethod] as? String {
            insertionMethod = method
        }
        if let preview = settingsDict[Key.showPreview] as? Bool {
            showPreview = preview
        }
        logger.info("Settings imported successfully")
    }

    // MARK: - Persistence helpers

    /// Maps an optional string to a JSON-serializable value (`NSNull` for `nil`).
    private static func jsonValue(_ value: String?) -> Any {
        if let value = value {
            return value
        }
        return NSNull()
    }

    /// Reads the stored hotkey, substituting the default for each missing component.
    private static func loadHotkey() -> HotkeyConfig {
        let defaults = UserDefaults.standard
        let keyCode = defaults.object(forKey: Key.hotkeyKeyCode) as? UInt32 ?? HotkeyConfig.default.keyCode
        let modifiers = defaults.object(forKey: Key.hotkeyModifiers) as? UInt32 ?? HotkeyConfig.default.modifiers
        return HotkeyConfig(keyCode: keyCode, modifiers: modifiers)
    }

    /// Writes both hotkey components to `UserDefaults`.
    private func saveHotkey() {
        userDefaults.set(hotkey.keyCode, forKey: Key.hotkeyKeyCode)
        userDefaults.set(hotkey.modifiers, forKey: Key.hotkeyModifiers)
    }

    /// Writes the hotkey mode's raw value to `UserDefaults`.
    private func saveHotkeyMode() {
        userDefaults.set(hotkeyMode.rawValue, forKey: Key.hotkeyMode)
    }
}

View file

@ -0,0 +1,24 @@
import Foundation
/// High-level state of the dictation pipeline. Raw values equal the case names.
public enum AppState: String, CaseIterable {
    case idle
    case listening
    case processing
    case injecting
    case error

    /// Label for this state. The first three are localized; `injecting` and `error` use fixed
    /// English text because they are not shown in the menu bar.
    public var displayName: String {
        let label: String
        switch self {
        case .idle:
            label = NSLocalizedString("menubar.idle", comment: "Idle state")
        case .listening:
            label = NSLocalizedString("menubar.listening", comment: "Listening state")
        case .processing:
            label = NSLocalizedString("menubar.processing", comment: "Processing state")
        case .injecting:
            label = "Injecting" // Not shown in menu bar
        case .error:
            label = "Error" // Not shown in menu bar
        }
        return label
    }
}

View file

@ -0,0 +1,51 @@
import Foundation
import os.log
/// Severity levels accepted by `Logger`. Raw values are the upper-case level names.
public enum LogLevel: String, CaseIterable {
/// Logged with `OSLogType.debug`.
case debug = "DEBUG"
/// Logged with `OSLogType.info`.
case info = "INFO"
/// Logged with `OSLogType.default`.
case warning = "WARNING"
/// Logged with `OSLogType.error`.
case error = "ERROR"
}
/// Thin wrapper over `os_log` that tags each message with a category and its call site.
///
/// Messages are emitted as "[category] message (File.swift:function:line)" under the
/// "com.menuwhisper.app" subsystem, with the whole string marked public.
public class Logger {
    private let osLog: OSLog
    private let category: String

    /// Creates a logger whose messages are filed under `category`.
    public init(category: String) {
        self.category = category
        self.osLog = OSLog(subsystem: "com.menuwhisper.app", category: category)
    }

    /// Logs `message` at debug level. The trailing parameters capture the call site.
    public func debug(_ message: String, file: String = #file, function: String = #function, line: Int = #line) {
        log(level: .debug, message: message, file: file, function: function, line: line)
    }

    /// Logs `message` at info level. The trailing parameters capture the call site.
    public func info(_ message: String, file: String = #file, function: String = #function, line: Int = #line) {
        log(level: .info, message: message, file: file, function: function, line: line)
    }

    /// Logs `message` at warning level. The trailing parameters capture the call site.
    public func warning(_ message: String, file: String = #file, function: String = #function, line: Int = #line) {
        log(level: .warning, message: message, file: file, function: function, line: line)
    }

    /// Logs `message` at error level. The trailing parameters capture the call site.
    public func error(_ message: String, file: String = #file, function: String = #function, line: Int = #line) {
        log(level: .error, message: message, file: file, function: function, line: line)
    }

    /// Formats the message and forwards it to `os_log` with the matching log type.
    private func log(level: LogLevel, message: String, file: String, function: String, line: Int) {
        // Keep only the file name so messages do not carry full build paths.
        let sourceFile = URL(fileURLWithPath: file).lastPathComponent
        let formatted = "[\(category)] \(message) (\(sourceFile):\(function):\(line))"
        os_log("%{public}@", log: osLog, type: osLogType(for: level), formatted)
    }

    /// Maps a `LogLevel` to its `OSLogType`; warnings use `.default`.
    private func osLogType(for level: LogLevel) -> OSLogType {
        switch level {
        case .debug:
            return .debug
        case .info:
            return .info
        case .warning:
            return .default
        case .error:
            return .error
        }
    }
}