Complete Phase 3: Text injection with permissions management

- Implement text injection with paste method (NSPasteboard + ⌘V)
- Add typing fallback with Unicode support and keyboard layout respect
- Integrate secure input detection using IsSecureEventInputEnabled()
- Add comprehensive permission checking and management
- Create Permissions tab in preferences with status indicators
- Add permission onboarding flow for new users
- Implement automatic fallback between injection methods
- Add deep links to System Settings for permission grants
- Remove duplicate preferences menu item
- Create development build script for easier testing
- Update Phase 3 tasks as completed in TODO.md
This commit is contained in:
Felipe M 2025-09-19 09:04:38 +02:00
parent 5663f3c3de
commit 7ba5895406
Signed by: fmartingr
GPG key ID: CCFBC5637D4000A8
7 changed files with 589 additions and 56 deletions

View file

@ -4,6 +4,7 @@ import MenuWhisperAudio
import CorePermissions
import CoreSTT
import CoreModels
import CoreInjection
import AVFoundation
public class AppController: ObservableObject {
@ -14,6 +15,7 @@ public class AppController: ObservableObject {
private let audioEngine = AudioEngine()
private let permissionManager = PermissionManager()
private let soundManager = SoundManager()
private let textInjector: TextInjector
// STT components
public let whisperEngine = WhisperCPPEngine(numThreads: 4, useGPU: true)
@ -33,6 +35,7 @@ public class AppController: ObservableObject {
private let maxDictationDuration: TimeInterval = 600 // 10 minutes default
public init() {
textInjector = TextInjector(permissionManager: permissionManager)
setupDelegates()
setupNotifications()
setupSTTComponents()
@ -91,6 +94,9 @@ public class AppController: ObservableObject {
setupStatusItemMenu()
}
// Check all required permissions on startup
checkAllPermissionsOnStartup()
// Check microphone permission first
checkMicrophonePermission { [weak self] granted in
if granted {
@ -130,12 +136,6 @@ public class AppController: ObservableObject {
preferencesMenuItem.target = self
menu.addItem(preferencesMenuItem)
// Test item - add direct preferences shortcut
let testPrefsMenuItem = NSMenuItem(title: "Open Preferences (⇧⌘P)", action: #selector(openPreferences), keyEquivalent: "P")
testPrefsMenuItem.keyEquivalentModifierMask = [.shift, .command]
testPrefsMenuItem.target = self
menu.addItem(testPrefsMenuItem)
// Quit
let quitMenuItem = NSMenuItem(title: "Quit MenuWhisper", action: #selector(quitApp), keyEquivalent: "q")
quitMenuItem.target = self
@ -191,6 +191,54 @@ public class AppController: ObservableObject {
hotkeyManager.enableHotkey()
}
/// Runs the launch-time permission pass: asks the permission manager to check
/// everything, logs the three statuses, and schedules the onboarding alert
/// when `shouldShowPermissionOnboarding()` says it is needed.
private func checkAllPermissionsOnStartup() {
    logger.info("Checking all permissions on startup")

    // Have the manager check every permission before the statuses are read below.
    permissionManager.checkAllPermissions()

    logger.info("Permission status: Microphone=\(permissionManager.microphoneStatus), Accessibility=\(permissionManager.accessibilityStatus), InputMonitoring=\(permissionManager.inputMonitoringStatus)")

    // Nothing more to do unless the first-run onboarding is still pending.
    guard shouldShowPermissionOnboarding() else { return }

    // The onboarding alert is main-actor isolated, so present it from there.
    Task { @MainActor in
        showPermissionOnboarding()
    }
}
/// Decides whether the permission onboarding alert should be presented.
///
/// - Returns: `false` once the `hasShownPermissionOnboarding` user default is
///   set; otherwise `true` when Accessibility or Input Monitoring is not
///   `.granted`.
private func shouldShowPermissionOnboarding() -> Bool {
    // The onboarding is a one-time prompt: skip it once it has been shown.
    let alreadyShown = UserDefaults.standard.bool(forKey: "hasShownPermissionOnboarding")
    guard !alreadyShown else { return false }

    // Either missing permission is enough to warrant the onboarding.
    guard permissionManager.accessibilityStatus == .granted else { return true }
    return permissionManager.inputMonitoringStatus != .granted
}
/// Presents the modal welcome alert that explains the permissions MenuWhisper
/// uses, records that it was shown, and opens the Permissions tab when the
/// user picks "Set Up Permissions".
@MainActor
private func showPermissionOnboarding() {
    let onboardingAlert = NSAlert()
    onboardingAlert.alertStyle = .informational
    onboardingAlert.messageText = "Welcome to MenuWhisper"
    onboardingAlert.informativeText = "MenuWhisper needs some permissions to work properly:\n\n• Microphone: To capture your speech\n• Accessibility: To insert transcribed text\n• Input Monitoring: To send keyboard events\n\nWould you like to set up permissions now?"
    onboardingAlert.addButton(withTitle: "Set Up Permissions")
    onboardingAlert.addButton(withTitle: "Later")

    // Blocks until the user dismisses the alert.
    let wantsSetup = onboardingAlert.runModal() == .alertFirstButtonReturn

    // Remember the prompt was shown regardless of which button was chosen.
    UserDefaults.standard.set(true, forKey: "hasShownPermissionOnboarding")

    guard wantsSetup else { return }
    showPreferences(initialTab: 1) // Tab index 1 is the Permissions tab
}
private func checkMicrophonePermission(completion: @escaping (Bool) -> Void) {
permissionManager.requestMicrophonePermission { status in
DispatchQueue.main.async {
@ -281,10 +329,9 @@ public class AppController: ObservableObject {
logger.info("Transcription completed in \(String(format: "%.2f", duration))s: \"\(transcription)\"")
// For now, just print the result - in Phase 3 we'll inject it
// Inject the transcribed text
await MainActor.run {
print("🎤 TRANSCRIPTION RESULT: \(transcription)")
showTranscriptionResult(transcription)
injectTranscriptionResult(transcription)
}
} catch {
@ -295,11 +342,32 @@ public class AppController: ObservableObject {
}
@MainActor
private func showTranscriptionResult(_ text: String) {
// For Phase 2, we'll just show it in logs and console
// In Phase 3, this will inject the text into the active app
logger.info("Transcription result: \(text)")
finishProcessing()
private func injectTranscriptionResult(_ text: String) {
logger.info("Attempting to inject transcription result: \(text)")
do {
// Attempt to inject the text using paste method with fallback enabled
try textInjector.injectText(text, method: .paste, enableFallback: true)
logger.info("Text injection successful")
// Show success and finish processing
finishProcessing()
} catch InjectionError.secureInputActive {
logger.warning("Secure input active - text copied to clipboard")
showSecureInputNotice(text)
finishProcessing()
} catch InjectionError.accessibilityPermissionRequired {
logger.error("Accessibility permission required for text injection")
showPermissionRequiredNotice()
finishProcessing()
} catch {
logger.error("Text injection failed: \(error)")
showInjectionError(error.localizedDescription)
finishProcessing()
}
}
@MainActor
@ -364,7 +432,42 @@ public class AppController: ObservableObject {
}
@MainActor
public func showPreferences() {
/// Shows a modal notice explaining that injection was blocked by secure input.
///
/// This method only presents the message; it does not touch the clipboard itself.
///
/// - Parameter text: The transcribed text, quoted inside the alert body.
private func showSecureInputNotice(_ text: String) {
    let notice = NSAlert()
    notice.alertStyle = .informational
    notice.messageText = "Secure Input Active"
    notice.informativeText = "Text injection is blocked because secure input is active (likely in a password field or secure app).\n\nThe transcribed text has been copied to your clipboard instead: \"\(text)\""
    notice.addButton(withTitle: "OK")
    notice.runModal()
}
/// Warns the user that permissions are missing for text insertion and, if they
/// accept, opens the preferences window on the Permissions tab.
@MainActor
private func showPermissionRequiredNotice() {
    let prompt = NSAlert()
    prompt.alertStyle = .warning
    prompt.messageText = "Permission Required"
    prompt.informativeText = "MenuWhisper needs Accessibility and Input Monitoring permissions to insert text into other applications.\n\nWould you like to open System Settings to grant these permissions?"
    prompt.addButton(withTitle: "Open System Settings")
    prompt.addButton(withTitle: "Cancel")

    // Only the first button ("Open System Settings") triggers any follow-up.
    guard prompt.runModal() == .alertFirstButtonReturn else { return }
    showPreferences(initialTab: 1) // Tab index 1 is the Permissions tab
}
/// Presents a modal warning describing why text injection failed.
///
/// - Parameter message: Failure description appended to the alert body.
@MainActor
private func showInjectionError(_ message: String) {
    let failureAlert = NSAlert()
    failureAlert.alertStyle = .warning
    failureAlert.messageText = "Text Injection Failed"
    failureAlert.informativeText = "Failed to insert the transcribed text: \(message)"
    failureAlert.addButton(withTitle: "OK")
    failureAlert.runModal()
}
@MainActor
public func showPreferences(initialTab: Int = 0) {
guard let modelManager = modelManager else {
logger.error("ModelManager not initialized yet")
return
@ -373,8 +476,13 @@ public class AppController: ObservableObject {
if preferencesWindow == nil {
preferencesWindow = PreferencesWindowController(
modelManager: modelManager,
whisperEngine: whisperEngine
whisperEngine: whisperEngine,
permissionManager: permissionManager,
initialTab: initialTab
)
} else {
// If window already exists, update the selected tab
preferencesWindow?.setSelectedTab(initialTab)
}
preferencesWindow?.showWindow(nil)

View file

@ -2,14 +2,18 @@ import SwiftUI
import CoreModels
import CoreSTT
import CoreUtils
import CorePermissions
class PreferencesWindowController: NSWindowController {
private let modelManager: ModelManager
private let whisperEngine: WhisperCPPEngine
private let permissionManager: PermissionManager
private var preferencesView: PreferencesView?
init(modelManager: ModelManager, whisperEngine: WhisperCPPEngine) {
init(modelManager: ModelManager, whisperEngine: WhisperCPPEngine, permissionManager: PermissionManager, initialTab: Int = 0) {
self.modelManager = modelManager
self.whisperEngine = whisperEngine
self.permissionManager = permissionManager
let window = NSWindow(
contentRect: NSRect(x: 0, y: 0, width: 600, height: 500),
@ -22,33 +26,53 @@ class PreferencesWindowController: NSWindowController {
window.title = "MenuWhisper Preferences"
window.center()
window.contentView = NSHostingView(
rootView: PreferencesView(
modelManager: modelManager,
whisperEngine: whisperEngine,
onClose: { [weak self] in
self?.close()
}
)
preferencesView = PreferencesView(
modelManager: modelManager,
whisperEngine: whisperEngine,
permissionManager: permissionManager,
initialTab: initialTab,
onClose: { [weak self] in
self?.close()
}
)
window.contentView = NSHostingView(rootView: preferencesView!)
}
required init?(coder: NSCoder) {
fatalError("init(coder:) has not been implemented")
}
func setSelectedTab(_ tabIndex: Int) {
preferencesView?.setSelectedTab(tabIndex)
}
}
struct PreferencesView: View {
@ObservedObject var modelManager: ModelManager
let whisperEngine: WhisperCPPEngine
@ObservedObject var permissionManager: PermissionManager
let onClose: () -> Void
@State private var selectedTab = 0
@State private var selectedTab: Int
@State private var isDownloading: [String: Bool] = [:]
@State private var downloadProgress: [String: Double] = [:]
@State private var showingDeleteAlert = false
@State private var modelToDelete: ModelInfo?
init(modelManager: ModelManager, whisperEngine: WhisperCPPEngine, permissionManager: PermissionManager, initialTab: Int = 0, onClose: @escaping () -> Void) {
self.modelManager = modelManager
self.whisperEngine = whisperEngine
self.permissionManager = permissionManager
self.onClose = onClose
self._selectedTab = State(initialValue: initialTab)
}
func setSelectedTab(_ tabIndex: Int) {
selectedTab = tabIndex
}
var body: some View {
TabView(selection: $selectedTab) {
ModelsTab(
@ -64,11 +88,17 @@ struct PreferencesView: View {
}
.tag(0)
PermissionsTab(permissionManager: permissionManager)
.tabItem {
Label("Permissions", systemImage: "lock.shield")
}
.tag(1)
GeneralTab()
.tabItem {
Label("General", systemImage: "gearshape")
}
.tag(1)
.tag(2)
}
.frame(width: 600, height: 500)
.alert("Delete Model", isPresented: $showingDeleteAlert) {
@ -324,6 +354,173 @@ struct ModelRow: View {
}
}
/// Preferences tab that lists the Microphone, Accessibility and Input
/// Monitoring permissions with their current status, plus short help text.
struct PermissionsTab: View {
    @ObservedObject var permissionManager: PermissionManager

    var body: some View {
        VStack(alignment: .leading, spacing: 16) {
            Text("Permissions")
                .font(.title2)
                .fontWeight(.semibold)

            Text("MenuWhisper requires certain system permissions to function properly. Click the buttons below to grant permissions in System Settings.")
                .font(.caption)
                .foregroundColor(.secondary)

            permissionList
            helpSection

            Spacer()
        }
        .padding(20)
    }

    /// Card containing one `PermissionRow` per permission, separated by dividers.
    private var permissionList: some View {
        VStack(alignment: .leading, spacing: 12) {
            PermissionRow(
                title: "Microphone",
                description: "Required to capture speech for transcription",
                status: permissionManager.microphoneStatus,
                onOpenSettings: { permissionManager.openSystemSettings(for: .microphone) },
                onRefresh: refreshStatuses
            )

            Divider()

            PermissionRow(
                title: "Accessibility",
                description: "Required to insert transcribed text into other applications",
                status: permissionManager.accessibilityStatus,
                onOpenSettings: { permissionManager.openSystemSettings(for: .accessibility) },
                onRefresh: refreshStatuses
            )

            Divider()

            PermissionRow(
                title: "Input Monitoring",
                description: "Required to send keyboard events for text insertion",
                status: permissionManager.inputMonitoringStatus,
                onOpenSettings: { permissionManager.openSystemSettings(for: .inputMonitoring) },
                onRefresh: refreshStatuses
            )
        }
        .padding(16)
        .background(Color(NSColor.controlBackgroundColor))
        .cornerRadius(12)
    }

    /// Instructions for what to do after granting permissions in System Settings.
    private var helpSection: some View {
        VStack(alignment: .leading, spacing: 8) {
            Text("Need Help?")
                .font(.headline)

            Text("After granting permissions in System Settings:")
                .font(.body)
                .foregroundColor(.secondary)

            VStack(alignment: .leading, spacing: 4) {
                Text("1. Close System Settings")
                Text("2. Click 'Refresh Status' to update permission status")
                Text("3. Some permissions may require restarting MenuWhisper")
            }
            .font(.caption)
            .foregroundColor(.secondary)
            .padding(.leading, 8)
        }
    }

    /// Shared refresh action: every row re-checks all permissions at once.
    private func refreshStatuses() {
        permissionManager.checkAllPermissions()
    }
}
/// One row of the Permissions tab: title, description, a coloured status
/// badge, and buttons to open System Settings or refresh the status.
struct PermissionRow: View {
    let title: String
    let description: String
    let status: PermissionStatus
    let onOpenSettings: () -> Void
    let onRefresh: () -> Void

    var body: some View {
        HStack {
            VStack(alignment: .leading, spacing: 4) {
                HStack {
                    Text(title)
                        .font(.body)
                        .fontWeight(.medium)
                    Spacer()
                    statusBadge
                }
                Text(description)
                    .font(.caption)
                    .foregroundColor(.secondary)
            }
            Spacer()
            actionButtons
        }
    }

    /// Coloured dot followed by the textual status.
    private var statusBadge: some View {
        let appearance = statusAppearance
        return HStack(spacing: 4) {
            Circle()
                .fill(appearance.color)
                .frame(width: 8, height: 8)
            Text(appearance.label)
                .font(.caption)
                .fontWeight(.medium)
                .foregroundColor(appearance.color)
        }
    }

    /// "Open System Settings" (hidden once granted) above "Refresh Status".
    private var actionButtons: some View {
        VStack(spacing: 6) {
            if status != .granted {
                Button("Open System Settings", action: onOpenSettings)
                    .buttonStyle(.bordered)
                    .controlSize(.small)
            }
            Button("Refresh Status", action: onRefresh)
                .buttonStyle(.borderless)
                .controlSize(.small)
                .foregroundColor(.secondary)
        }
    }

    /// Badge colour and label for the current status.
    private var statusAppearance: (color: Color, label: String) {
        switch status {
        case .granted:
            return (.green, "Granted")
        case .denied:
            return (.red, "Denied")
        case .notDetermined:
            return (.orange, "Not Set")
        case .restricted:
            return (.orange, "Restricted")
        }
    }
}
struct GeneralTab: View {
var body: some View {
VStack(alignment: .leading, spacing: 16) {

View file

@ -1,10 +1,21 @@
import Foundation
import AppKit
import Carbon
import CoreUtils
import CorePermissions
public enum InjectionMethod {
case paste
case typing
public enum InjectionMethod: String, CaseIterable {
case paste = "paste"
case typing = "typing"
public var displayName: String {
switch self {
case .paste:
return NSLocalizedString("preferences.insertion.method.paste", comment: "Paste method")
case .typing:
return NSLocalizedString("preferences.insertion.method.typing", comment: "Typing method")
}
}
}
public enum InjectionError: Error, LocalizedError {
@ -26,11 +37,17 @@ public enum InjectionError: Error, LocalizedError {
public class TextInjector {
private let logger = Logger(category: "TextInjector")
private let permissionManager: PermissionManager
public init() {}
public init(permissionManager: PermissionManager? = nil) {
self.permissionManager = permissionManager ?? PermissionManager()
}
public func injectText(_ text: String, method: InjectionMethod = .paste) throws {
logger.info("Injecting text using method: \(method)")
public func injectText(_ text: String, method: InjectionMethod = .paste, enableFallback: Bool = true) throws {
logger.info("Injecting text using method: \(method), fallback enabled: \(enableFallback)")
// Check permissions required for text injection
try checkRequiredPermissions()
// Check for secure input first
if isSecureInputActive() {
@ -39,6 +56,41 @@ public class TextInjector {
throw InjectionError.secureInputActive
}
do {
try attemptInjection(text: text, method: method)
} catch {
if enableFallback {
let fallbackMethod: InjectionMethod = method == .paste ? .typing : .paste
logger.warning("Primary injection method failed, trying fallback: \(fallbackMethod)")
try attemptInjection(text: text, method: fallbackMethod)
} else {
throw error
}
}
}
/// Verifies that the permissions needed for text injection are granted.
///
/// - Throws: `InjectionError.accessibilityPermissionRequired` when either
///   Accessibility or Input Monitoring is not `.granted` (the same error is
///   deliberately reused for both cases).
private func checkRequiredPermissions() throws {
    // Re-check first so the statuses read below are current.
    permissionManager.checkAllPermissions()

    logger.info("Permission status - Accessibility: \(permissionManager.accessibilityStatus), Input Monitoring: \(permissionManager.inputMonitoringStatus)")

    // Accessibility is required for text injection.
    guard permissionManager.accessibilityStatus == .granted else {
        logger.error("Accessibility permission not granted: \(permissionManager.accessibilityStatus)")
        throw InjectionError.accessibilityPermissionRequired
    }

    // Input Monitoring is required for CGEvent creation.
    guard permissionManager.inputMonitoringStatus == .granted else {
        logger.error("Input monitoring permission not granted: \(permissionManager.inputMonitoringStatus)")
        throw InjectionError.accessibilityPermissionRequired // Same error reused for simplicity
    }

    logger.info("All permissions granted for text injection")
}
private func attemptInjection(text: String, method: InjectionMethod) throws {
switch method {
case .paste:
try injectViaPaste(text)
@ -49,25 +101,115 @@ public class TextInjector {
/// Inserts `text` by placing it on the clipboard and then sending ⌘V.
///
/// - Parameter text: The text to paste.
/// - Throws: Whatever `sendCommandV()` throws.
private func injectViaPaste(_ text: String) throws {
logger.debug("Injecting text via paste method")
// Step 1: put the text on the clipboard.
copyToClipboard(text)
// Step 2: brief pause so the clipboard update lands before the paste keystroke.
// NOTE(review): Thread.sleep blocks the calling thread for 50 ms — confirm this
// is acceptable on whichever thread callers use.
Thread.sleep(forTimeInterval: 0.05)
// Step 3: send ⌘V via CGEvent.
try sendCommandV()
}
/// Posts a ⌘V key-down/key-up pair to the HID event tap.
///
/// - Throws: `InjectionError.injectionFailed` when either `CGEvent` cannot be
///   created.
private func sendCommandV() throws {
    logger.debug("Sending ⌘V keyboard event")

    // Build the press and release events for the V key.
    let vKey = CGKeyCode(kVK_ANSI_V)
    let pressCandidate = CGEvent(keyboardEventSource: nil, virtualKey: vKey, keyDown: true)
    let releaseCandidate = CGEvent(keyboardEventSource: nil, virtualKey: vKey, keyDown: false)

    guard let press = pressCandidate, let release = releaseCandidate else {
        logger.error("Failed to create CGEvent objects for ⌘V")
        throw InjectionError.injectionFailed("Failed to create CGEvent for ⌘V")
    }

    // Both events carry the Command modifier so the pair reads as ⌘V.
    let keystroke = [press, release]
    for event in keystroke {
        event.flags = .maskCommand
    }

    logger.debug("Created ⌘V events, posting to system...")

    // Post press first, then release.
    for event in keystroke {
        event.post(tap: .cghidEventTap)
    }

    logger.info("⌘V events posted successfully")
}
/// Inserts `text` by typing it one `Character` at a time.
///
/// - Parameter text: The text to type.
/// - Throws: Rethrows any error from `typeCharacter(_:)`; characters already
///   typed are not undone.
private func injectViaTyping(_ text: String) throws {
    logger.debug("Injecting text via typing method")

    // Short pause after every character to avoid overwhelming the target app.
    let interKeyDelay: TimeInterval = 0.01

    try text.forEach { character in
        try typeCharacter(character)
        Thread.sleep(forTimeInterval: interKeyDelay)
    }

    logger.debug("Typing injection completed")
}
/// Types a single character by posting keyboard events to the HID event tap.
///
/// Newline, tab and space are sent as their dedicated virtual keys. Every
/// other character is attached to a key event as a Unicode string.
///
/// - Parameter character: The character to type. It may consist of several
///   Unicode scalars (e.g. emoji or combining sequences).
/// - Throws: `InjectionError.injectionFailed` when a `CGEvent` cannot be created.
private func typeCharacter(_ character: Character) throws {
    switch character {
    case "\n":
        try postKeyEvent(keyCode: CGKeyCode(kVK_Return))
    case "\t":
        try postKeyEvent(keyCode: CGKeyCode(kVK_Tab))
    case " ":
        try postKeyEvent(keyCode: CGKeyCode(kVK_Space))
    default:
        // The event carries the text itself, so virtual key 0 is only a placeholder.
        let keyDownEvent = CGEvent(keyboardEventSource: nil, virtualKey: 0, keyDown: true)
        let keyUpEvent = CGEvent(keyboardEventSource: nil, virtualKey: 0, keyDown: false)

        guard let keyDown = keyDownEvent, let keyUp = keyUpEvent else {
            throw InjectionError.injectionFailed("Failed to create CGEvent for character: \(character)")
        }

        // UniChar is a UTF-16 code unit. Encode via `utf16` so scalars above
        // U+FFFF become surrogate pairs instead of trapping in `UniChar(_:)`,
        // and pass the code-unit count (not the Character count) as the length.
        let utf16Units = Array(String(character).utf16)
        keyDown.keyboardSetUnicodeString(stringLength: utf16Units.count, unicodeString: utf16Units)
        keyUp.keyboardSetUnicodeString(stringLength: utf16Units.count, unicodeString: utf16Units)

        // Post press first, then release.
        keyDown.post(tap: .cghidEventTap)
        keyUp.post(tap: .cghidEventTap)
    }
}
/// Posts a key-down followed by a key-up for `keyCode` to the HID event tap.
///
/// - Parameter keyCode: Virtual key code to press and release.
/// - Throws: `InjectionError.injectionFailed` when either `CGEvent` cannot be
///   created.
private func postKeyEvent(keyCode: CGKeyCode) throws {
    let pressCandidate = CGEvent(keyboardEventSource: nil, virtualKey: keyCode, keyDown: true)
    let releaseCandidate = CGEvent(keyboardEventSource: nil, virtualKey: keyCode, keyDown: false)

    guard let press = pressCandidate, let release = releaseCandidate else {
        throw InjectionError.injectionFailed("Failed to create CGEvent for key code: \(keyCode)")
    }

    // Press first, then release.
    for event in [press, release] {
        event.post(tap: .cghidEventTap)
    }
}
private func copyToClipboard(_ text: String) {
let pasteboard = NSPasteboard.general
pasteboard.clearContents()
pasteboard.setString(text, forType: .string)
logger.debug("Text copied to clipboard")
let success = pasteboard.setString(text, forType: .string)
if success {
logger.info("Text copied to clipboard: \"\(text)\"")
} else {
logger.error("Failed to copy text to clipboard")
}
}
private func isSecureInputActive() -> Bool {
// TODO: Implement IsSecureEventInputEnabled() check in Phase 3
return false
let isSecure = IsSecureEventInputEnabled()
if isSecure {
logger.warning("Secure input is active - text injection will be blocked")
}
return isSecure
}
}

View file

@ -155,8 +155,10 @@ public class PermissionManager: ObservableObject {
if testEvent != nil {
inputMonitoringStatus = .granted
logger.debug("Input monitoring permission appears to be granted")
} else {
inputMonitoringStatus = .denied
logger.warning("Input monitoring permission appears to be denied")
}
}
}