Complete Phase 3: Text injection with permissions management

- Implement text injection with paste method (NSPasteboard + ⌘V)
- Add typing fallback with Unicode support that respects the active keyboard layout
- Integrate secure input detection using IsSecureEventInputEnabled()
- Add comprehensive permission checking and management
- Create Permissions tab in preferences with status indicators
- Add permission onboarding flow for new users
- Implement automatic fallback between injection methods
- Add deep links to System Settings for permission grants
- Remove duplicate preferences menu item
- Create development build script for easier testing
- Mark Phase 3 tasks as completed in TODO.md
This commit is contained in:
Felipe M 2025-09-19 09:04:38 +02:00
parent 5663f3c3de
commit 7ba5895406
Signed by: fmartingr
GPG key ID: CCFBC5637D4000A8
7 changed files with 589 additions and 56 deletions

View file

@ -4,6 +4,7 @@ import MenuWhisperAudio
import CorePermissions
import CoreSTT
import CoreModels
import CoreInjection
import AVFoundation
public class AppController: ObservableObject {
@ -14,6 +15,7 @@ public class AppController: ObservableObject {
private let audioEngine = AudioEngine()
private let permissionManager = PermissionManager()
private let soundManager = SoundManager()
private let textInjector: TextInjector
// STT components
public let whisperEngine = WhisperCPPEngine(numThreads: 4, useGPU: true)
@ -33,6 +35,7 @@ public class AppController: ObservableObject {
private let maxDictationDuration: TimeInterval = 600 // 10 minutes default
public init() {
textInjector = TextInjector(permissionManager: permissionManager)
setupDelegates()
setupNotifications()
setupSTTComponents()
@ -91,6 +94,9 @@ public class AppController: ObservableObject {
setupStatusItemMenu()
}
// Check all required permissions on startup
checkAllPermissionsOnStartup()
// Check microphone permission first
checkMicrophonePermission { [weak self] granted in
if granted {
@ -130,12 +136,6 @@ public class AppController: ObservableObject {
preferencesMenuItem.target = self
menu.addItem(preferencesMenuItem)
// Test item - add direct preferences shortcut
let testPrefsMenuItem = NSMenuItem(title: "Open Preferences (⇧⌘P)", action: #selector(openPreferences), keyEquivalent: "P")
testPrefsMenuItem.keyEquivalentModifierMask = [.shift, .command]
testPrefsMenuItem.target = self
menu.addItem(testPrefsMenuItem)
// Quit
let quitMenuItem = NSMenuItem(title: "Quit MenuWhisper", action: #selector(quitApp), keyEquivalent: "q")
quitMenuItem.target = self
@ -191,6 +191,54 @@ public class AppController: ObservableObject {
hotkeyManager.enableHotkey()
}
/// Runs the launch-time permission check, logs the three permission statuses,
/// and schedules the onboarding alert when `shouldShowPermissionOnboarding()`
/// returns `true`.
private func checkAllPermissionsOnStartup() {
    logger.info("Checking all permissions on startup")

    // Ask the permission manager to check everything before the statuses are logged.
    permissionManager.checkAllPermissions()
    logger.info("Permission status: Microphone=\(permissionManager.microphoneStatus), Accessibility=\(permissionManager.accessibilityStatus), InputMonitoring=\(permissionManager.inputMonitoringStatus)")

    // Nothing more to do unless the onboarding is still pending.
    guard shouldShowPermissionOnboarding() else { return }

    // The onboarding alert is main-actor isolated, so present it from a main-actor task.
    Task { @MainActor in
        showPermissionOnboarding()
    }
}
/// Decides whether the permission onboarding alert should be presented.
///
/// - Returns: `false` once the "hasShownPermissionOnboarding" default is set;
///   otherwise `true` when Accessibility or Input Monitoring is not granted.
private func shouldShowPermissionOnboarding() -> Bool {
    let alreadyShown = UserDefaults.standard.bool(forKey: "hasShownPermissionOnboarding")
    guard !alreadyShown else { return false }

    // Only these two statuses are consulted here; the microphone status is not
    // part of this decision.
    let allGranted = permissionManager.accessibilityStatus == .granted
        && permissionManager.inputMonitoringStatus == .granted
    return !allGranted
}
/// Presents the welcome alert that explains the required permissions and, if
/// the user chooses the first button, opens the preferences window on the
/// Permissions tab.
@MainActor
private func showPermissionOnboarding() {
    let onboardingAlert = NSAlert()
    onboardingAlert.alertStyle = .informational
    onboardingAlert.messageText = "Welcome to MenuWhisper"
    onboardingAlert.informativeText = "MenuWhisper needs some permissions to work properly:\n\n• Microphone: To capture your speech\n• Accessibility: To insert transcribed text\n• Input Monitoring: To send keyboard events\n\nWould you like to set up permissions now?"
    onboardingAlert.addButton(withTitle: "Set Up Permissions")
    onboardingAlert.addButton(withTitle: "Later")

    // The modal call returns only after the user has picked a button.
    let wantsSetup = onboardingAlert.runModal() == .alertFirstButtonReturn

    // Record that the onboarding was shown, whichever button was chosen, so
    // `shouldShowPermissionOnboarding()` returns false from now on.
    UserDefaults.standard.set(true, forKey: "hasShownPermissionOnboarding")

    guard wantsSetup else { return }
    showPreferences(initialTab: 1) // Open Permissions tab
}
private func checkMicrophonePermission(completion: @escaping (Bool) -> Void) {
permissionManager.requestMicrophonePermission { status in
DispatchQueue.main.async {
@ -281,10 +329,9 @@ public class AppController: ObservableObject {
logger.info("Transcription completed in \(String(format: "%.2f", duration))s: \"\(transcription)\"")
// For now, just print the result - in Phase 3 we'll inject it
// Inject the transcribed text
await MainActor.run {
print("🎤 TRANSCRIPTION RESULT: \(transcription)")
showTranscriptionResult(transcription)
injectTranscriptionResult(transcription)
}
} catch {
@ -295,11 +342,32 @@ public class AppController: ObservableObject {
}
@MainActor
private func showTranscriptionResult(_ text: String) {
// For Phase 2, we'll just show it in logs and console
// In Phase 3, this will inject the text into the active app
logger.info("Transcription result: \(text)")
finishProcessing()
/// Hands the transcribed text to the text injector and reports any failure
/// to the user with the matching alert.
///
/// `finishProcessing()` runs exactly once on every path, after any alert
/// shown by the error handlers has returned.
///
/// - Parameter text: The transcription to inject.
private func injectTranscriptionResult(_ text: String) {
    // Single exit hook instead of repeating the call in each branch.
    defer { finishProcessing() }

    logger.info("Attempting to inject transcription result: \(text)")

    do {
        // Request the paste method and allow the injector to fall back.
        try textInjector.injectText(text, method: .paste, enableFallback: true)
        logger.info("Text injection successful")
    } catch InjectionError.secureInputActive {
        logger.warning("Secure input active - text copied to clipboard")
        showSecureInputNotice(text)
    } catch InjectionError.accessibilityPermissionRequired {
        logger.error("Accessibility permission required for text injection")
        showPermissionRequiredNotice()
    } catch {
        // Any other injection failure is surfaced with its localized description.
        logger.error("Text injection failed: \(error)")
        showInjectionError(error.localizedDescription)
    }
}
@MainActor
@ -364,7 +432,42 @@ public class AppController: ObservableObject {
}
@MainActor
public func showPreferences() {
/// Shows an informational alert saying that injection was blocked by secure
/// input, quoting the transcribed text in the message.
///
/// - Parameter text: The transcription that could not be injected.
private func showSecureInputNotice(_ text: String) {
    // NOTE(review): the message states the text is on the clipboard; this
    // method does not copy it itself — presumably the injector did. Confirm.
    let explanation = "Text injection is blocked because secure input is active (likely in a password field or secure app).\n\nThe transcribed text has been copied to your clipboard instead: \"\(text)\""

    let notice = NSAlert()
    notice.alertStyle = .informational
    notice.messageText = "Secure Input Active"
    notice.informativeText = explanation
    notice.addButton(withTitle: "OK")
    notice.runModal()
}
/// Warns the user that Accessibility and Input Monitoring permissions are
/// missing and, if the first button is chosen, opens the preferences window
/// on the Permissions tab.
@MainActor
private func showPermissionRequiredNotice() {
    let prompt = NSAlert()
    prompt.alertStyle = .warning
    prompt.messageText = "Permission Required"
    prompt.informativeText = "MenuWhisper needs Accessibility and Input Monitoring permissions to insert text into other applications.\n\nWould you like to open System Settings to grant these permissions?"
    prompt.addButton(withTitle: "Open System Settings")
    prompt.addButton(withTitle: "Cancel")

    // Anything other than the first button leaves the preferences closed.
    guard prompt.runModal() == .alertFirstButtonReturn else { return }
    showPreferences(initialTab: 1) // Open Permissions tab
}
/// Shows a warning alert for a text injection failure.
///
/// - Parameter message: Failure description appended to the alert text.
@MainActor
private func showInjectionError(_ message: String) {
    let details = "Failed to insert the transcribed text: \(message)"

    let failureAlert = NSAlert()
    failureAlert.alertStyle = .warning
    failureAlert.messageText = "Text Injection Failed"
    failureAlert.informativeText = details
    failureAlert.addButton(withTitle: "OK")
    failureAlert.runModal()
}
@MainActor
public func showPreferences(initialTab: Int = 0) {
guard let modelManager = modelManager else {
logger.error("ModelManager not initialized yet")
return
@ -373,8 +476,13 @@ public class AppController: ObservableObject {
if preferencesWindow == nil {
preferencesWindow = PreferencesWindowController(
modelManager: modelManager,
whisperEngine: whisperEngine
whisperEngine: whisperEngine,
permissionManager: permissionManager,
initialTab: initialTab
)
} else {
// If window already exists, update the selected tab
preferencesWindow?.setSelectedTab(initialTab)
}
preferencesWindow?.showWindow(nil)

View file

@ -2,14 +2,18 @@ import SwiftUI
import CoreModels
import CoreSTT
import CoreUtils
import CorePermissions
class PreferencesWindowController: NSWindowController {
private let modelManager: ModelManager
private let whisperEngine: WhisperCPPEngine
private let permissionManager: PermissionManager
private var preferencesView: PreferencesView?
init(modelManager: ModelManager, whisperEngine: WhisperCPPEngine) {
init(modelManager: ModelManager, whisperEngine: WhisperCPPEngine, permissionManager: PermissionManager, initialTab: Int = 0) {
self.modelManager = modelManager
self.whisperEngine = whisperEngine
self.permissionManager = permissionManager
let window = NSWindow(
contentRect: NSRect(x: 0, y: 0, width: 600, height: 500),
@ -22,33 +26,53 @@ class PreferencesWindowController: NSWindowController {
window.title = "MenuWhisper Preferences"
window.center()
window.contentView = NSHostingView(
rootView: PreferencesView(
modelManager: modelManager,
whisperEngine: whisperEngine,
onClose: { [weak self] in
self?.close()
}
)
preferencesView = PreferencesView(
modelManager: modelManager,
whisperEngine: whisperEngine,
permissionManager: permissionManager,
initialTab: initialTab,
onClose: { [weak self] in
self?.close()
}
)
window.contentView = NSHostingView(rootView: preferencesView!)
}
required init?(coder: NSCoder) {
fatalError("init(coder:) has not been implemented")
}
/// Switches the already-created preferences window to the given tab.
///
/// - Parameter tabIndex: Tag of the tab to select (the `.tag(_:)` values
///   used by `PreferencesView`'s `TabView`).
func setSelectedTab(_ tabIndex: Int) {
    // `preferencesView` is only a value copy of the SwiftUI struct. Its
    // `@State` storage is not connected to the view hosted in the window, so
    // forwarding the call to that copy would not change the visible tab.
    // Re-host a fresh view whose initial selection is the requested tab.
    // NOTE(review): this also resets the view's other transient `@State`
    // (e.g. download indicators) — confirm that is acceptable, or move the
    // selection into a shared observable model instead.
    let refreshedView = PreferencesView(
        modelManager: modelManager,
        whisperEngine: whisperEngine,
        permissionManager: permissionManager,
        initialTab: tabIndex,
        onClose: { [weak self] in
            self?.close()
        }
    )
    preferencesView = refreshedView
    window?.contentView = NSHostingView(rootView: refreshedView)
}
}
struct PreferencesView: View {
@ObservedObject var modelManager: ModelManager
let whisperEngine: WhisperCPPEngine
@ObservedObject var permissionManager: PermissionManager
let onClose: () -> Void
@State private var selectedTab = 0
@State private var selectedTab: Int
@State private var isDownloading: [String: Bool] = [:]
@State private var downloadProgress: [String: Double] = [:]
@State private var showingDeleteAlert = false
@State private var modelToDelete: ModelInfo?
/// Creates the preferences root view.
///
/// - Parameters:
///   - modelManager: Observed model manager stored on the view.
///   - whisperEngine: Engine instance stored on the view.
///   - permissionManager: Observed permission manager passed to the Permissions tab.
///   - initialTab: Tag of the tab selected when the view is first created; defaults to `0`.
///   - onClose: Callback stored on the view for closing it.
init(modelManager: ModelManager, whisperEngine: WhisperCPPEngine, permissionManager: PermissionManager, initialTab: Int = 0, onClose: @escaping () -> Void) {
    // Seed the tab-selection state with the caller's choice.
    _selectedTab = State(initialValue: initialTab)
    self.onClose = onClose
    self.permissionManager = permissionManager
    self.whisperEngine = whisperEngine
    self.modelManager = modelManager
}
/// Assigns `tabIndex` to the view's tab-selection state.
///
/// NOTE(review): `selectedTab` is `@State`; a write made through a copy of
/// this struct that is not installed in a view hierarchy presumably does not
/// reach the on-screen view — verify against the caller.
func setSelectedTab(_ tabIndex: Int) {
    self.selectedTab = tabIndex
}
var body: some View {
TabView(selection: $selectedTab) {
ModelsTab(
@ -64,11 +88,17 @@ struct PreferencesView: View {
}
.tag(0)
PermissionsTab(permissionManager: permissionManager)
.tabItem {
Label("Permissions", systemImage: "lock.shield")
}
.tag(1)
GeneralTab()
.tabItem {
Label("General", systemImage: "gearshape")
}
.tag(1)
.tag(2)
}
.frame(width: 600, height: 500)
.alert("Delete Model", isPresented: $showingDeleteAlert) {
@ -324,6 +354,173 @@ struct ModelRow: View {
}
}
/// Preferences tab listing the Microphone, Accessibility and Input Monitoring
/// permissions, each with its current status and actions to open System
/// Settings or refresh the status.
struct PermissionsTab: View {
    @ObservedObject var permissionManager: PermissionManager

    var body: some View {
        VStack(alignment: .leading, spacing: 16) {
            Text("Permissions")
                .font(.title2)
                .fontWeight(.semibold)

            Text("MenuWhisper requires certain system permissions to function properly. Click the buttons below to grant permissions in System Settings.")
                .font(.caption)
                .foregroundColor(.secondary)

            permissionList
            helpSection

            Spacer()
        }
        .padding(20)
    }

    /// Card containing one row per permission, separated by dividers.
    private var permissionList: some View {
        VStack(alignment: .leading, spacing: 12) {
            // Microphone Permission
            PermissionRow(
                title: "Microphone",
                description: "Required to capture speech for transcription",
                status: permissionManager.microphoneStatus,
                onOpenSettings: { permissionManager.openSystemSettings(for: .microphone) },
                onRefresh: refreshStatuses
            )

            Divider()

            // Accessibility Permission
            PermissionRow(
                title: "Accessibility",
                description: "Required to insert transcribed text into other applications",
                status: permissionManager.accessibilityStatus,
                onOpenSettings: { permissionManager.openSystemSettings(for: .accessibility) },
                onRefresh: refreshStatuses
            )

            Divider()

            // Input Monitoring Permission
            PermissionRow(
                title: "Input Monitoring",
                description: "Required to send keyboard events for text insertion",
                status: permissionManager.inputMonitoringStatus,
                onOpenSettings: { permissionManager.openSystemSettings(for: .inputMonitoring) },
                onRefresh: refreshStatuses
            )
        }
        .padding(16)
        .background(Color(NSColor.controlBackgroundColor))
        .cornerRadius(12)
    }

    /// Short instructions on what to do after granting permissions.
    private var helpSection: some View {
        VStack(alignment: .leading, spacing: 8) {
            Text("Need Help?")
                .font(.headline)

            Text("After granting permissions in System Settings:")
                .font(.body)
                .foregroundColor(.secondary)

            VStack(alignment: .leading, spacing: 4) {
                Text("1. Close System Settings")
                Text("2. Click 'Refresh Status' to update permission status")
                Text("3. Some permissions may require restarting MenuWhisper")
            }
            .font(.caption)
            .foregroundColor(.secondary)
            .padding(.leading, 8)
        }
    }

    /// Shared refresh action: every row asks the manager to check all permissions.
    private func refreshStatuses() {
        permissionManager.checkAllPermissions()
    }
}
/// One row of the Permissions tab: title, description, a colored status
/// badge, and the "Open System Settings" / "Refresh Status" actions.
struct PermissionRow: View {
    let title: String
    let description: String
    let status: PermissionStatus
    let onOpenSettings: () -> Void
    let onRefresh: () -> Void

    var body: some View {
        HStack {
            VStack(alignment: .leading, spacing: 4) {
                HStack {
                    Text(title)
                        .font(.body)
                        .fontWeight(.medium)

                    Spacer()

                    statusBadge
                }

                Text(description)
                    .font(.caption)
                    .foregroundColor(.secondary)
            }

            Spacer()

            actionButtons
        }
    }

    /// Colored dot plus status label.
    private var statusBadge: some View {
        HStack(spacing: 4) {
            Circle()
                .fill(statusColor)
                .frame(width: 8, height: 8)

            Text(statusText)
                .font(.caption)
                .fontWeight(.medium)
                .foregroundColor(statusColor)
        }
    }

    /// Buttons on the trailing edge of the row.
    private var actionButtons: some View {
        VStack(spacing: 6) {
            // The settings shortcut is only offered while the permission is not granted.
            if status != .granted {
                Button("Open System Settings", action: onOpenSettings)
                    .buttonStyle(.bordered)
                    .controlSize(.small)
            }

            Button("Refresh Status", action: onRefresh)
                .buttonStyle(.borderless)
                .controlSize(.small)
                .foregroundColor(.secondary)
        }
    }

    /// Badge color: green when granted, red when denied, orange otherwise.
    private var statusColor: Color {
        switch status {
        case .granted: return .green
        case .denied: return .red
        case .notDetermined, .restricted: return .orange
        }
    }

    /// Label shown next to the status dot.
    private var statusText: String {
        switch status {
        case .granted: return "Granted"
        case .denied: return "Denied"
        case .notDetermined: return "Not Set"
        case .restricted: return "Restricted"
        }
    }
}
struct GeneralTab: View {
var body: some View {
VStack(alignment: .leading, spacing: 16) {