- Add SwiftWhisper integration for real whisper.cpp support with Metal acceleration
- Implement complete WhisperCPPEngine with audio transcription and text normalization
- Build ModelManager with curated catalog, downloads, and Core ML encoder support
- Create preferences window with model management UI (download, select, delete)
- Add NSStatusItem menu bar with model status display
- Integrate STT pipeline: hotkey → audio capture → whisper transcription
- Add model setup alerts when no model is loaded
- Support offline operation with performance targets met (<4s for 10s audio)
- Store models in ~/Library/Application Support/MenuWhisper/Models/

Phase 2 TECHSPEC requirements fully implemented and tested.
160 lines
No EOL
5.5 KiB
JSON
160 lines
No EOL
5.5 KiB
JSON
{
  "models": [
    {
      "name": "whisper-tiny",
      "family": "OpenAI-Whisper",
      "format": "bin",
      "size_mb": 39,
      "languages": ["multilingual"],
      "recommended_backend": "whisper.cpp",
      "quality_tier": "tiny",
      "license": "MIT",
      "sha256": "",
      "download_url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin",
      "notes": "Fastest model, suitable for real-time applications with basic accuracy."
    },
    {
      "name": "whisper-tiny.en",
      "family": "OpenAI-Whisper",
      "format": "bin",
      "size_mb": 39,
      "languages": ["en"],
      "recommended_backend": "whisper.cpp",
      "quality_tier": "tiny",
      "license": "MIT",
      "sha256": "",
      "download_url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin",
      "notes": "English-only tiny model, slightly more accurate for English than multilingual tiny."
    },
    {
      "name": "whisper-base",
      "family": "OpenAI-Whisper",
      "format": "bin",
      "size_mb": 142,
      "languages": ["multilingual"],
      "recommended_backend": "whisper.cpp",
      "quality_tier": "base",
      "license": "MIT",
      "sha256": "",
      "download_url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin",
      "notes": "Good balance of speed and accuracy, recommended for most use cases."
    },
    {
      "name": "whisper-base.en",
      "family": "OpenAI-Whisper",
      "format": "bin",
      "size_mb": 142,
      "languages": ["en"],
      "recommended_backend": "whisper.cpp",
      "quality_tier": "base",
      "license": "MIT",
      "sha256": "",
      "download_url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin",
      "notes": "English-only base model, optimal for English-only applications."
    },
    {
      "name": "whisper-small",
      "family": "OpenAI-Whisper",
      "format": "bin",
      "size_mb": 466,
      "languages": ["multilingual"],
      "recommended_backend": "whisper.cpp",
      "quality_tier": "small",
      "license": "MIT",
      "sha256": "",
      "download_url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
      "notes": "Excellent balance of speed and accuracy for M1/M2/M3 machines."
    },
    {
      "name": "whisper-small.en",
      "family": "OpenAI-Whisper",
      "format": "bin",
      "size_mb": 466,
      "languages": ["en"],
      "recommended_backend": "whisper.cpp",
      "quality_tier": "small",
      "license": "MIT",
      "sha256": "",
      "download_url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin",
      "notes": "English-only small model, high accuracy for English-only use."
    },
    {
      "name": "whisper-medium",
      "family": "OpenAI-Whisper",
      "format": "bin",
      "size_mb": 1540,
      "languages": ["multilingual"],
      "recommended_backend": "whisper.cpp",
      "quality_tier": "medium",
      "license": "MIT",
      "sha256": "",
      "download_url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin",
      "notes": "Higher accuracy but slower, requires more RAM (2-3GB)."
    },
    {
      "name": "whisper-medium.en",
      "family": "OpenAI-Whisper",
      "format": "bin",
      "size_mb": 1540,
      "languages": ["en"],
      "recommended_backend": "whisper.cpp",
      "quality_tier": "medium",
      "license": "MIT",
      "sha256": "",
      "download_url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en.bin",
      "notes": "English-only medium model, very high accuracy for English."
    },
    {
      "name": "whisper-large-v2",
      "family": "OpenAI-Whisper",
      "format": "bin",
      "size_mb": 3090,
      "languages": ["multilingual"],
      "recommended_backend": "whisper.cpp",
      "quality_tier": "large",
      "license": "MIT",
      "sha256": "",
      "download_url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v2.bin",
      "notes": "Highest accuracy but slowest, requires significant RAM (4-5GB)."
    },
    {
      "name": "whisper-large-v3",
      "family": "OpenAI-Whisper",
      "format": "bin",
      "size_mb": 3090,
      "languages": ["multilingual"],
      "recommended_backend": "whisper.cpp",
      "quality_tier": "large",
      "license": "MIT",
      "sha256": "",
      "download_url": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin",
      "notes": "Latest large model with improved accuracy, requires significant RAM (4-5GB)."
    },
    {
      "name": "distil-whisper-large-v2",
      "family": "Distil-Whisper",
      "format": "bin",
      "size_mb": 1540,
      "languages": ["multilingual"],
      "recommended_backend": "whisper.cpp",
      "quality_tier": "large",
      "license": "MIT",
      "sha256": "",
      "download_url": "https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/ggml-distil-large-v2.bin",
      "notes": "Distilled large model, 2x faster than large-v2 with similar accuracy."
    },
    {
      "name": "distil-whisper-large-v3",
      "family": "Distil-Whisper",
      "format": "bin",
      "size_mb": 1540,
      "languages": ["multilingual"],
      "recommended_backend": "whisper.cpp",
      "quality_tier": "large",
      "license": "MIT",
      "sha256": "",
      "download_url": "https://huggingface.co/distil-whisper/distil-large-v3/resolve/main/ggml-distil-large-v3.bin",
      "notes": "Latest distilled model, excellent balance of speed and accuracy."
    }
  ]
}