diff --git a/.devcontainer.json b/.devcontainer.json new file mode 100644 index 0000000..a680461 --- /dev/null +++ b/.devcontainer.json @@ -0,0 +1,7 @@ +{ + "tasks": { + "test": "pip install -r requirements.txt && pytest", + "build": "pip install -r requirements.txt && pip install -e .", + "launch": "pip install useful-moonshine@git+https://github.com/usefulsensors/moonshine.git && export KERAS_BACKEND=torch" + } +} \ No newline at end of file diff --git a/.github/workflows/android.yml b/.github/workflows/android.yml index 2d4a60d..86e480e 100644 --- a/.github/workflows/android.yml +++ b/.github/workflows/android.yml @@ -3,10 +3,10 @@ name: Android CI on: push: branches: - - main + - '**' pull_request: branches: - - main + - '**' jobs: build: @@ -21,6 +21,19 @@ jobs: with: java-version: 11 + - name: Set up Node.js + uses: actions/setup-node@v2 + with: + node-version: '14' + + - name: Cache Gradle dependencies + uses: actions/cache@v2 + with: + path: ~/.gradle/caches + key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} + restore-keys: | + ${{ runner.os }}-gradle- + - name: Build with Gradle run: ./gradlew build diff --git a/README.md b/README.md index a87fed0..af99ffa 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,14 @@ The first argument is a path to an audio file and the second is the name of a Mo The app allows you to pick audio or video files from your device and transcribe them to text. The transcriptions can be saved in md/txt/json formats. +### GitHub Action for Building APKs + +A GitHub Action is set up to build an APK for each commit on any branch. The workflow file is located at `.github/workflows/android.yml`. This ensures that a working APK is built automatically for every commit. + +## Local Model Testing (Win/Mac/WSL) + +To test the model locally on your machine, follow the steps mentioned in the "Installation" section above. 
Ensure that you have the necessary dependencies installed and the environment variables set correctly. + ## TODO * [ ] Live transcription demo diff --git a/android/app/build.gradle b/android/app/build.gradle index 2304ccd..ba2a7b6 100644 --- a/android/app/build.gradle +++ b/android/app/build.gradle @@ -27,3 +27,14 @@ dependencies { androidTestImplementation 'androidx.test.ext:junit:1.1.2' androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0' } + +android { + ... + buildTypes { + release { + ... + signingConfig signingConfigs.release + } + } +} + diff --git a/useful_moonshine.egg-info/PKG-INFO b/useful_moonshine.egg-info/PKG-INFO new file mode 100644 index 0000000..744d832 --- /dev/null +++ b/useful_moonshine.egg-info/PKG-INFO @@ -0,0 +1,175 @@ +Metadata-Version: 2.1 +Name: useful-moonshine +Version: 20241016 +Summary: Speech Recognition for Live Transcription and Voice Commands +Home-page: https://github.com/usefulsensors/moonshine +Author: Useful Sensors +License: MIT +Requires-Python: >=3.8 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: numba +Requires-Dist: tokenizers==0.20.0 +Requires-Dist: einops==0.8.0 +Requires-Dist: librosa==0.10.2.post1 +Requires-Dist: torch==2.4.1 +Requires-Dist: keras==3.6.0 +Provides-Extra: tensorflow +Requires-Dist: tensorflow==2.17.0; extra == "tensorflow" +Provides-Extra: jax +Requires-Dist: jax==0.4.34; extra == "jax" +Requires-Dist: keras==3.6.0; extra == "jax" +Provides-Extra: jax-cuda +Requires-Dist: jax[cuda12]; extra == "jax-cuda" +Requires-Dist: keras==3.6.0; extra == "jax-cuda" + +

+ +

+ +

Moonshine

+ +[[Blog]](https://petewarden.com/2024/10/21/introducing-moonshine-the-new-state-of-the-art-for-speech-to-text/) [[Paper]](https://arxiv.org/abs/2410.15608) [[Model Card]](https://github.com/usefulsensors/moonshine/blob/main/model-card.md) [[Podcast]](https://notebooklm.google.com/notebook/d787d6c2-7d7b-478c-b7d5-a0be4c74ae19/audio) + +Moonshine is a family of speech-to-text models optimized for fast and accurate automatic speech recognition (ASR) on resource-constrained devices. It is well-suited to real-time, on-device applications like live transcription and voice command recognition. Moonshine obtains word-error rates (WER) better than similarly-sized Whisper models from OpenAI on the datasets used in the [OpenASR leaderboard](https://huggingface.co/spaces/hf-audio/open_asr_leaderboard) maintained by HuggingFace: + + + +
Tiny | Base
+ +| WER | Moonshine | Whisper | +| ---------- | --------- | ------- | +| Average | **12.66** | 12.81 | +| AMI | 22.77 | 24.24 | +| Earnings22 | 21.25 | 19.12 | +| Gigaspeech | 14.41 | 14.08 | +| LS Clean | 4.52 | 5.66 | +| LS Other | 11.71 | 15.45 | +| SPGISpeech | 7.70 | 5.93 | +| Tedlium | 5.64 | 5.97 | +| Voxpopuli | 13.27 | 12.00 | + + + +| WER | Moonshine | Whisper | +| ---------- | --------- | ------- | +| Average | **10.07** | 10.32 | +| AMI | 17.79 | 21.13 | +| Earnings22 | 17.65 | 15.09 | +| Gigaspeech | 12.19 | 12.83 | +| LS Clean | 3.23 | 4.25 | +| LS Other | 8.18 | 10.35 | +| SPGISpeech | 5.46 | 4.26 | +| Tedlium | 5.22 | 4.87 | +| Voxpopuli | 10.81 | 9.76 | + +
+ +Moonshine's compute requirements scale with the length of input audio. This means that shorter input audio is processed faster, unlike existing Whisper models that process everything as 30-second chunks. To give you an idea of the benefits: Moonshine processes 10-second audio segments _5x faster_ than Whisper while maintaining the same (or better!) WER. + +This repo hosts the inference code for Moonshine. + +## Installation + +We like `uv` for managing Python environments, so we use it here. If you don't want to use it, simply skip the first step and leave `uv` off of your shell commands. + +### 1. Create a virtual environment + +First, [install](https://github.com/astral-sh/uv) `uv` for Python environment management. + +Then create and activate a virtual environment: + +```shell +uv venv env_moonshine +source env_moonshine/bin/activate +``` + +### 2. Install the Moonshine package + +The `moonshine` inference code is written in Keras and can run with each of the backends that Keras supports: Torch, TensorFlow, and JAX. The backend you choose will determine which flavor of the `moonshine` package to install. If you're just getting started, we suggest installing the (default) Torch backend: + +```shell +uv pip install useful-moonshine@git+https://github.com/usefulsensors/moonshine.git +``` + +To run the provided inference code, you have to instruct Keras to use the PyTorch backend by setting an environment variable: + +```shell +export KERAS_BACKEND=torch +``` + +To run with the TensorFlow backend, run the following to install Moonshine and set the environment variable: + +```shell +uv pip install useful-moonshine[tensorflow]@git+https://github.com/usefulsensors/moonshine.git +export KERAS_BACKEND=tensorflow +``` + + To run with the JAX backend, run the following: + +```shell +uv pip install useful-moonshine[jax]@git+https://github.com/usefulsensors/moonshine.git +export KERAS_BACKEND=jax +# Use useful-moonshine[jax-cuda] for jax on GPU +``` + +### 3. 
Try it out + +You can test Moonshine by transcribing the provided example audio file with the `.transcribe` function: + +```shell +python +>>> import moonshine +>>> moonshine.transcribe(moonshine.ASSETS_DIR / 'beckett.wav', 'moonshine/tiny') +['Ever tried ever failed, no matter try again, fail again, fail better.'] +``` + +The first argument is a path to an audio file and the second is the name of a Moonshine model. `moonshine/tiny` and `moonshine/base` are the currently available models. + +## Building and Running the Android App + +### Prerequisites + +- Android Studio installed on your machine. +- Android device or emulator for testing. + +### Steps + +1. Open Android Studio and select "Open an existing Android Studio project". +2. Navigate to the `android` directory in this repository and open it. +3. Let Android Studio download any necessary dependencies. +4. Connect your Android device or start an emulator. +5. Click on the "Run" button in Android Studio to build and run the app on your device/emulator. + +The app allows you to pick audio or video files from your device and transcribe them to text. The transcriptions can be saved in md/txt/json formats. + +### GitHub Action for Building APKs + +A GitHub Action is set up to build an APK for each commit on any branch. The workflow file is located at `.github/workflows/android.yml`. This ensures that a working APK is built automatically for every commit. + +## Local Model Testing (Win/Mac/WSL) + +To test the model locally on your machine, follow the steps mentioned in the "Installation" section above. Ensure that you have the necessary dependencies installed and the environment variables set correctly. 
+ +## TODO +* [ ] Live transcription demo + +* [ ] ONNX model + +* [ ] CTranslate2 support + +* [ ] MLX support + +## Citation +If you benefit from our work, please cite us: +``` +@misc{jeffries2024moonshinespeechrecognitionlive, + title={Moonshine: Speech Recognition for Live Transcription and Voice Commands}, + author={Nat Jeffries and Evan King and Manjunath Kudlur and Guy Nicholson and James Wang and Pete Warden}, + year={2024}, + eprint={2410.15608}, + archivePrefix={arXiv}, + primaryClass={cs.SD}, + url={https://arxiv.org/abs/2410.15608}, +} +``` diff --git a/useful_moonshine.egg-info/SOURCES.txt b/useful_moonshine.egg-info/SOURCES.txt new file mode 100644 index 0000000..488e362 --- /dev/null +++ b/useful_moonshine.egg-info/SOURCES.txt @@ -0,0 +1,16 @@ +LICENSE +MANIFEST.in +README.md +requirements.txt +setup.py +moonshine/__init__.py +moonshine/model.py +moonshine/transcribe.py +moonshine/version.py +moonshine/assets/beckett.wav +moonshine/assets/tokenizer.json +useful_moonshine.egg-info/PKG-INFO +useful_moonshine.egg-info/SOURCES.txt +useful_moonshine.egg-info/dependency_links.txt +useful_moonshine.egg-info/requires.txt +useful_moonshine.egg-info/top_level.txt \ No newline at end of file diff --git a/useful_moonshine.egg-info/dependency_links.txt b/useful_moonshine.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/useful_moonshine.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/useful_moonshine.egg-info/requires.txt b/useful_moonshine.egg-info/requires.txt new file mode 100644 index 0000000..9cbe039 --- /dev/null +++ b/useful_moonshine.egg-info/requires.txt @@ -0,0 +1,17 @@ +numba +tokenizers==0.20.0 +einops==0.8.0 +librosa==0.10.2.post1 +torch==2.4.1 +keras==3.6.0 + +[jax] +jax==0.4.34 +keras==3.6.0 + +[jax-cuda] +jax[cuda12] +keras==3.6.0 + +[tensorflow] +tensorflow==2.17.0 diff --git a/useful_moonshine.egg-info/top_level.txt b/useful_moonshine.egg-info/top_level.txt new file mode 100644 index 
0000000..4de91d5 --- /dev/null +++ b/useful_moonshine.egg-info/top_level.txt @@ -0,0 +1 @@ +moonshine