-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #107 from lf-lang/vinzbarbuto-audioclassification-v1
Audio Classification example
- Loading branch information
Showing
9 changed files
with
418 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
*.tflite filter=lfs diff=lfs merge=lfs -text |
166 changes: 166 additions & 0 deletions
166
examples/Python/src/AudioClassification/AudioClassification.lf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
/**
 * This example illustrates the capabilities of an Emergency Sirens Classifier, which can classify
 * two distinct categories: Emergency, and Other
 *
 * @author Vincenzo Barbuto
 */
target Python {
  // Bound the run: the `timeout` target property ends execution after 100 sec,
  // which is what eventually triggers the reactors' shutdown reactions.
  timeout: 100 sec
}
|
||
/**
 * Captures audio from the default input device on a background thread and
 * forwards every captured block on `audio_data`.
 *
 * The capture thread hands each block to the runtime through the physical
 * action `send_audio_data`. The thread runs until the shutdown reaction clears
 * `thread_should_be_running`, so no keyboard interaction is needed to stop it.
 */
reactor Microphone {
  physical action send_audio_data
  output audio_data

  state audio_capture_thread  # Thread variables
  state thread_should_be_running

  state buffer_size  # Audio variables
  state sample_rate
  state num_channels
  state overlapping_factor
  state input_length_in_samples
  state interval_between_inference

  preamble {=
    import time as tm
    import sounddevice as sd
    import numpy as np
    import threading

    def audio_capture(self, audio_action, running):
        """Record audio and schedule each block on `audio_action`.

        Args:
            audio_action: physical action on which captured blocks are scheduled.
            running: threading.Event; recording continues while it is set and
                stops shortly after it is cleared.
        """

        def callback(indata, frames, time, status):
            # Invoked by sounddevice for every captured block.
            if status:
                print(status)

            # Convert the block to float32 and shape it as (1, buffer_size)
            # before handing it to the runtime.
            input_data = self.np.array(indata, dtype=self.np.float32)[:self.buffer_size].reshape((1, self.buffer_size))
            audio_action.schedule(0, input_data)

        with self.sd.InputStream(channels=self.num_channels, samplerate=self.sample_rate, callback=callback, blocksize=self.buffer_size):
            print("#" * 50)
            print("Recording started. It stops when the program shuts down")
            print("#" * 50)
            # Keep the stream open until the shutdown reaction clears the event.
            # Polling (instead of blocking on stdin) lets the thread terminate
            # without any user interaction.
            while running.is_set():
                self.tm.sleep(0.1)
            print("\nRecording stopped")
  =}

  reaction(startup) -> send_audio_data {=
    # Setup Audio recorders
    self.buffer_size, self.sample_rate, self.num_channels, self.overlapping_factor = 15600, 16000, 1, 0.5
    self.input_length_in_samples = self.buffer_size
    self.interval_between_inference = self.input_length_in_samples * (1 - self.overlapping_factor)

    # Launch Audio Capture Thread
    self.thread_should_be_running = self.threading.Event()
    self.thread_should_be_running.set()

    self.audio_capture_thread = self.threading.Thread(target=self.audio_capture, args=(send_audio_data, self.thread_should_be_running))
    self.audio_capture_thread.start()
  =}

  reaction(send_audio_data) -> audio_data {=
    # Forward the captured block unchanged to the downstream reactor.
    audio_data.set(send_audio_data.value)
  =}

  reaction(shutdown) {=
    print("*"*32 + " SHUTTING DOWN " + "*"*32)
    # Ask the capture thread to stop, then wait briefly for it to close the
    # audio stream. The timeout keeps shutdown from hanging if the audio
    # backend is slow to release the device.
    self.thread_should_be_running.clear()
    self.audio_capture_thread.join(timeout=1.0)
  =}
}
|
||
/**
 * Loads a TensorFlow Lite model at startup and runs it on every value received
 * on `input_data`. For each input it emits the selected output tensor on
 * `output_data` and the physical time spent in `invoke()` on `inference_time`.
 *
 * @param model File name of the .tflite model, resolved relative to the
 * current working directory.
 */
reactor Classifier(model="evds_bin.tflite") {
  input input_data
  output output_data
  output inference_time

  state interpreter
  state input_details
  state output_details

  preamble {=
    import tensorflow as tf
  =}

  reaction(startup) {=
    print(f"Loading the model: {self.model}")
    # Build the interpreter, reserve its tensors, and cache the tensor
    # metadata that the inference reaction needs.
    tflite = self.tf.lite.Interpreter(f"./{self.model}")
    tflite.allocate_tensors()
    self.interpreter = tflite
    self.input_details = tflite.get_input_details()
    self.output_details = tflite.get_output_details()
  =}

  reaction(input_data) -> output_data, inference_time {=
    tflite = self.interpreter
    tflite.set_tensor(self.input_details[0]["index"], input_data.value)

    # Time only the invoke() call, using physical time.
    began = lf.time.physical()
    tflite.invoke()
    elapsed = lf.time.physical() - began

    # NOTE(review): the second output tensor (index 1) is read here; presumably
    # the model exposes more than one output head -- confirm against the model.
    scores = tflite.get_tensor(self.output_details[1]["index"])
    output_data.set(scores)
    inference_time.set(elapsed)
  =}
}
|
||
/**
 * Collects classification results and inference times, prints the averaged
 * classification every `window` inferences, and prints a timing summary at
 * shutdown.
 *
 * @param labels Class names, indexed by the position of the winning score.
 * @param window Number of consecutive inferences averaged per printed result.
 */
reactor Actuator(labels = {= ["Emergency", "Other"] =}, window=3) {
  state results_window
  state times
  state count
  state total_times

  input results
  input inference_time

  preamble {=
    import numpy as np
  =}

  reaction(startup) {=
    self.results_window = []
    self.times = []
    self.total_times = []
    self.count = 0
  =}

  reaction(results, inference_time) {=
    self.results_window.append(results.value)
    self.times.append(inference_time.value)
    self.total_times.append(inference_time.value)
    # Report once every `window` inferences, then start a fresh window.
    if (self.count + 1) % self.window == 0:
        results_np = self.np.array(self.results_window)
        mean_results = results_np.mean(axis=0)
        result_index = mean_results.argmax()
        # Divide by 1e6 to report the recorded times in milliseconds.
        times_ms = self.np.mean(self.times) / 1000000
        print("-" * 25 + f"Mean Results for {self.window} Inferences" + "-" * 25)
        print(f"Classification: {self.labels[result_index]} -> {format(mean_results[0][result_index]*100, '.2f')}%")
        print(f"Inference (physical) time: {format(times_ms, '.2f')}ms")
        print("-" * 79)
        self.results_window.clear()
        self.times.clear()
    self.count += 1
  =}

  reaction(shutdown) {=
    print("-"*36 + "Summary" + "-"*36)
    if self.total_times:
        avg_time = self.np.mean(self.total_times) / 1000000
        max_time = self.np.max(self.total_times) / 1000000
        min_time = self.np.min(self.total_times) / 1000000
        print(f"Mean Inference Time: {format(avg_time, '.2f')}ms")
        print(f"Slowest Inference: {format(max_time, '.2f')}ms")
        print(f"Fastest Inference: {format(min_time, '.2f')}ms")
    else:
        # np.max/np.min raise ValueError on an empty sequence, so skip the
        # statistics when shutdown happens before any inference was recorded.
        print("No inference was performed")
    print("-" * 79)
  =}
}
|
||
/**
 * Top-level pipeline: the microphone feeds the classifier, and the classifier
 * feeds both its results and its inference time to the actuator.
 */
main reactor {
  mic = new Microphone()
  classifier = new Classifier()
  actuator = new Actuator()

  mic.audio_data -> classifier.input_data
  classifier.output_data -> actuator.results
  classifier.inference_time -> actuator.inference_time
}
1 change: 1 addition & 0 deletions
1
examples/Python/src/AudioClassification/AudioClassification.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
# Audio Classification Example | ||
|
||
This example demonstrates the implementation of audio classification in Lingua Franca, utilizing the [TensorFlow Lite API](https://www.tensorflow.org/lite). Specifically, it showcases the functionality of an Emergency Sirens Classifier, capable of real-time classification of two distinct classes: Emergency and Other.
|
||
## Example Description | ||
|
||
The example comprises three reactors: | ||
- **Microphone**: Responsible for capturing real-time audio input data frames and forwarding them to the Classifier reactor for classification. | ||
- **Classifier**: This reactor loads the TensorFlow Lite model. Upon receiving audio data from the Microphone reactor, it executes the classification task and forwards the output to the Actuator reactor. | ||
- **Actuator**: Responsible for receiving the classification results and displaying them on the terminal. Additionally, to improve classification accuracy, the Actuator reactor computes a mean classification result over a predefined number of iterations, typically every three iterations. | ||
|
||
![Diagram of the Lingua Franca Program](./AudioClassification.svg "Diagram of the Lingua Franca Program") | ||
|
||
## Running Locally | ||
|
||
Before cloning this repository, you need to install and configure [Git LFS](https://git-lfs.github.com/) to handle large files. Follow the [installation instructions](https://docs.github.com/en/github/managing-large-files/installing-git-large-file-storage) to set up Git LFS on your system. | ||
|
||
Once Git LFS is installed, you can clone the repository: | ||
|
||
```bash | ||
git clone https://github.com/lf-lang/playground-lingua-franca.git | ||
``` | ||
|
||
### Install Dependencies | ||
|
||
The example requires several Python packages, including: | ||
|
||
- `sounddevice` | ||
- `numpy` | ||
- `tensorflow` | ||
|
||
To install the dependencies: | ||
|
||
1. Navigate to the example directory: | ||
|
||
```bash | ||
cd examples/Python/src/AudioClassification | ||
``` | ||
|
||
2. Install the required packages: | ||
|
||
```bash | ||
python3 -m pip install -r requirements.txt | ||
``` | ||
|
||
This will install all the packages listed in `requirements.txt`. | ||
> [!WARNING] | ||
> Be sure that you are using the same Python version as Lingua Franca for building the program. | ||
|
||
#### Installing Tensorflow for Apple Silicon | ||
|
||
Installing TensorFlow for Apple Silicon can be a bit challenging. Therefore, it's important to follow this guide closely. First, ensure you update your **Xcode Command Line Tools**. Open your terminal and execute the following command: | ||
```bash | ||
xcode-select --install | ||
``` | ||
After the installation finishes, you'll need to set up a package manager like [Homebrew](https://brew.sh/). Refer to the website for installation instructions. | ||
|
||
Once you've successfully installed both **Xcode Command Line Tools** and **Homebrew Package Manager**, proceed with the following instructions: | ||
1. Install the `hdf5` package using Homebrew: | ||
```bash | ||
brew install hdf5 | ||
``` | ||
2. Install the necessary packages: | ||
```bash | ||
python3 -m pip install -r requirements_apple_silicon.txt | ||
``` | ||
> [!WARNING] | ||
> Make sure to check the versions of the packages listed in the `requirements_apple_silicon.txt` file, as they may have been updated by the time of your installation. The current versions listed in the file have been tested under Python 3.9. | ||
3. Finally, install TensorFlow for MacOS: | ||
```bash | ||
python3 -m pip install tensorflow-macos | ||
python3 -m pip install tensorflow-metal | ||
``` | ||
## Troubleshooting | ||
### Error installing packages | ||
If you are facing issues while installing the `h5py` package, you can try the following steps: | ||
1. Remove the following line from `requirements_apple_silicon.txt`: | ||
```bash | ||
h5py>=3.6.0,<3.7 | ||
``` | ||
2. Then, execute again the command: | ||
```bash | ||
python3 -m pip install -r requirements_apple_silicon.txt | ||
``` | ||
> [!WARNING] | ||
> Make sure to check the versions of the packages listed in the `requirements_apple_silicon.txt` file, as they may have been updated by the time of your installation. The current versions listed in the file have been tested under Python 3.9. | ||
### Python version | ||
To successfully install and execute TensorFlow, it's recommended to use Python 3.9. However, the generated `CMakeLists.txt` file is configured to search for a Python version in the range `3.10.0` to `<3.11.0`. To resolve this issue, you'll need to manually modify the `CMakeLists.txt` file after compiling the Lingua Franca program, and then rebuild the program. Here's how you can do it:
|
||
1. Navigate to the following path: `AudioClassification/src-gen/AudioClassification`. | ||
2. Open the `CMakeLists.txt` file.
3. Locate the line: | ||
|
||
```cmake | ||
find_package(Python 3.10.0...<3.11.0 REQUIRED COMPONENTS Interpreter Development) | ||
``` | ||
|
||
4. Modify it to: | ||
|
||
```cmake | ||
find_package(Python 3.9.0...<3.10.0 REQUIRED COMPONENTS Interpreter Development) | ||
``` | ||
> [!NOTE] | ||
> You can also use a version lower than 3.9 but not greater than 3.10, as TensorFlow may encounter execution errors otherwise. | ||
|
||
Once you've made these changes, proceed to rebuild the program. Follow these steps: | ||
1. Open the terminal and ensure you're in the directory `AudioClassification/src-gen/AudioClassification`. | ||
2. Execute the following commands: | ||
|
||
```bash | ||
rm -rf build && mkdir -p build && cd build && cmake .. && make && cd .. | ||
``` | ||
|
||
After the build process is complete, you can now execute the Lingua Franca Program directly using Python. Make sure you're in the directory `AudioClassification/src-gen/AudioClassification`, and then run the following command: | ||
```bash | ||
python3 AudioClassification.py | ||
``` | ||
Git LFS file not shown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
numpy | ||
sounddevice | ||
tensorflow |
3 changes: 3 additions & 0 deletions
3
examples/Python/src/AudioClassification/requirements_apple_silicon.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
grpcio>=1.37.0,<2.0 | ||
h5py>=3.6.0,<3.7 | ||
numpy>=1.22.3,<1.23.5 |
42 changes: 42 additions & 0 deletions
42
examples/Python/src/AudioClassification/train/README_Dataset.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Dataset and Training | ||
|
||
## Dataset Details | ||
The training dataset for the Audio Classification model comprises two distinct classes: | ||
|
||
- **Emergency**: This class encompasses 850 .wav files featuring sirens from emergency vehicles like Ambulances, Firetrucks, and Police units. These files were meticulously extracted from the [Emergency Vehicle Siren Sounds][EVSS] and [SireNNet][SireNNet] Dataset. | ||
|
||
- **Other**: Comprising 800 .wav files, this class includes a diverse range of sounds manually extracted from the [ESC-50 Dataset][ESC50]. These encompass various categories such as Animals, Natural Soundscapes & Water Sounds, Human Non-Speech Sounds, Interior/Domestic Sounds, and Urban Sounds (excluding sirens). | ||
|
||
You can access the _.zip_ file containing the dataset at this [link][Drive].
|
||
The internal directory structure is the following: | ||
```bash | ||
dataset
├── test
│   ├── emergency
│   └── other
└── train
    ├── emergency
    └── other
``` | ||
|
||
## Training | ||
|
||
Once you have downloaded the dataset, you have the option to either train your own machine learning model or utilize the `train.py` file to perform Transfer Learning using a pre-trained audio classification model like `YAMNet`. | ||
|
||
Before initiating the transfer learning task, ensure that you have the following Python libraries installed: | ||
```bash | ||
tensorflow | ||
tflite_model_maker | ||
numpy | ||
matplotlib | ||
seaborn | ||
``` | ||
|
||
Additionally, inside the `train.py` script, make sure to fill in all the placeholders for paths (`path/to/the/file`) as required. | ||
|
||
|
||
[ESC50]: https://github.com/karolpiczak/ESC-50 | ||
[SireNNet]: https://data.mendeley.com/datasets/j4ydzzv4kb/1 | ||
[EVSS]: https://www.kaggle.com/datasets/vishnu0399/emergency-vehicle-siren-sounds | ||
[Drive]: https://drive.google.com/file/d/1iLDItoe9v7zL1AIz2bP2OVVRcN2oTziD/view?usp=drive_link |
Oops, something went wrong.