Skip to content

Commit

Permalink
Merge pull request #107 from lf-lang/vinzbarbuto-audioclassification-v1
Browse files Browse the repository at this point in the history
Audio Classification example
  • Loading branch information
lhstrh authored May 1, 2024
2 parents 2fd32be + 8a583f2 commit c403c24
Show file tree
Hide file tree
Showing 9 changed files with 418 additions and 0 deletions.
1 change: 1 addition & 0 deletions examples/Python/src/AudioClassification/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.tflite filter=lfs diff=lfs merge=lfs -text
166 changes: 166 additions & 0 deletions examples/Python/src/AudioClassification/AudioClassification.lf
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/**
* This example illustrates the capabilities of an Emergency Sirens Classifier, which can classify
* two distinct categories: Emergency, and Other
*
* @author Vincenzo Barbuto
*/
# Compile this program with the Python target; the `timeout` property
# limits the run to 100 sec.
target Python {
timeout: 100 sec
}

/**
 * Capture audio from the default input device on a background thread and
 * forward every captured block on the `audio_data` output.
 *
 * The capture thread hands blocks to the reactor through the physical action
 * `send_audio_data`. It keeps recording while the `thread_should_be_running`
 * event is set and stops once the shutdown reaction clears it.
 */
reactor Microphone {
  physical action send_audio_data
  output audio_data

  state audio_capture_thread  # Thread variables
  state thread_should_be_running

  state buffer_size  # Audio variables
  state sample_rate
  state num_channels
  state overlapping_factor
  state input_length_in_samples
  state interval_between_inference

  preamble {=
    import time as tm
    import sounddevice as sd
    import numpy as np
    import threading

    def audio_capture(self, audio_action, running):
        """Record audio until `running` is cleared.

        Args:
            audio_action: Physical action on which each captured block is scheduled.
            running: threading.Event; recording continues while it is set.
        """

        def callback(indata, frames, time, status):
            # Invoked by sounddevice for every captured block.
            if status:
                print(status)

            # Convert to float32, keep at most buffer_size frames and reshape
            # to (1, buffer_size) before handing the block to the reactor.
            input_data = self.np.array(indata, dtype=self.np.float32)[:self.buffer_size].reshape((1, self.buffer_size))
            audio_action.schedule(0, input_data)

        with self.sd.InputStream(channels=self.num_channels, samplerate=self.sample_rate, callback=callback, blocksize=self.buffer_size):
            print("#" * 50)
            print("Recording started. It stops automatically at shutdown")
            print("#" * 50)
            # Poll the stop flag instead of blocking on input(): a blocked
            # input() never notices that the flag was cleared and would keep
            # the process alive until Enter is pressed.
            while running.is_set():
                self.tm.sleep(0.1)
        print("\nRecording stopped")
  =}

  reaction(startup) -> send_audio_data {=
    # Setup Audio recorders
    self.buffer_size, self.sample_rate, self.num_channels, self.overlapping_factor = 15600, 16000, 1, 0.5
    self.input_length_in_samples = self.buffer_size
    self.interval_between_inference = self.input_length_in_samples * (1 - self.overlapping_factor)

    # Launch Audio Capture Thread
    self.thread_should_be_running = self.threading.Event()
    self.thread_should_be_running.set()

    self.audio_capture_thread = self.threading.Thread(target=self.audio_capture, args=(send_audio_data, self.thread_should_be_running))
    self.audio_capture_thread.start()
  =}

  reaction(send_audio_data) -> audio_data {=
    # Forward the block captured by the audio thread downstream.
    audio_data.set(send_audio_data.value)
  =}

  reaction(shutdown) {=
    print("*"*32 + " SHUTTING DOWN " + "*"*32)
    # Ask the capture thread to stop, then wait (bounded) for it to close the
    # audio stream so that it does not outlive the program.
    self.thread_should_be_running.clear()
    self.audio_capture_thread.join(timeout=2.0)
  =}
}

/**
 * Run a TensorFlow Lite model on every incoming audio block.
 *
 * The `model` parameter names the .tflite file, resolved relative to the
 * current working directory. For each input, the reactor emits the model
 * output on `output_data` and the measured duration of the interpreter
 * invocation on `inference_time`.
 */
reactor Classifier(model="evds_bin.tflite") {
  state interpreter
  state input_details
  state output_details

  input input_data
  output output_data
  output inference_time

  preamble {=
    import tensorflow as tf
  =}

  reaction(startup) {=
    # Load the model once and cache its tensor descriptors.
    print(f"Loading the model: {self.model}")
    self.interpreter = self.tf.lite.Interpreter(f"./{self.model}")
    self.interpreter.allocate_tensors()
    self.input_details = self.interpreter.get_input_details()
    self.output_details = self.interpreter.get_output_details()
  =}

  reaction(input_data) -> output_data, inference_time {=
    interp = self.interpreter

    # Feed the received block into the first input tensor.
    interp.set_tensor(self.input_details[0]["index"], input_data.value)

    # Time only the interpreter invocation, using the physical clock.
    t0 = lf.time.physical()
    interp.invoke()
    elapsed = lf.time.physical() - t0

    # Read the second output tensor.
    # NOTE(review): presumably the classification head of the model; confirm
    # against the model's output signature.
    scores = interp.get_tensor(self.output_details[1]["index"])

    output_data.set(scores)
    inference_time.set(elapsed)
  =}
}

/**
 * Print the classification averaged over `window` consecutive inferences and,
 * at shutdown, a summary of the measured inference times.
 *
 * Parameters:
 *   labels: Class names, indexed by the position of the winning score.
 *   window: Number of inferences averaged before a result is printed.
 */
reactor Actuator(labels = {= ["Emergency", "Other"] =}, window=3) {
  state results_window
  state times
  state count
  state total_times

  input results
  input inference_time

  preamble {=
    import numpy as np
  =}

  reaction(startup) {=
    self.results_window = []  # Scores of the current window
    self.times = []           # Inference times of the current window
    self.total_times = []     # Inference times of the whole run
    self.count = 0            # Number of inferences received so far
  =}

  reaction(results, inference_time) {=
    self.results_window.append(results.value)
    self.times.append(inference_time.value)
    self.total_times.append(inference_time.value)

    # Report once every `window` inferences.
    if (self.count + 1) % self.window == 0:
        # Average the scores of the window and pick the best class.
        mean_results = self.np.array(self.results_window).mean(axis=0)
        result_index = mean_results.argmax()
        # Times are divided by 1e6 to be displayed in milliseconds.
        times_ms = self.np.mean(self.times) / 1000000
        print("-" * 25 + f"Mean Results for {self.window} Inferences" + "-" * 25)
        print(f"Classification: {self.labels[result_index]} -> {mean_results[0][result_index] * 100:.2f}%")
        print(f"Inference (physical) time: {times_ms:.2f}ms")
        print("-" * 79)
        self.results_window.clear()
        self.times.clear()
    self.count += 1
  =}

  reaction(shutdown) {=
    print("-"*36 + "Summary" + "-"*36)
    if self.total_times:
        avg_time = self.np.mean(self.total_times) / 1000000
        max_time = self.np.max(self.total_times) / 1000000
        min_time = self.np.min(self.total_times) / 1000000
        print(f"Mean Inference Time: {avg_time:.2f}ms")
        print(f"Slowest Inference: {max_time:.2f}ms")
        print(f"Fastest Inference: {min_time:.2f}ms")
    else:
        # np.max / np.min raise ValueError on an empty sequence, so skip the
        # statistics when no inference was ever received.
        print("No inferences were performed")
    print("-" * 79)
  =}
}

/**
 * Top-level pipeline: Microphone -> Classifier -> Actuator.
 */
main reactor {
  mic = new Microphone()
  classifier = new Classifier()
  actuator = new Actuator()

  # Captured audio blocks feed the classifier.
  mic.audio_data -> classifier.input_data

  # Scores and timing of each inference feed the actuator.
  classifier.output_data -> actuator.results
  classifier.inference_time -> actuator.inference_time
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
134 changes: 134 additions & 0 deletions examples/Python/src/AudioClassification/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Audio Classification Example

This example demonstrates the implementation of audio classification in Lingua Franca, utilizing the [Tensorflow Lite API](https://www.tensorflow.org/lite). Specifically, it showcases the functionality of an Emergency Sirens Classifier, capable of real-time classification of two distinct classes: Emergency and Other.

## Example Description

The example comprises three reactors:
- **Microphone**: Responsible for capturing real-time audio input data frames and forwarding them to the Classifier reactor for classification.
- **Classifier**: This reactor loads the TensorFlow Lite model. Upon receiving audio data from the Microphone reactor, it executes the classification task and forwards the output to the Actuator reactor.
- **Actuator**: Responsible for receiving the classification results and displaying them on the terminal. Additionally, to improve classification accuracy, the Actuator reactor computes a mean classification result over a predefined number of inferences (three by default, set by its `window` parameter). At shutdown, it also prints a summary of the measured inference times.

![Diagram of the Lingua Franca Program](./AudioClassification.svg "Diagram of the Lingua Franca Program")

## Running Locally

Before cloning this repository, you need to install and configure [Git LFS](https://git-lfs.github.com/) to handle large files. Follow the [installation instructions](https://docs.github.com/en/github/managing-large-files/installing-git-large-file-storage) to set up Git LFS on your system.

Once Git LFS is installed, you can clone the repository:

```bash
git clone https://github.com/lf-lang/playground-lingua-franca.git
```

### Install Dependencies

The example requires several Python packages, including:

- `sounddevice`
- `numpy`
- `tensorflow`

To install the dependencies:

1. Navigate to the example directory:

```bash
cd examples/Python/src/AudioClassification
```

2. Install the required packages:

```bash
python3 -m pip install -r requirements.txt
```

This will install all the packages listed in `requirements.txt`.
> [!WARNING]
> Be sure that you are using the same Python version as Lingua Franca for building the program.

#### Installing Tensorflow for Apple Silicon

Installing TensorFlow for Apple Silicon can be a bit challenging. Therefore, it's important to follow this guide closely. First, ensure you update your **Xcode Command Line Tools**. Open your terminal and execute the following command:
```bash
xcode-select --install
```
After the installation finishes, you'll need to set up a package manager like [Homebrew](https://brew.sh/). Refer to the website for installation instructions.

Once you've successfully installed both **Xcode Command Line Tools** and **Homebrew Package Manager**, proceed with the following instructions:
1. Install the `hdf5` package using Homebrew:
```bash
brew install hdf5
```
2. Install the necessary packages:
```bash
python3 -m pip install -r requirements_apple_silicon.txt
```
> [!WARNING]
> Make sure to check the versions of the packages listed in the `requirements_apple_silicon.txt` file, as they may have been updated by the time of your installation. The current versions listed in the file have been tested under Python 3.9.
3. Finally, install TensorFlow for MacOS:
```bash
python3 -m pip install tensorflow-macos
python3 -m pip install tensorflow-metal
```
## Troubleshooting
### Error installing packages
If you are facing issues while installing the `h5py` package, you can try the following steps:
1. Remove the following line from `requirements_apple_silicon.txt`:
```bash
h5py>=3.6.0,<3.7
```
2. Then, execute again the command:
```bash
python3 -m pip install -r requirements_apple_silicon.txt
```
> [!WARNING]
> Make sure to check the versions of the packages listed in the `requirements_apple_silicon.txt` file, as they may have been updated by the time of your installation. The current versions listed in the file have been tested under Python 3.9.
### Python version
To successfully install and execute TensorFlow, it's recommended to use Python 3.9. However, the generated `CMakeLists.txt` file is configured to search for a Python version in the range `3.10.0...<3.11.0`. To resolve this issue, you'll need to manually modify the `CMakeLists.txt` file after compiling the Lingua Franca program, and then rebuild the program. Here's how you can do it:

1. Navigate to the following path: `AudioClassification/src-gen/AudioClassification`.
2. Open the `CMakeLists.txt` file.
3. Locate the line:

```cmake
find_package(Python 3.10.0...<3.11.0 REQUIRED COMPONENTS Interpreter Development)
```

4. Modify it to:

```cmake
find_package(Python 3.9.0...<3.10.0 REQUIRED COMPONENTS Interpreter Development)
```
> [!NOTE]
> You can also use a version lower than 3.9 but not greater than 3.10, as TensorFlow may encounter execution errors otherwise.

Once you've made these changes, proceed to rebuild the program. Follow these steps:
1. Open the terminal and ensure you're in the directory `AudioClassification/src-gen/AudioClassification`.
2. Execute the following commands:

```bash
rm -rf build && mkdir -p build && cd build && cmake .. && make && cd ..
```

After the build process is complete, you can now execute the Lingua Franca Program directly using Python. Make sure you're in the directory `AudioClassification/src-gen/AudioClassification`, and then run the following command:
```bash
python3 AudioClassification.py
```
3 changes: 3 additions & 0 deletions examples/Python/src/AudioClassification/evds_bin.tflite
Git LFS file not shown
3 changes: 3 additions & 0 deletions examples/Python/src/AudioClassification/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
numpy
sounddevice
tensorflow
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
grpcio>=1.37.0,<2.0
h5py>=3.6.0,<3.7
numpy>=1.22.3,<1.23.5
42 changes: 42 additions & 0 deletions examples/Python/src/AudioClassification/train/README_Dataset.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Dataset and Training

## Dataset Details
The training dataset for the Audio Classification model comprises two distinct classes:

- **Emergency**: This class encompasses 850 .wav files featuring sirens from emergency vehicles like Ambulances, Firetrucks, and Police units. These files were meticulously extracted from the [Emergency Vehicle Siren Sounds][EVSS] and [SireNNet][SireNNet] Dataset.

- **Other**: Comprising 800 .wav files, this class includes a diverse range of sounds manually extracted from the [ESC-50 Dataset][ESC50]. These encompass various categories such as Animals, Natural Soundscapes & Water Sounds, Human Non-Speech Sounds, Interior/Domestic Sounds, and Urban Sounds (excluding sirens).

You can access the _.zip_ file containing the dataset at this [link][Drive].

The internal directory structure is the following:
```bash
├── dataset
│   ├── train
│   │   ├── emergency
│   │   └── other
│   ├── test
│   │   ├── emergency
└── └── └── other
```

## Training

Once you have downloaded the dataset, you have the option to either train your own machine learning model or utilize the `train.py` file to perform Transfer Learning using a pre-trained audio classification model like `YAMNet`.

Before initiating the transfer learning task, ensure that you have the following Python libraries installed:
```bash
tensorflow
tflite_model_maker
numpy
matplotlib
seaborn
```

Additionally, inside the `train.py` script, make sure to fill in all the placeholders for paths (`path/to/the/file`) as required.


[ESC50]: https://github.com/karolpiczak/ESC-50
[SireNNet]: https://data.mendeley.com/datasets/j4ydzzv4kb/1
[EVSS]: https://www.kaggle.com/datasets/vishnu0399/emergency-vehicle-siren-sounds
[Drive]: https://drive.google.com/file/d/1iLDItoe9v7zL1AIz2bP2OVVRcN2oTziD/view?usp=drive_link
Loading

0 comments on commit c403c24

Please sign in to comment.