diff --git a/examples/Python/src/AudioClassification/.gitattributes b/examples/Python/src/AudioClassification/.gitattributes
new file mode 100644
index 00000000..7d630108
--- /dev/null
+++ b/examples/Python/src/AudioClassification/.gitattributes
@@ -0,0 +1 @@
+*.tflite filter=lfs diff=lfs merge=lfs -text
diff --git a/examples/Python/src/AudioClassification/AudioClassification.lf b/examples/Python/src/AudioClassification/AudioClassification.lf
new file mode 100644
index 00000000..9ec190bb
--- /dev/null
+++ b/examples/Python/src/AudioClassification/AudioClassification.lf
@@ -0,0 +1,166 @@
+/**
+ * This example illustrates the capabilities of an Emergency Sirens Classifier, which can
+ * distinguish two categories: Emergency and Other.
+ *
+ * @author Vincenzo Barbuto
+ */
+target Python {
+  timeout: 100 sec
+}
+
+reactor Microphone {
+  physical action send_audio_data
+  output audio_data
+
+  state audio_capture_thread  # Thread variables
+  state thread_should_be_running
+
+  state buffer_size  # Audio variables
+  state sample_rate
+  state num_channels
+  state overlapping_factor
+  state input_length_in_samples
+  state interval_between_inference
+
+  preamble {=
+    import time as tm
+    import sounddevice as sd
+    import numpy as np
+    import threading
+
+    def audio_capture(self, audio_action, running):
+
+      def callback(indata, frames, time, status):
+        if status:
+          print(status)
+        # Truncate the captured frame to the model's input size and hand it to
+        # the runtime through the physical action.
+        input_data = self.np.array(indata, dtype=self.np.float32)[:self.buffer_size].reshape((1, self.buffer_size))
+        audio_action.schedule(0, input_data)
+
+      with self.sd.InputStream(channels=self.num_channels, samplerate=self.sample_rate, callback=callback, blocksize=self.buffer_size):
+        # Press Enter when the shutdown procedure starts to close the audio capturing thread
+        print("#" * 50)
+        print("Recording started. Press Enter to stop")
+        print("#" * 50)
+        input()
+        print("\nRecording stopped")
+  =}
+
+  reaction(startup) -> send_audio_data {=
+    # Set up the audio recording parameters
+    self.buffer_size, self.sample_rate, self.num_channels, self.overlapping_factor = 15600, 16000, 1, 0.5
+    self.input_length_in_samples = self.buffer_size
+    self.interval_between_inference = self.input_length_in_samples * (1 - self.overlapping_factor)
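+    # With buffer_size = 15600 samples at 16 kHz, each frame covers 0.975 s of
+    # audio; an overlapping factor of 0.5 therefore corresponds to a new
+    # inference roughly every 7800 samples (~0.49 s).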
+
+    # Launch the audio capture thread
+    self.thread_should_be_running = self.threading.Event()
+    self.thread_should_be_running.set()
+
+    self.audio_capture_thread = self.threading.Thread(target=self.audio_capture, args=(send_audio_data, self.thread_should_be_running))
+    self.audio_capture_thread.start()
+  =}
+
+  reaction(send_audio_data) -> audio_data {=
+    audio_data.set(send_audio_data.value)
+  =}
+
+  reaction(shutdown) {=
+    print("*" * 32 + " SHUTTING DOWN " + "*" * 32)
+    self.thread_should_be_running.clear()
+    # self.audio_capture_thread.join()
+  =}
+}
+
+reactor Classifier(model="evds_bin.tflite") {
+  state interpreter
+  state input_details
+  state output_details
+
+  input input_data
+  output output_data
+  output inference_time
+
+  preamble {=
+    import tensorflow as tf
+  =}
+
+  reaction(startup) {=
+    model_path = f"./{self.model}"
+    print(f"Loading the model: {self.model}")
+    self.interpreter = self.tf.lite.Interpreter(model_path)
+    self.interpreter.allocate_tensors()
+    self.input_details = self.interpreter.get_input_details()
+    self.output_details = self.interpreter.get_output_details()
+  =}
+
+  reaction(input_data) -> output_data, inference_time {=
+    # Run inference
+    self.interpreter.set_tensor(self.input_details[0]["index"], input_data.value)
+    start = lf.time.physical()
+    self.interpreter.invoke()
+    inference_tm = lf.time.physical() - start
+    # Get the output results
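+    # The exported model keeps both YAMNet's scores and the custom head (see
+    # keep_yamnet_and_custom_heads in train/train.py); output index 1 is
+    # assumed here to be the custom Emergency/Other head.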
+    results = self.interpreter.get_tensor(self.output_details[1]["index"])
+    output_data.set(results)
+    inference_time.set(inference_tm)
+  =}
+}
+
+reactor Actuator(labels = {= ["Emergency", "Other"] =}, window=3) {
+  state results_window
+  state times
+  state count
+  state total_times
+
+  input results
+  input inference_time
+
+  preamble {=
+    import numpy as np
+  =}
+
+  reaction(startup) {=
+    self.results_window = []
+    self.times = []
+    self.total_times = []
+    self.count = 0
+  =}
+
+  reaction(results, inference_time) {=
+    self.results_window.append(results.value)
+    self.times.append(inference_time.value)
+    self.total_times.append(inference_time.value)
+    if (self.count + 1) % self.window == 0:
+      results_np = self.np.array(self.results_window)
+      mean_results = results_np.mean(axis=0)
+      result_index = mean_results.argmax()
+      times_ms = self.np.mean(self.times) / 1000000
+      print("-" * 25 + f"Mean Results for {self.window} Inferences" + "-" * 25)
+      print(f"Classification: {self.labels[result_index]} -> {format(mean_results[0][result_index]*100, '.2f')}%")
+      print(f"Inference (physical) time: {format(times_ms, '.2f')}ms")
+      print("-" * 79)
+      self.results_window.clear()
+      self.times.clear()
+    self.count += 1
+  =}
+
+  reaction(shutdown) {=
+    avg_time = self.np.mean(self.total_times) / 1000000
+    max_time = self.np.max(self.total_times) / 1000000
+    min_time = self.np.min(self.total_times) / 1000000
+    print("-" * 36 + "Summary" + "-" * 36)
+    print(f"Mean Inference Time: {format(avg_time, '.2f')}ms")
+    print(f"Slowest Inference: {format(max_time, '.2f')}ms")
+    print(f"Fastest Inference: {format(min_time, '.2f')}ms")
+    print("-" * 79)
+  =}
+}
+
+main reactor {
+  mic = new Microphone()
+  classifier = new Classifier()
+  actuator = new Actuator()
+
+  mic.audio_data -> classifier.input_data
+  classifier.output_data, classifier.inference_time -> actuator.results, actuator.inference_time
+}
diff --git a/examples/Python/src/AudioClassification/AudioClassification.svg b/examples/Python/src/AudioClassification/AudioClassification.svg
new file mode 100644
index 00000000..b02aa04b
--- /dev/null
+++ b/examples/Python/src/AudioClassification/AudioClassification.svg
@@ -0,0 +1 @@
+<!-- SVG markup not reproduced here: diagram of Microphone (audio_data) -> Classifier (input_data; output_data, inference_time) -> Actuator (results, inference_time) -->
\ No newline at end of file
diff --git a/examples/Python/src/AudioClassification/README.md b/examples/Python/src/AudioClassification/README.md
new file mode 100644
index 00000000..c933bd05
--- /dev/null
+++ b/examples/Python/src/AudioClassification/README.md
@@ -0,0 +1,134 @@
+# Audio Classification Example
+
+This example demonstrates audio classification in Lingua Franca using the [TensorFlow Lite API](https://www.tensorflow.org/lite). Specifically, it showcases an Emergency Sirens Classifier, capable of real-time classification of two distinct classes: Emergency and Other.
+
+## Example Description
+
+The example comprises three reactors:
+- **Microphone**: Captures real-time audio input data frames and forwards them to the Classifier reactor for classification.
+- **Classifier**: Loads the TensorFlow Lite model. Upon receiving audio data from the Microphone reactor, it executes the classification task and forwards the output to the Actuator reactor.
+- **Actuator**: Receives the classification results and displays them on the terminal. Additionally, to improve classification robustness, it averages the results over a window of inferences (three by default); see the sketch below.
+
+![Diagram of the Lingua Franca Program](./AudioClassification.svg "Diagram of the Lingua Franca Program")
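+
+The Actuator's windowed averaging amounts to the following standalone sketch (the scores are made up for illustration; each classifier output has shape `(1, 2)` in the order `[Emergency, Other]`):
+
+```python
+import numpy as np
+
+# Three consecutive classifier outputs, as collected over one window.
+window = [np.array([[0.8, 0.2]]), np.array([[0.7, 0.3]]), np.array([[0.9, 0.1]])]
+
+mean_results = np.array(window).mean(axis=0)  # shape (1, 2)
+result_index = mean_results.argmax()          # index of the winning class
+labels = ["Emergency", "Other"]
+print(f"{labels[result_index]}: {mean_results[0][result_index] * 100:.2f}%")
+```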
+
+## Running Locally
+
+Before cloning this repository, you need to install and configure [Git LFS](https://git-lfs.github.com/) to handle large files. Follow the [installation instructions](https://docs.github.com/en/github/managing-large-files/installing-git-large-file-storage) to set up Git LFS on your system.
+
+Once Git LFS is installed, you can clone the repository:
+
+```bash
+git clone https://github.com/lf-lang/playground-lingua-franca.git
+```
+
+### Install Dependencies
+
+The example requires several Python packages, including:
+
+- `sounddevice`
+- `numpy`
+- `tensorflow`
+
+To install the dependencies:
+
+1. Navigate to the example directory:
+
+   ```bash
+   cd examples/Python/src/AudioClassification
+   ```
+
+2. Install the required packages:
+
+   ```bash
+   python3 -m pip install -r requirements.txt
+   ```
+
+This will install all the packages listed in `requirements.txt`.
+> [!WARNING]
+> Be sure that you are using the same Python version as Lingua Franca for building the program.
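+
+To verify that the model and dependencies work outside of Lingua Franca, you can run a short smoke test. This is a minimal sketch: it mirrors the Classifier reactor's use of the interpreter, including the assumption that output index 1 holds the custom classification head.
+
+```python
+import numpy as np
+import tensorflow as tf
+
+# Load the bundled model and allocate its tensors.
+interpreter = tf.lite.Interpreter("evds_bin.tflite")
+interpreter.allocate_tensors()
+input_details = interpreter.get_input_details()
+output_details = interpreter.get_output_details()
+
+# Feed one frame of silence (15600 samples, as captured by the Microphone reactor).
+dummy = np.zeros((1, 15600), dtype=np.float32)
+interpreter.set_tensor(input_details[0]["index"], dummy)
+interpreter.invoke()
+print(interpreter.get_tensor(output_details[1]["index"]))
+```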
+
+#### Installing TensorFlow for Apple Silicon
+
+Installing TensorFlow on Apple Silicon can be challenging, so follow this guide closely. First, update your **Xcode Command Line Tools**. Open your terminal and execute:
+
+```bash
+xcode-select --install
+```
+
+After the installation finishes, you'll need a package manager like [Homebrew](https://brew.sh/). Refer to its website for installation instructions.
+
+Once you've installed both the **Xcode Command Line Tools** and **Homebrew**, proceed as follows:
+
+1. Install the `hdf5` package using Homebrew:
+   ```bash
+   brew install hdf5
+   ```
+
+2. Install the necessary packages:
+   ```bash
+   python3 -m pip install -r requirements_apple_silicon.txt
+   ```
+> [!WARNING]
+> Make sure to check the versions of the packages listed in `requirements_apple_silicon.txt`, as they may have been updated by the time of your installation. The versions currently listed in the file have been tested under Python 3.9.
+
+3. Finally, install TensorFlow for macOS:
+   ```bash
+   python3 -m pip install tensorflow-macos
+   python3 -m pip install tensorflow-metal
+   ```
+
+## Troubleshooting
+
+### Error installing packages
+
+If you are facing issues while installing the `h5py` package, try the following steps:
+
+1. Remove the following line from `requirements_apple_silicon.txt`:
+
+   ```
+   h5py>=3.6.0,<3.7
+   ```
+2. Then execute the command again:
+
+   ```bash
+   python3 -m pip install -r requirements_apple_silicon.txt
+   ```
+
+### Python version
+
+To successfully install and execute TensorFlow, it's recommended to use Python 3.9. However, the `CMakeLists.txt` file is configured to search for a Python version between `3.10.0` and `<3.11.0`. To resolve this, manually modify the `CMakeLists.txt` file after compiling the Lingua Franca program, then rebuild. Here's how:
+
+1. Navigate to the following path: `AudioClassification/src-gen/AudioClassification`.
+2. Open the `CMakeLists.txt` file.
+3. Locate the line:
+
+   ```cmake
+   find_package(Python 3.10.0...<3.11.0 REQUIRED COMPONENTS Interpreter Development)
+   ```
+
+4. Modify it to:
+
+   ```cmake
+   find_package(Python 3.9.0...<3.10.0 REQUIRED COMPONENTS Interpreter Development)
+   ```
+> [!NOTE]
+> You can also use a version lower than 3.9, but not 3.10 or greater, as TensorFlow may encounter execution errors otherwise.
+
+Once you've made these changes, rebuild the program:
+
+1. Open the terminal and ensure you're in the directory `AudioClassification/src-gen/AudioClassification`.
+2. Execute the following commands:
+
+   ```bash
+   rm -rf build && mkdir -p build && cd build && cmake .. && make && cd ..
+   ```
+
+After the build completes, you can execute the Lingua Franca program directly with Python. Make sure you're in the directory `AudioClassification/src-gen/AudioClassification`, then run:
+
+```bash
+python3 AudioClassification.py
+```
+
+
diff --git a/examples/Python/src/AudioClassification/evds_bin.tflite b/examples/Python/src/AudioClassification/evds_bin.tflite
new file mode 100644
index 00000000..e39b66d8
--- /dev/null
+++ b/examples/Python/src/AudioClassification/evds_bin.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caf15e57658eb6978626a789f874e60d91bac6153dc1fadb640f8ea3cc3333a6
+size 15052806
diff --git a/examples/Python/src/AudioClassification/requirements.txt b/examples/Python/src/AudioClassification/requirements.txt
new file mode 100644
index 00000000..f9cd2b74
--- /dev/null
+++ b/examples/Python/src/AudioClassification/requirements.txt
@@ -0,0 +1,3 @@
+numpy
+sounddevice
+tensorflow
\ No newline at end of file
diff --git a/examples/Python/src/AudioClassification/requirements_apple_silicon.txt b/examples/Python/src/AudioClassification/requirements_apple_silicon.txt
new file mode 100644
index 00000000..299fd6e2
--- /dev/null
+++ b/examples/Python/src/AudioClassification/requirements_apple_silicon.txt
@@ -0,0 +1,3 @@
+grpcio>=1.37.0,<2.0
+h5py>=3.6.0,<3.7
+numpy>=1.22.3,<1.23.5
\ No newline at end of file
diff --git a/examples/Python/src/AudioClassification/train/README_Dataset.md b/examples/Python/src/AudioClassification/train/README_Dataset.md
new file mode 100644
index 00000000..e82d8999
--- /dev/null
+++ b/examples/Python/src/AudioClassification/train/README_Dataset.md
@@ -0,0 +1,42 @@
+# Dataset and Training
+
+## Dataset Details
+The training dataset for the Audio Classification model comprises two distinct classes:
+
+- **Emergency**: This class encompasses 850 .wav files featuring sirens from emergency vehicles such as ambulances, firetrucks, and police units. These files were extracted from the [Emergency Vehicle Siren Sounds][EVSS] and [SireNNet][SireNNet] datasets.
+
+- **Other**: Comprising 800 .wav files, this class includes a diverse range of sounds manually extracted from the [ESC-50 Dataset][ESC50], covering categories such as animals, natural soundscapes and water sounds, human non-speech sounds, interior/domestic sounds, and urban sounds (excluding sirens).
+
+You can access the _.zip_ file containing the dataset at this [link][Drive].
+
+The internal directory structure is the following:
+```
+dataset
+├── train
+│   ├── emergency
+│   └── other
+└── test
+    ├── emergency
+    └── other
+```
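+
+A quick way to sanity-check the layout after unpacking is to count the files per split and class (a minimal sketch, assuming the archive unpacks to a `dataset/` directory next to the script):
+
+```python
+import os
+
+dataset = "dataset"  # adjust to wherever you unpacked the archive
+for split in ("train", "test"):
+    for label in ("emergency", "other"):
+        folder = os.path.join(dataset, split, label)
+        count = sum(1 for f in os.listdir(folder) if f.endswith(".wav"))
+        print(f"{split}/{label}: {count} .wav files")
+```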
+
+## Training
+
+Once you have downloaded the dataset, you can either train your own machine learning model or use the `train.py` file to perform transfer learning on a pre-trained audio classification model such as `YAMNet`.
+
+Before initiating the transfer learning task, ensure that you have the following Python libraries installed:
+```
+tensorflow
+tflite_model_maker
+numpy
+matplotlib
+seaborn
+```
+
+Additionally, inside the `train.py` script, make sure to fill in all the placeholders for paths (`path/to/the/file`) as required.
+
+
+[ESC50]: https://github.com/karolpiczak/ESC-50
+[SireNNet]: https://data.mendeley.com/datasets/j4ydzzv4kb/1
+[EVSS]: https://www.kaggle.com/datasets/vishnu0399/emergency-vehicle-siren-sounds
+[Drive]: https://drive.google.com/file/d/1iLDItoe9v7zL1AIz2bP2OVVRcN2oTziD/view?usp=drive_link
\ No newline at end of file
diff --git a/examples/Python/src/AudioClassification/train/train.py b/examples/Python/src/AudioClassification/train/train.py
new file mode 100644
index 00000000..15d045f9
--- /dev/null
+++ b/examples/Python/src/AudioClassification/train/train.py
@@ -0,0 +1,65 @@
+import tensorflow as tf
+import tflite_model_maker as mm
+from tflite_model_maker import audio_classifier
+import os
+
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+print(f"TensorFlow Version: {tf.__version__}")
+print(f"Model Maker Version: {mm.__version__}")
+
+def show_confusion_matrix(confusion, test_labels):
+    """Normalize the confusion matrix by row and plot it as a heatmap."""
+    confusion_normalized = confusion.astype("float") / confusion.sum(axis=1)[:, np.newaxis]
+    axis_labels = test_labels
+    ax = sns.heatmap(
+        confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels,
+        cmap='Blues', annot=True, fmt='.2f', square=True)
+    plt.title("Confusion matrix")
+    plt.ylabel("True label")
+    plt.xlabel("Predicted label")
+    plt.savefig('path/to/the/file')
+
+# Define the model spec
+spec = audio_classifier.YamNetSpec(
+    keep_yamnet_and_custom_heads=True,
+    frame_step=1 * audio_classifier.YamNetSpec.EXPECTED_WAVEFORM_LENGTH,
+    frame_length=1 * audio_classifier.YamNetSpec.EXPECTED_WAVEFORM_LENGTH)
+
+# Load the data
+dataset_folder = "path/to/the/file"
+
+train_data = audio_classifier.DataLoader.from_folder(
+    spec, os.path.join(dataset_folder, 'train'), cache=True)
+train_data, validation_data = train_data.split(0.8)
+
+test_data = audio_classifier.DataLoader.from_folder(
+    spec, os.path.join(dataset_folder, 'test'), cache=True)
+
+# Start training
+batch_size = 128
+epochs = 100
+
+print('Training the model')
+model = audio_classifier.create(
+    train_data,
+    spec,
+    validation_data,
+    batch_size=batch_size,
+    epochs=epochs)
+
+# Evaluate the model
+print('Evaluating the model')
+model.evaluate(test_data)
+
+# Inspect the results with a confusion matrix
+print('Understanding the data')
+confusion_matrix = model.confusion_matrix(test_data)
+show_confusion_matrix(confusion_matrix.numpy(), test_data.index_to_label)
+
+# Export the model
+models_path = 'path/to/the/file'
+print(f'Exporting the TFLite model to {models_path}')
+model.export(models_path, tflite_filename='model.tflite')
\ No newline at end of file