From e5813f81e0ef7041a5d298ca84f8bbad0da9f91f Mon Sep 17 00:00:00 2001 From: Vincenzo Barbuto Date: Wed, 10 Apr 2024 17:48:59 -0700 Subject: [PATCH 1/4] First commit AudioClassification Example; Enable LFS for .tflite --- .../src/AudioClassification/.gitattributes | 1 + .../AudioClassification.lf | 165 ++++++++++++++++++ .../AudioClassification.svg | 1 + .../Python/src/AudioClassification/README.md | 108 ++++++++++++ .../audio_model_9917.tflite | 3 + .../src/AudioClassification/requirements.txt | 3 + .../requirements_apple_silicon.txt | 3 + 7 files changed, 284 insertions(+) create mode 100644 examples/Python/src/AudioClassification/.gitattributes create mode 100644 examples/Python/src/AudioClassification/AudioClassification.lf create mode 100644 examples/Python/src/AudioClassification/AudioClassification.svg create mode 100644 examples/Python/src/AudioClassification/README.md create mode 100644 examples/Python/src/AudioClassification/audio_model_9917.tflite create mode 100644 examples/Python/src/AudioClassification/requirements.txt create mode 100644 examples/Python/src/AudioClassification/requirements_apple_silicon.txt diff --git a/examples/Python/src/AudioClassification/.gitattributes b/examples/Python/src/AudioClassification/.gitattributes new file mode 100644 index 00000000..7d630108 --- /dev/null +++ b/examples/Python/src/AudioClassification/.gitattributes @@ -0,0 +1 @@ +*.tflite filter=lfs diff=lfs merge=lfs -text diff --git a/examples/Python/src/AudioClassification/AudioClassification.lf b/examples/Python/src/AudioClassification/AudioClassification.lf new file mode 100644 index 00000000..f30590d9 --- /dev/null +++ b/examples/Python/src/AudioClassification/AudioClassification.lf @@ -0,0 +1,165 @@ +/** + * This example illustrates the capabilities of an Emergency Sirens Classifier, which can + * classify three distinct categories: Ambulance, Firetruck, and Traffic + * + * @author Vincenzo Barbuto + */ + +target Python + +reactor Microphone { + + physical action send_audio_data + output audio_data + + # Thread variables + state audio_capture_thread + state thread_should_be_running + + # Audio variables + state buffer_size + state sample_rate + state num_channels + state overlapping_factor + state input_length_in_samples + state interval_between_inference + + preamble {= + import time as tm + import sounddevice as sd + import numpy as np + import threading + + def audio_capture(self, audio_action, running): + + def callback(indata, frames, time, status): + if status: + print(status) + input_data = self.np.array(indata, dtype=self.np.float32).reshape((1, self.buffer_size)) + audio_action.schedule(0, input_data) + + with self.sd.InputStream(channels=self.num_channels, samplerate=self.sample_rate, callback=callback, blocksize=self.buffer_size): + print("#" * 50) + print("Recording started. Press Ctrl+C to stop") + print("#" * 50) + try: + while True & running.is_set(): + self.tm.sleep(self.interval_between_inference) + except KeyboardInterrupt: + print("\nRecording stopped") + =} + + reaction(startup) -> send_audio_data {= + # Setup Audio recorders + self.buffer_size, self.sample_rate, self.num_channels, self.overlapping_factor = 15600, 16000, 1, 0.5 + self.input_length_in_samples =self. 
buffer_size + self.interval_between_inference = self.input_length_in_samples * (1 - self.overlapping_factor) + + # Launch Audio Capture Thread + self.thread_should_be_running = self.threading.Event() + self.thread_should_be_running.set() + + self.audio_capture_thread = self.threading.Thread(target=self.audio_capture, args=(send_audio_data, self.thread_should_be_running)) + self.audio_capture_thread.start() + + =} + + reaction(send_audio_data) -> audio_data {= + audio_data.set(send_audio_data.value) + =} + + reaction(shutdown) {= + self.thread_should_be_running.clear() + self.audio_capture_thread.join() + =} + +} + +reactor Classifier(model = "audio_model_9917.tflite") { + + state interpreter + state input_details + state output_details + + input input_data + output output_data + output inference_time + + preamble{= + import tensorflow as tf + =} + + reaction(startup) {= + # Specify the full path + model_path = f"./{self.model}" + self.interpreter = self.tf.lite.Interpreter(model_path) + self.interpreter.allocate_tensors() + self.input_details = self.interpreter.get_input_details() + self.output_details = self.interpreter.get_output_details() + =} + + reaction(input_data) -> output_data, inference_time {= + # Run inference + self.interpreter.set_tensor(self.input_details[0]["index"], input_data.value) + start = lf.time.physical() + self.interpreter.invoke() + inference_tm = lf.time.physical() - start + # Get output results + results = self.interpreter.get_tensor(self.output_details[1]["index"]) + output_data.set(results) + inference_time.set(inference_tm) + =} + +} + +reactor Actuator(labels = {= ["Ambulance", "Firetruck", "Traffic"] =}, window = 3) { + + state results_window + state times + state count + + input results + input inference_time + + preamble {= + import numpy as np + =} + + reaction(startup){= + self.results_window = [] + self.times = [] + self.count = 0 + =} + + reaction(results, inference_time){= + self.results_window.append(results.value) + self.times.append(inference_time.value) + + # Compute the mean results for the inferences for each window + if(((self.count + 1 )%self.window == 0)): + results_np = self.np.array(self.results_window) + mean_results = results_np.mean(axis=0) + result_index = mean_results.argmax() + times_ms = self.np.mean(self.times) / 1000000 + print("-" * 25 + f"Mean Results for {self.window} Inferences" + "-" * 25) + print(f"Classification: {self.labels[result_index]} -> {format(mean_results[0][result_index]*100, '.2f')}%") + print(f"Inference (physical) time: {format(times_ms, '.2f')}ms") + print("-" * 79) + self.results_window.clear() + self.times.clear() + self.count+=1 + =} + + +} + + +main reactor { + mic = new Microphone() + classifier = new Classifier() + actuator = new Actuator() + + mic.audio_data -> classifier.input_data + classifier.output_data, classifier.inference_time -> actuator.results, actuator.inference_time + +} diff --git a/examples/Python/src/AudioClassification/AudioClassification.svg b/examples/Python/src/AudioClassification/AudioClassification.svg new file mode 100644 index 00000000..7387e3b3 --- /dev/null +++ b/examples/Python/src/AudioClassification/AudioClassification.svg @@ -0,0 +1 @@ +AudioClassificationMicrophone123Paudio_dataClassifier12input_dataoutput_datainference_timeActuator12resultsinference_time \ No newline at end of file diff --git a/examples/Python/src/AudioClassification/README.md b/examples/Python/src/AudioClassification/README.md new file mode 100644 index 00000000..6b318b09 --- /dev/null +++ 
b/examples/Python/src/AudioClassification/README.md
@@ -0,0 +1,108 @@
+# Audio Classification Example
+
+This example demonstrates the implementation of audio classification in Lingua Franca, utilizing the [TensorFlow Lite API](https://www.tensorflow.org/lite). Specifically, it showcases the functionality of an Emergency Sirens Classifier, capable of real-time classification of three distinct classes: Ambulance, Firetruck, and Traffic.
+
+## Example Description
+
+The example comprises three reactors:
+- **Microphone**: Captures real-time audio input data frames and forwards them to the Classifier reactor for classification.
+- **Classifier**: Loads the TensorFlow Lite model. Upon receiving audio data from the Microphone reactor, it executes the classification task and forwards the output to the Actuator reactor.
+- **Actuator**: Receives the classification results and displays them on the terminal. Additionally, to improve classification accuracy, the Actuator reactor averages the classification results over a predefined window of inferences (three by default).
+
+![Diagram of the Lingua Franca Program](./AudioClassification.svg "Diagram of the Lingua Franca Program")
+
+## Install Dependencies
+
+The example utilizes several libraries, including `sounddevice`, `numpy`, and `tensorflow`. You can install them effortlessly by executing the command:
+
+```bash
+python3 -m pip install -r requirements.txt
+```
+> [!WARNING]
+> Be sure that you are using the same Python version as Lingua Franca for building the program.
+
+### Installing TensorFlow for Apple Silicon
+
+Installing TensorFlow for Apple Silicon can be a bit challenging. Therefore, it's important to follow this guide closely. First, ensure you update your **Xcode Command Line Tools**. Open your terminal and execute the following command:
+
+```bash
+xcode-select --install
+```
+
+After the installation finishes, you'll need to set up a package manager like [Homebrew](https://brew.sh/). Refer to the website for installation instructions.
+
+Once you've successfully installed both the **Xcode Command Line Tools** and **Homebrew**, proceed with the following instructions:
+
+1. Install the `hdf5` package using Homebrew:
+   ```bash
+   brew install hdf5
+   ```
+
+2. Install the necessary packages:
+   ```bash
+   python3 -m pip install -r requirements_apple_silicon.txt
+   ```
+> [!WARNING]
+> Make sure to check the versions of the packages listed in the `requirements_apple_silicon.txt` file, as they may have been updated by the time of your installation. The current versions listed in the file have been tested under Python 3.9.
+
+3. Finally, install TensorFlow for macOS:
+   ```bash
+   python3 -m pip install tensorflow-macos
+   python3 -m pip install tensorflow-metal
+   ```
+
+## Troubleshooting
+
+### Error installing packages
+
+If you are facing issues while installing the `h5py` package, you can try the following steps:
+
+1. Remove the following line from `requirements_apple_silicon.txt`:
+
+   ```bash
+   h5py>=3.6.0,<3.7
+   ```
+2. Then, execute the command again:
+
+   ```bash
+   python3 -m pip install -r requirements_apple_silicon.txt
+   ```
+> [!WARNING]
+> Make sure to check the versions of the packages listed in the `requirements_apple_silicon.txt` file, as they may have been updated by the time of your installation. The current versions listed in the file have been tested under Python 3.9.
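+
+Once the installation succeeds, you can quickly verify that the environment is usable before building the Lingua Franca program. The following is a minimal sanity check (a sketch that assumes only the packages from `requirements.txt`; it is not part of the example itself):
+
+```python
+import numpy
+import sounddevice
+import tensorflow as tf
+
+# If any of these imports fail, revisit the installation steps above.
+print("numpy:", numpy.__version__)
+print("sounddevice:", sounddevice.__version__)
+print("tensorflow:", tf.__version__)
+```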
+
+### Python version
+
+To successfully install and execute TensorFlow, it's recommended to use Python 3.9. However, the `CMakeLists.txt` file is configured to search for a Python version of at least `3.10.0` and lower than `3.11.0`. To resolve this issue, you'll need to manually modify the `CMakeLists.txt` file after compiling the Lingua Franca program and then rebuild the program. Here's how you can do it:

+1. Navigate to the following path: `AudioClassification/src-gen/AudioClassification`.
+2. Open the `CMakeLists.txt` file.
+3. Locate the line:
+
+   ```cmake
+   find_package(Python 3.10.0...<3.11.0 REQUIRED COMPONENTS Interpreter Development)
+   ```
+
+4. Modify it to:
+
+   ```cmake
+   find_package(Python 3.9.0...<3.10.0 REQUIRED COMPONENTS Interpreter Development)
+   ```
+> [!NOTE]
+> You can also use a version lower than 3.9, but not 3.10 or greater, as TensorFlow may otherwise encounter execution errors.
+
+Once you've made these changes, proceed to rebuild the program. Follow these steps:
+
+1. Open the terminal and ensure you're in the directory `AudioClassification/src-gen/AudioClassification`.
+2. Execute the following commands:
+
+   ```bash
+   rm -rf build && mkdir -p build && cd build && cmake .. && make && cd ..
+   ```
+
+After the build process is complete, you can execute the Lingua Franca program directly using Python. Make sure you're in the directory `AudioClassification/src-gen/AudioClassification`, and then run the following command:
+
+```bash
+python3 AudioClassification.py
+```
+
+
diff --git a/examples/Python/src/AudioClassification/audio_model_9917.tflite b/examples/Python/src/AudioClassification/audio_model_9917.tflite
new file mode 100644
index 00000000..c9f7cd16
--- /dev/null
+++ b/examples/Python/src/AudioClassification/audio_model_9917.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1547bf4cfb79cb87ddc242134b4977995cf3096cd0cf7caf5eb9a3957b92ea84
+size 15056918
diff --git a/examples/Python/src/AudioClassification/requirements.txt b/examples/Python/src/AudioClassification/requirements.txt
new file mode 100644
index 00000000..f9cd2b74
--- /dev/null
+++ b/examples/Python/src/AudioClassification/requirements.txt
@@ -0,0 +1,3 @@
+numpy
+sounddevice
+tensorflow
\ No newline at end of file
diff --git a/examples/Python/src/AudioClassification/requirements_apple_silicon.txt b/examples/Python/src/AudioClassification/requirements_apple_silicon.txt
new file mode 100644
index 00000000..299fd6e2
--- /dev/null
+++ b/examples/Python/src/AudioClassification/requirements_apple_silicon.txt
@@ -0,0 +1,3 @@
+grpcio>=1.37.0,<2.0
+h5py>=3.6.0,<3.7
+numpy>=1.22.3,<1.23.5
\ No newline at end of file

From 2089edfe3d433779d47dbac8dd48c8ea1af921bf Mon Sep 17 00:00:00 2001
From: Vincenzo Barbuto
Date: Wed, 10 Apr 2024 22:29:44 -0700
Subject: [PATCH 2/4] Solved formatting error in the AudioClassification.lf
 file

---
 .../AudioClassification.lf | 285 +++++++++---------
 1 file changed, 136 insertions(+), 149 deletions(-)

diff --git a/examples/Python/src/AudioClassification/AudioClassification.lf b/examples/Python/src/AudioClassification/AudioClassification.lf
index f30590d9..2a8f7c6c 100644
--- a/examples/Python/src/AudioClassification/AudioClassification.lf
+++ b/examples/Python/src/AudioClassification/AudioClassification.lf
@@ -1,165 +1,152 @@
 /**
- * This example illustrates the capabilities of an Emergency Sirens Classifier, which can
- * classify three distinct categories: Ambulance, Firetruck, and Traffic
+ * This example illustrates the capabilities of an 
Emergency Sirens Classifier, which can classify + * three distinct categories: Ambulance, Firetruck, and Traffic * * @author Vincenzo Barbuto */ - target Python reactor Microphone { - - physical action send_audio_data - output audio_data - - # Thread variables - state audio_capture_thread - state thread_should_be_running - - # Audio variables - state buffer_size - state sample_rate - state num_channels - state overlapping_factor - state input_length_in_samples - state interval_between_inference - - preamble {= - import time as tm - import sounddevice as sd - import numpy as np - import threading - - def audio_capture(self, audio_action, running): - - def callback(indata, frames, time, status): - if status: - print(status) - input_data = self.np.array(indata, dtype=self.np.float32).reshape((1, self.buffer_size)) - audio_action.schedule(0, input_data) - - with self.sd.InputStream(channels=self.num_channels, samplerate=self.sample_rate, callback=callback, blocksize=self.buffer_size): - print("#" * 50) - print("Recording started. Press Ctrl+C to stop") - print("#" * 50) - try: - while True & running.is_set(): - self.tm.sleep(self.interval_between_inference) - except KeyboardInterrupt: - print("\nRecording stopped") - =} - - reaction(startup) -> send_audio_data {= - # Setup Audio recorders - self.buffer_size, self.sample_rate, self.num_channels, self.overlapping_factor = 15600, 16000, 1, 0.5 - self.input_length_in_samples =self. buffer_size - self.interval_between_inference = self.input_length_in_samples * (1 - self.overlapping_factor) - - # Launch Audio Capture Thread - self.thread_should_be_running = self.threading.Event() - self.thread_should_be_running.set() - - self.audio_capture_thread = self.threading.Thread(target=self.audio_capture, args=(send_audio_data, self.thread_should_be_running)) - self.audio_capture_thread.start() - - =} - - reaction(send_audio_data) -> audio_data {= - audio_data.set(send_audio_data.value) - =} - - reaction(shutdown) {= - self.thread_should_be_running.clear() - self.audio_capture_thread.join() - =} - + physical action send_audio_data + output audio_data + + state audio_capture_thread # Thread variables + state thread_should_be_running + + state buffer_size # Audio variables + state sample_rate + state num_channels + state overlapping_factor + state input_length_in_samples + state interval_between_inference + + preamble {= + import time as tm + import sounddevice as sd + import numpy as np + import threading + + def audio_capture(self, audio_action, running): + + def callback(indata, frames, time, status): + if status: + print(status) + input_data = self.np.array(indata, dtype=self.np.float32).reshape((1, self.buffer_size)) + audio_action.schedule(0, input_data) + + with self.sd.InputStream(channels=self.num_channels, samplerate=self.sample_rate, callback=callback, blocksize=self.buffer_size): + print("#" * 50) + print("Recording started. Press Ctrl+C to stop") + print("#" * 50) + try: + while True & running.is_set(): + self.tm.sleep(self.interval_between_inference) + except KeyboardInterrupt: + print("\nRecording stopped") + =} + + reaction(startup) -> send_audio_data {= + # Setup Audio recorders + self.buffer_size, self.sample_rate, self.num_channels, self.overlapping_factor = 15600, 16000, 1, 0.5 + self.input_length_in_samples =self. 
buffer_size + self.interval_between_inference = self.input_length_in_samples * (1 - self.overlapping_factor) + + # Launch Audio Capture Thread + self.thread_should_be_running = self.threading.Event() + self.thread_should_be_running.set() + + self.audio_capture_thread = self.threading.Thread(target=self.audio_capture, args=(send_audio_data, self.thread_should_be_running)) + self.audio_capture_thread.start() + =} + + reaction(send_audio_data) -> audio_data {= + audio_data.set(send_audio_data.value) + =} + + reaction(shutdown) {= + self.thread_should_be_running.clear() + self.audio_capture_thread.join() + =} } -reactor Classifier(model = "audio_model_9917.tflite") { - - state interpreter - state input_details - state output_details - - input input_data - output output_data - output inference_time - - preamble{= - import tensorflow as tf - =} - - reaction(startup) {= - # Specify the full path - model_path = f"./{self.model}" - self.interpreter = self.tf.lite.Interpreter(model_path) - self.interpreter.allocate_tensors() - self.input_details = self.interpreter.get_input_details() - self.output_details = self.interpreter.get_output_details() - =} - - reaction(input_data) -> output_data, inference_time {= - # Run inference - self.interpreter.set_tensor(self.input_details[0]["index"], input_data.value) - start = lf.time.physical() - self.interpreter.invoke() - inference_tm = lf.time.physical() - start - # Get output results - results = self.interpreter.get_tensor(self.output_details[1]["index"]) - output_data.set(results) - inference_time.set(inference_tm) - =} - +reactor Classifier(model="audio_model_9917.tflite") { + state interpreter + state input_details + state output_details + + input input_data + output output_data + output inference_time + + preamble {= + import tensorflow as tf + =} + + reaction(startup) {= + # Specify the full path + model_path = f"./{self.model}" + self.interpreter = self.tf.lite.Interpreter(model_path) + self.interpreter.allocate_tensors() + self.input_details = self.interpreter.get_input_details() + self.output_details = self.interpreter.get_output_details() + =} + + reaction(input_data) -> output_data, inference_time {= + # Run inference + self.interpreter.set_tensor(self.input_details[0]["index"], input_data.value) + start = lf.time.physical() + self.interpreter.invoke() + inference_tm = lf.time.physical() - start + # Get output results + results = self.interpreter.get_tensor(self.output_details[1]["index"]) + output_data.set(results) + inference_time.set(inference_tm) + =} } -reactor Actuator(labels = {= ["Ambulance", "Firetruck", "Traffic"] =}, window = 3) { - - state results_window - state times - state count - - input results - input inference_time - - preamble {= - import numpy as np - =} - - reaction(startup){= - self.results_window = [] - self.times = [] - self.count = 0 - =} - - reaction(results, inference_time){= - self.results_window.append(results.value) - self.times.append(inference_time.value) - - # Compute the mean results for the inferences for each window - if(((self.count + 1 )%self.window == 0)): - results_np = self.np.array(self.results_window) - mean_results = results_np.mean(axis=0) - result_index = mean_results.argmax() - times_ms = self.np.mean(self.times) / 1000000 - print("-" * 25 + f"Mean Results for {self.window} Inferences" + "-" * 25) - print(f"Classification: {self.labels[result_index]} -> {format(mean_results[0][result_index]*100, '.2f')}%") - print(f"Inference (physical) time: {format(times_ms, '.2f')}ms") - print("-" * 79) - 
self.results_window.clear() - self.times.clear() - self.count+=1 - =} - - +reactor Actuator(labels = {= ["Ambulance", "Firetruck", "Traffic"] =}, window=3) { + state results_window + state times + state count + + input results + input inference_time + + preamble {= + import numpy as np + =} + + reaction(startup) {= + self.results_window = [] + self.times = [] + self.count = 0 + =} + + reaction(results, inference_time) {= + self.results_window.append(results.value) + self.times.append(inference_time.value) + + # Compute the mean results for the inferences for each window + if(((self.count + 1 )%self.window == 0)): + results_np = self.np.array(self.results_window) + mean_results = results_np.mean(axis=0) + result_index = mean_results.argmax() + times_ms = self.np.mean(self.times) / 1000000 + print("-" * 25 + f"Mean Results for {self.window} Inferences" + "-" * 25) + print(f"Classification: {self.labels[result_index]} -> {format(mean_results[0][result_index]*100, '.2f')}%") + print(f"Inference (physical) time: {format(times_ms, '.2f')}ms") + print("-" * 79) + self.results_window.clear() + self.times.clear() + self.count+=1 + =} } - main reactor { - mic = new Microphone() - classifier = new Classifier() - actuator = new Actuator() - - mic.audio_data -> classifier.input_data - classifier.output_data, classifier.inference_time -> actuator.results, actuator.inference_time + mic = new Microphone() + classifier = new Classifier() + actuator = new Actuator() + mic.audio_data -> classifier.input_data + classifier.output_data, classifier.inference_time -> actuator.results, actuator.inference_time } From 930d9d90202ade4373bf8da412c71715a20631b2 Mon Sep 17 00:00:00 2001 From: Vincenzo Barbuto Date: Wed, 17 Apr 2024 15:11:26 -0700 Subject: [PATCH 3/4] Added dataset and training details; Update the AudioClassification.lf code to the lates version --- .../AudioClassification.lf | 44 ++++++++----- .../AudioClassification.svg | 2 +- .../audio_model_9917.tflite | 3 - .../src/AudioClassification/evds_bin.tflite | 3 + .../train/README_Dataset.md | 42 ++++++++++++ .../src/AudioClassification/train/train.py | 65 +++++++++++++++++++ 6 files changed, 140 insertions(+), 19 deletions(-) delete mode 100644 examples/Python/src/AudioClassification/audio_model_9917.tflite create mode 100644 examples/Python/src/AudioClassification/evds_bin.tflite create mode 100644 examples/Python/src/AudioClassification/train/README_Dataset.md create mode 100644 examples/Python/src/AudioClassification/train/train.py diff --git a/examples/Python/src/AudioClassification/AudioClassification.lf b/examples/Python/src/AudioClassification/AudioClassification.lf index 2a8f7c6c..63085975 100644 --- a/examples/Python/src/AudioClassification/AudioClassification.lf +++ b/examples/Python/src/AudioClassification/AudioClassification.lf @@ -1,10 +1,12 @@ /** * This example illustrates the capabilities of an Emergency Sirens Classifier, which can classify - * three distinct categories: Ambulance, Firetruck, and Traffic + * two distinct categories: Emergency, and Other * * @author Vincenzo Barbuto */ -target Python +target Python { + timeout: 100 sec +} reactor Microphone { physical action send_audio_data @@ -31,18 +33,17 @@ reactor Microphone { def callback(indata, frames, time, status): if status: print(status) - input_data = self.np.array(indata, dtype=self.np.float32).reshape((1, self.buffer_size)) + + input_data = self.np.array(indata, dtype=self.np.float32)[:self.buffer_size].reshape((1, self.buffer_size)) audio_action.schedule(0, input_data) with 
self.sd.InputStream(channels=self.num_channels, samplerate=self.sample_rate, callback=callback, blocksize=self.buffer_size): + # Press Enter when the shutdown procedure starts to close the audio capturing thread print("#" * 50) - print("Recording started. Press Ctrl+C to stop") + print("Recording started. Press Enter to stop") print("#" * 50) - try: - while True & running.is_set(): - self.tm.sleep(self.interval_between_inference) - except KeyboardInterrupt: - print("\nRecording stopped") + input() + print("\nRecording stopped") =} reaction(startup) -> send_audio_data {= @@ -64,12 +65,13 @@ reactor Microphone { =} reaction(shutdown) {= + print("*"*10 + " Shutting Down " + "*"*10) self.thread_should_be_running.clear() - self.audio_capture_thread.join() + # self.audio_capture_thread.join() =} } -reactor Classifier(model="audio_model_9917.tflite") { +reactor Classifier(model="evds_bin.tflite") { state interpreter state input_details state output_details @@ -83,8 +85,8 @@ reactor Classifier(model="audio_model_9917.tflite") { =} reaction(startup) {= - # Specify the full path model_path = f"./{self.model}" + print(f"Loading the model: {self.model}") self.interpreter = self.tf.lite.Interpreter(model_path) self.interpreter.allocate_tensors() self.input_details = self.interpreter.get_input_details() @@ -104,10 +106,11 @@ reactor Classifier(model="audio_model_9917.tflite") { =} } -reactor Actuator(labels = {= ["Ambulance", "Firetruck", "Traffic"] =}, window=3) { +reactor Actuator(labels = {= ["Emergency", "Other"] =}, window=3) { state results_window state times state count + state total_times input results input inference_time @@ -119,14 +122,14 @@ reactor Actuator(labels = {= ["Ambulance", "Firetruck", "Traffic"] =}, window=3) reaction(startup) {= self.results_window = [] self.times = [] + self.total_times = [] self.count = 0 =} reaction(results, inference_time) {= self.results_window.append(results.value) self.times.append(inference_time.value) - - # Compute the mean results for the inferences for each window + self.total_times.append(inference_time.value) if(((self.count + 1 )%self.window == 0)): results_np = self.np.array(self.results_window) mean_results = results_np.mean(axis=0) @@ -140,6 +143,17 @@ reactor Actuator(labels = {= ["Ambulance", "Firetruck", "Traffic"] =}, window=3) self.times.clear() self.count+=1 =} + + reaction(shutdown) {= + avg_time = self.np.mean(self.total_times) / 1000000 + max_time = self.np.max(self.total_times) / 1000000 + min_time = self.np.min(self.total_times) / 1000000 + print("-"*36 + "Summary" + "-"*36) + print(f"Mean Inference Time: {format(avg_time, '.2f')}ms") + print(f"Slowest Inference: {format(max_time, '.2f')}ms") + print(f"Fastes Inference: {format(min_time, '.2f')}ms") + print("-" * 79) + =} } main reactor { diff --git a/examples/Python/src/AudioClassification/AudioClassification.svg b/examples/Python/src/AudioClassification/AudioClassification.svg index 7387e3b3..b02aa04b 100644 --- a/examples/Python/src/AudioClassification/AudioClassification.svg +++ b/examples/Python/src/AudioClassification/AudioClassification.svg @@ -1 +1 @@ -AudioClassificationMicrophone123Paudio_dataClassifier12input_dataoutput_datainference_timeActuator12resultsinference_time \ No newline at end of file +AudioClassificationMicrophone123Paudio_dataClassifier12input_dataoutput_datainference_timeActuator123resultsinference_time \ No newline at end of file diff --git a/examples/Python/src/AudioClassification/audio_model_9917.tflite 
b/examples/Python/src/AudioClassification/audio_model_9917.tflite
deleted file mode 100644
index c9f7cd16..00000000
--- a/examples/Python/src/AudioClassification/audio_model_9917.tflite
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1547bf4cfb79cb87ddc242134b4977995cf3096cd0cf7caf5eb9a3957b92ea84
-size 15056918
diff --git a/examples/Python/src/AudioClassification/evds_bin.tflite b/examples/Python/src/AudioClassification/evds_bin.tflite
new file mode 100644
index 00000000..e39b66d8
--- /dev/null
+++ b/examples/Python/src/AudioClassification/evds_bin.tflite
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caf15e57658eb6978626a789f874e60d91bac6153dc1fadb640f8ea3cc3333a6
+size 15052806
diff --git a/examples/Python/src/AudioClassification/train/README_Dataset.md b/examples/Python/src/AudioClassification/train/README_Dataset.md
new file mode 100644
index 00000000..e82d8999
--- /dev/null
+++ b/examples/Python/src/AudioClassification/train/README_Dataset.md
@@ -0,0 +1,42 @@
+# Dataset and Training
+
+## Dataset Details
+The training dataset for the Audio Classification model comprises two distinct classes:
+
+- **Emergency**: This class encompasses 850 .wav files featuring sirens from emergency vehicles such as Ambulances, Firetrucks, and Police units. These files were extracted from the [Emergency Vehicle Siren Sounds][EVSS] and [SireNNet][SireNNet] datasets.
+
+- **Other**: Comprising 800 .wav files, this class includes a diverse range of sounds manually extracted from the [ESC-50 Dataset][ESC50]. These encompass various categories such as Animals, Natural Soundscapes & Water Sounds, Human Non-Speech Sounds, Interior/Domestic Sounds, and Urban Sounds (excluding sirens).
+
+You can access the _.zip_ file containing the dataset at this [link][Drive].
+
+The internal directory structure is as follows:
+```bash
+├── dataset
+│   ├── train
+│   │   ├── emergency
+│   │   └── other
+│   └── test
+│       ├── emergency
+│       └── other
+```
+
+## Training
+
+Once you have downloaded the dataset, you can either train your own machine learning model or use the `train.py` script to perform transfer learning with a pre-trained audio classification model such as `YAMNet`.
+
+Before initiating the transfer learning task, ensure that you have the following Python libraries installed:
+```bash
+tensorflow
+tflite_model_maker
+numpy
+matplotlib
+seaborn
+```
+
+Additionally, inside the `train.py` script, make sure to fill in all the path placeholders (`path/to/the/file`) as required.
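+
+Once training completes, you can sanity-check the exported model before wiring it into the Lingua Franca program. The snippet below is a minimal sketch; it assumes the default `model.tflite` filename used in `train.py` and relies only on the standard TensorFlow Lite interpreter API:
+
+```python
+import tensorflow as tf
+
+# Load the exported model and inspect its input/output tensors.
+interpreter = tf.lite.Interpreter(model_path="model.tflite")
+interpreter.allocate_tensors()
+print("input shape:", interpreter.get_input_details()[0]["shape"])
+print("output shapes:", [d["shape"] for d in interpreter.get_output_details()])
+```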
+ + +[ESC50]: https://github.com/karolpiczak/ESC-50 +[SireNNet]: https://data.mendeley.com/datasets/j4ydzzv4kb/1 +[EVSS]: https://www.kaggle.com/datasets/vishnu0399/emergency-vehicle-siren-sounds +[Drive]: https://drive.google.com/file/d/1iLDItoe9v7zL1AIz2bP2OVVRcN2oTziD/view?usp=drive_link \ No newline at end of file diff --git a/examples/Python/src/AudioClassification/train/train.py b/examples/Python/src/AudioClassification/train/train.py new file mode 100644 index 00000000..15d045f9 --- /dev/null +++ b/examples/Python/src/AudioClassification/train/train.py @@ -0,0 +1,65 @@ +import tensorflow as tf +import tflite_model_maker as mm +from tflite_model_maker import audio_classifier +import os + +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns + +print(f"TensorFlow Version: {tf.__version__}") +print(f"Model Maker Version: {mm.__version__}") + +def show_confusion_matrix(confusion, test_labels): + """Compute confusion matrix and normalize.""" + confusion_normalized = confusion.astype("float") / confusion.sum(axis=1) + axis_labels = test_labels + ax = sns.heatmap( + confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels, + cmap='Blues', annot=True, fmt='.2f', square=True) + plt.title("Confusion matrix") + plt.ylabel("True label") + plt.xlabel("Predicted label") + plt.savefig('path/to/the/file') + +# Define Model Spec +spec = audio_classifier.YamNetSpec( + keep_yamnet_and_custom_heads=True, + frame_step= 1 * audio_classifier.YamNetSpec.EXPECTED_WAVEFORM_LENGTH, + frame_length= 1 * audio_classifier.YamNetSpec.EXPECTED_WAVEFORM_LENGTH) + +# Loading the data +dataset_folder = "path/to/the/file" + +train_data = audio_classifier.DataLoader.from_folder( + spec, os.path.join(dataset_folder, 'train'), cache=True) +train_data, validation_data = train_data.split(0.8) + +test_data = audio_classifier.DataLoader.from_folder( + spec, os.path.join(dataset_folder, 'test'), cache=True) + +# Start training +batch_size = 128 +epochs = 100 + +print('Training the model') +model = audio_classifier.create( + train_data, + spec, + validation_data, + batch_size=batch_size, + epochs=epochs) + +# Evaluate model +print('Evaluating the model') +model.evaluate(test_data) + +# Understand data +print('Understanding data') +confusion_matrix = model.confusion_matrix(test_data) +show_confusion_matrix(confusion_matrix.numpy(), test_data.index_to_label) + +# Export model +models_path = 'path/to/the/file' +print(f'Exporting the TFLite model to {models_path}') +model.export(models_path, tflite_filename='model.tflite') \ No newline at end of file From 8a583f25532560a6a8ee3cf435d1a5997b78c6ce Mon Sep 17 00:00:00 2001 From: Vincenzo Barbuto Date: Sun, 21 Apr 2024 14:43:23 -0700 Subject: [PATCH 4/4] Updated README file --- .../AudioClassification.lf | 2 +- .../Python/src/AudioClassification/README.md | 34 ++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/examples/Python/src/AudioClassification/AudioClassification.lf b/examples/Python/src/AudioClassification/AudioClassification.lf index 63085975..9ec190bb 100644 --- a/examples/Python/src/AudioClassification/AudioClassification.lf +++ b/examples/Python/src/AudioClassification/AudioClassification.lf @@ -65,7 +65,7 @@ reactor Microphone { =} reaction(shutdown) {= - print("*"*10 + " Shutting Down " + "*"*10) + print("*"*32 + " SHUTTING DOWN " + "*"*32) self.thread_should_be_running.clear() # self.audio_capture_thread.join() =} diff --git a/examples/Python/src/AudioClassification/README.md 
b/examples/Python/src/AudioClassification/README.md
index 6b318b09..c933bd05 100644
--- a/examples/Python/src/AudioClassification/README.md
+++ b/examples/Python/src/AudioClassification/README.md
@@ -11,17 +11,43 @@ The example comprises three reactors:
 
 ![Diagram of the Lingua Franca Program](./AudioClassification.svg "Diagram of the Lingua Franca Program")
 
-## Install Dependencies
+## Running Locally
 
-The example utilizes several libraries, including `sounddevice`, `numpy`, and `tensorflow`. You can install them effortlessly by executing the command:
+Before cloning this repository, you need to install and configure [Git LFS](https://git-lfs.github.com/) to handle large files. Follow the [installation instructions](https://docs.github.com/en/github/managing-large-files/installing-git-large-file-storage) to set up Git LFS on your system.
+
+Once Git LFS is installed, you can clone the repository:
 
 ```bash
-python3 -m pip install -r requirements.txt
+git clone https://github.com/lf-lang/playground-lingua-franca.git
 ```
+
+### Install Dependencies
+
+The example requires several Python packages, including:
+
+- `sounddevice`
+- `numpy`
+- `tensorflow`
+
+To install the dependencies:
+
+1. Navigate to the example directory:
+
+   ```bash
+   cd examples/Python/src/AudioClassification
+   ```
+
+2. Install the required packages:
+
+   ```bash
+   python3 -m pip install -r requirements.txt
+   ```
+
+This will install all the packages listed in `requirements.txt`.
 > [!WARNING]
 > Be sure that you are using the same Python version as Lingua Franca for building the program.
 
-### Installing TensorFlow for Apple Silicon
+#### Installing TensorFlow for Apple Silicon
 
 Installing TensorFlow for Apple Silicon can be a bit challenging. Therefore, it's important to follow this guide closely. First, ensure you update your **Xcode Command Line Tools**. Open your terminal and execute the following command: