From dec9f17898415fd574662d3951d76491d07aaa44 Mon Sep 17 00:00:00 2001 From: dscripka Date: Tue, 22 Aug 2023 00:41:45 -0400 Subject: [PATCH 1/5] Fixed issue where audio would be dropped if input data length was not an integer multiple of 1280 --- openwakeword/model.py | 18 ++++++++++++------ openwakeword/utils.py | 35 ++++++++++++++++++++++++----------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/openwakeword/model.py b/openwakeword/model.py index 1f2eea4..3a46072 100755 --- a/openwakeword/model.py +++ b/openwakeword/model.py @@ -231,7 +231,7 @@ def predict(self, x: np.ndarray, patience: dict = {}, threshold: dict = {}, timi """Predict with all of the wakeword models on the input audio frames Args: - x (Union[ndarray]): The input audio data to predict on with the models. Should be multiples of 80 ms + x (ndarray): The input audio data to predict on with the models. Should be multiples of 80 ms (1280 samples), with longer lengths reducing overall CPU usage but decreasing detection latency. 
patience (dict): How many consecutive frames (of 1280 samples or 80 ms) above the threshold that must @@ -260,9 +260,9 @@ def predict(self, x: np.ndarray, patience: dict = {}, threshold: dict = {}, timi # Get audio features (optionally with Speex noise suppression) if self.speex_ns: - self.preprocessor(self._suppress_noise_with_speex(x)) + n_prepared_samples = self.preprocessor(self._suppress_noise_with_speex(x)) else: - self.preprocessor(x) + n_prepared_samples = self.preprocessor(x) if timing: timing_dict["models"]["preprocessor"] = time.time() - feature_start @@ -274,9 +274,9 @@ def predict(self, x: np.ndarray, patience: dict = {}, threshold: dict = {}, timi model_start = time.time() # Run model to get predictions - if len(x) > 1280: + if n_prepared_samples > 1280: group_predictions = [] - for i in np.arange(len(x)//1280-1, -1, -1): + for i in np.arange(n_prepared_samples//1280-1, -1, -1): group_predictions.extend( self.model_prediction_function[mdl]( self.preprocessor.get_features( @@ -286,10 +286,16 @@ def predict(self, x: np.ndarray, patience: dict = {}, threshold: dict = {}, timi ) ) prediction = np.array(group_predictions).max(axis=0)[None, ] - else: + elif n_prepared_samples == 1280: prediction = self.model_prediction_function[mdl]( self.preprocessor.get_features(self.model_inputs[mdl]) ) + else: + if len(self.prediction_buffer[mdl]) > 0: + prediction = [[[self.prediction_buffer[mdl][-1]]]] + else: + for int_label, cls in self.class_mapping[mdl].items(): + prediction = [[[0]*(int(int_label)+1)]] if self.model_outputs[mdl] == 1: predictions[mdl] = prediction[0][0][0] diff --git a/openwakeword/utils.py b/openwakeword/utils.py index ab9c6a8..1ad391e 100644 --- a/openwakeword/utils.py +++ b/openwakeword/utils.py @@ -162,7 +162,7 @@ def tflite_embedding_predict(x): self.melspectrogram_buffer = np.ones((76, 32)) # n_frames x num_features self.melspectrogram_max_len = 10*97 # 97 is the number of frames in 1 second of 16hz audio self.accumulated_samples = 0 # the 
samples added to the buffer since the audio preprocessor was last called - # self.feature_buffer = np.vstack([self._get_embeddings(np.random.randint(-1000, 1000, 1280).astype(np.int16)) for _ in range(10)]) + self.raw_data_remainder = np.empty(0) self.feature_buffer = self._get_embeddings(np.random.randint(-1000, 1000, 16000*4).astype(np.int16)) self.feature_buffer_max_len = 120 # ~10 seconds of feature buffer history @@ -377,6 +377,9 @@ def _streaming_melspectrogram(self, n_samples): clip is calculated. It's unclear if this difference is significant and will impact model performance. In particular padding with 0 or very small values seems to demonstrate the differences well. """ + if len(self.raw_data_buffer) < 400: + raise ValueError("The number of input frames must be at least 400 samples @ 16khz (25 ms)!") + self.melspectrogram_buffer = np.vstack( (self.melspectrogram_buffer, self._get_melspectrogram(list(self.raw_data_buffer)[-n_samples-160*3:])) ) @@ -388,18 +391,25 @@ def _buffer_raw_data(self, x): """ Adds raw audio data to the input buffer """ - if len(x) < 400: - raise ValueError("The number of input frames must be at least 400 samples @ 16khz (25 ms)!") self.raw_data_buffer.extend(x.tolist() if isinstance(x, np.ndarray) else x) def _streaming_features(self, x): - # if len(x) != 1280: - # raise ValueError("You must provide input samples in frames of 1280 samples @ 1600khz." 
- # f"Received a frame of {len(x)} samples.") - - # Add raw audio data to buffer - self._buffer_raw_data(x) - self.accumulated_samples += len(x) + # Add raw audio data to buffer, temporarily storing extra frames if not an even number of 80 ms chunks + processed_samples = 0 + if self.raw_data_remainder.shape[0] != 0: + x = np.concatenate((self.raw_data_remainder, x)) + + if x.shape[0] < 1280 and self.accumulated_samples == 0: + self._buffer_raw_data(x) + self.accumulated_samples += len(x) + + elif (x.shape[0] >= 1280 and self.accumulated_samples == 0) or \ + (self.accumulated_samples != 0 and self.accumulated_samples + x.shape[0] >= 1280): + remainder = (self.accumulated_samples + x.shape[0]) % 1280 + x_even_chunks = x[0:x.shape[0] - remainder] + self._buffer_raw_data(x_even_chunks) + self.accumulated_samples += len(x_even_chunks) + self.raw_data_remainder = x[x.shape[0] - remainder:] # Only calculate melspectrogram once minimum samples area accumulated if self.accumulated_samples >= 1280: @@ -415,11 +425,14 @@ def _streaming_features(self, x): self.embedding_model_predict(x))) # Reset raw data buffer counter + processed_samples = self.accumulated_samples self.accumulated_samples = 0 if self.feature_buffer.shape[0] > self.feature_buffer_max_len: self.feature_buffer = self.feature_buffer[-self.feature_buffer_max_len:, :] + return processed_samples if processed_samples != 0 else self.accumulated_samples + def get_features(self, n_feature_frames: int = 16, start_ndx: int = -1): if start_ndx != -1: end_ndx = start_ndx + int(n_feature_frames) \ @@ -429,7 +442,7 @@ def get_features(self, n_feature_frames: int = 16, start_ndx: int = -1): return self.feature_buffer[int(-1*n_feature_frames):, :][None, ].astype(np.float32) def __call__(self, x): - self._streaming_features(x) + return self._streaming_features(x) # Bulk prediction function From 2bc602d4b43b55b1463b522b0ac48e5c599568b3 Mon Sep 17 00:00:00 2001 From: dscripka Date: Thu, 24 Aug 2023 11:02:29 -0400 Subject: [PATCH 
2/5] Increased test coverage --- openwakeword/model.py | 11 ++++++++--- setup.py | 4 +++- tests/test_models.py | 42 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/openwakeword/model.py b/openwakeword/model.py index 3a46072..a392cac 100755 --- a/openwakeword/model.py +++ b/openwakeword/model.py @@ -231,9 +231,11 @@ def predict(self, x: np.ndarray, patience: dict = {}, threshold: dict = {}, timi """Predict with all of the wakeword models on the input audio frames Args: - x (ndarray): The input audio data to predict on with the models. Should be multiples of 80 ms + x (ndarray): The input audio data to predict on with the models. Ideally should be multiples of 80 ms (1280 samples), with longer lengths reducing overall CPU usage - but decreasing detection latency. + but decreasing detection latency. Input audio with durations greater than or less + than 80 ms is also supported, though this will add a detection delay of up to 80 ms + as the appropriate number of samples are accumulated. patience (dict): How many consecutive frames (of 1280 samples or 80 ms) above the threshold that must be observed before the current frame will be returned as non-zero. Must be provided as an a dictionary where the keys are the @@ -251,6 +253,9 @@ def predict(self, x: np.ndarray, patience: dict = {}, threshold: dict = {}, timi wake-word/wake-phrase detected. If the `timing` argument is true, returns a tuple of dicts containing model predictions and timing information, respectively. 
""" + # Check input data type + if not isinstance(x, np.ndarray): + raise ValueError(f"The input audio data (x) must by a Numpy array, instead received an object of type {type(x)}.") # Setup timing dict if timing: @@ -290,7 +295,7 @@ def predict(self, x: np.ndarray, patience: dict = {}, threshold: dict = {}, timi prediction = self.model_prediction_function[mdl]( self.preprocessor.get_features(self.model_inputs[mdl]) ) - else: + elif n_prepared_samples < 1280: if len(self.prediction_buffer[mdl]) > 0: prediction = [[[self.prediction_buffer[mdl][-1]]]] else: diff --git a/setup.py b/setup.py index af0d50b..4a183d9 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,9 @@ def build_additional_requires(): 'pytest-cov>=2.10.1,<3', 'pytest-flake8>=1.1.1,<2', 'flake8>=4.0,<4.1', - 'pytest-mypy>=0.10.0,<1' + 'pytest-mypy>=0.10.0,<1', + 'mock>=5.1,<6', + 'types-mock>=5.1,<6' ], 'full': [ 'mutagen>=1.46.0,<2', diff --git a/tests/test_models.py b/tests/test_models.py index c5b73e1..cf49db4 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -29,11 +29,15 @@ # Imports import openwakeword import os +import sys import numpy as np from pathlib import Path import collections import pytest import platform +import pickle +import tempfile +import mock # Tests @@ -51,9 +55,45 @@ def test_load_models_by_path(self): # Prediction on random data owwModel.predict(np.random.randint(-1000, 1000, 1280).astype(np.int16)) - # Prediction on random data with different chunk size + def test_predict_with_different_frame_sizes(self): + owwModel = openwakeword.Model(wakeword_models=[ + os.path.join("openwakeword", "resources", "models", "alexa_v0.1.onnx") + ], inference_framework="onnx") + + # Prediction on random data with integer multiples of standard chunk size (1280 samples) + owwModel.predict(np.random.randint(-1000, 1000, 1280).astype(np.int16)) owwModel.predict(np.random.randint(-1000, 1000, 1280*2).astype(np.int16)) + # Prediction on data with a chunk size not an integer multiple of 
1280 + owwModel.predict(np.random.randint(-1000, 1000, 1024).astype(np.int16)) + owwModel.predict(np.random.randint(-1000, 1000, 1024*2).astype(np.int16)) + + def test_exception_handling_for_inference_framework(self): + with mock.patch.dict(sys.modules, {'onnxruntime': None}): + with pytest.raises(ValueError): + openwakeword.Model(wakeword_models=[ + os.path.join("openwakeword", "resources", "models", "alexa_v0.1.onnx") + ], inference_framework="onnx") + + with mock.patch.dict(sys.modules, {'tflite_runtime': None}): + openwakeword.Model(wakeword_models=[ + os.path.join("openwakeword", "resources", "models", "alexa_v0.1.tflite") + ], inference_framework="tflite") + + def test_predict_with_custom_verifier_model(self): + with tempfile.TemporaryDirectory() as tmp_dir: + # Train custom verifier model with random data + verifier_model = openwakeword.custom_verifier_model.train_verifier_model(np.random.random((2, 10)), np.array([0, 1])) + pickle.dump(verifier_model, open(os.path.join(tmp_dir, "test_verifier.pkl"), "wb")) + + # Load model with verifier + owwModel = openwakeword.Model(wakeword_models=[ + os.path.join("openwakeword", "resources", "models", "alexa_v0.1.onnx") + ], inference_framework="onnx", + custom_verifier_models={"alexa_v0.1": os.path.join(tmp_dir, "test_verifier.pkl")}) + + owwModel.predict(np.random.randint(-1000, 1000, 1280).astype(np.int16)) + def test_load_pretrained_model_by_name(self): # Load model with defaults owwModel = openwakeword.Model(wakeword_models=["alexa", "hey mycroft"], inference_framework="onnx") From 3dbc16e11e529dbea6eccd1555072b74d89eba29 Mon Sep 17 00:00:00 2001 From: dscripka Date: Fri, 25 Aug 2023 16:56:17 -0400 Subject: [PATCH 3/5] Increased test coverage, fixed issue with multi-class models --- openwakeword/model.py | 15 +++++++++------ tests/test_models.py | 26 +++++++++++++++++++++----- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/openwakeword/model.py b/openwakeword/model.py index a392cac..a48450e 100755 
--- a/openwakeword/model.py +++ b/openwakeword/model.py @@ -295,12 +295,15 @@ def predict(self, x: np.ndarray, patience: dict = {}, threshold: dict = {}, timi prediction = self.model_prediction_function[mdl]( self.preprocessor.get_features(self.model_inputs[mdl]) ) - elif n_prepared_samples < 1280: - if len(self.prediction_buffer[mdl]) > 0: - prediction = [[[self.prediction_buffer[mdl][-1]]]] - else: - for int_label, cls in self.class_mapping[mdl].items(): - prediction = [[[0]*(int(int_label)+1)]] + elif n_prepared_samples < 1280: # get previous prediction if there aren't enough samples + if self.model_outputs[mdl] == 1: + if len(self.prediction_buffer[mdl]) > 0: + prediction = [[[self.prediction_buffer[mdl][-1]]]] + else: + prediction = [[[0]]] + elif self.model_outputs[mdl] != 1: + n_classes = max([int(i) for i in self.class_mapping[mdl].keys()]) + prediction = [[[0]*(n_classes+1)]] if self.model_outputs[mdl] == 1: predictions[mdl] = prediction[0][0][0] diff --git a/tests/test_models.py b/tests/test_models.py index cf49db4..f3cd09d 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -56,6 +56,7 @@ def test_load_models_by_path(self): owwModel.predict(np.random.randint(-1000, 1000, 1280).astype(np.int16)) def test_predict_with_different_frame_sizes(self): + # Test with binary model owwModel = openwakeword.Model(wakeword_models=[ os.path.join("openwakeword", "resources", "models", "alexa_v0.1.onnx") ], inference_framework="onnx") @@ -68,6 +69,19 @@ def test_predict_with_different_frame_sizes(self): owwModel.predict(np.random.randint(-1000, 1000, 1024).astype(np.int16)) owwModel.predict(np.random.randint(-1000, 1000, 1024*2).astype(np.int16)) + # Test with multiclass model + owwModel = openwakeword.Model(wakeword_models=[ + os.path.join("openwakeword", "resources", "models", "timer_v0.1.onnx") + ], inference_framework="onnx") + + # Prediction on random data with integer multiples of standard chunk size (1280 samples) + 
owwModel.predict(np.random.randint(-1000, 1000, 1280).astype(np.int16)) + owwModel.predict(np.random.randint(-1000, 1000, 1280*2).astype(np.int16)) + + # Prediction on data with a chunk size not an integer multiple of 1280 + owwModel.predict(np.random.randint(-1000, 1000, 1024).astype(np.int16)) + owwModel.predict(np.random.randint(-1000, 1000, 1024*2).astype(np.int16)) + def test_exception_handling_for_inference_framework(self): with mock.patch.dict(sys.modules, {'onnxruntime': None}): with pytest.raises(ValueError): @@ -83,14 +97,16 @@ def test_exception_handling_for_inference_framework(self): def test_predict_with_custom_verifier_model(self): with tempfile.TemporaryDirectory() as tmp_dir: # Train custom verifier model with random data - verifier_model = openwakeword.custom_verifier_model.train_verifier_model(np.random.random((2, 10)), np.array([0, 1])) + verifier_model = openwakeword.custom_verifier_model.train_verifier_model(np.random.random((2, 1536)), np.array([0, 1])) pickle.dump(verifier_model, open(os.path.join(tmp_dir, "test_verifier.pkl"), "wb")) # Load model with verifier - owwModel = openwakeword.Model(wakeword_models=[ - os.path.join("openwakeword", "resources", "models", "alexa_v0.1.onnx") - ], inference_framework="onnx", - custom_verifier_models={"alexa_v0.1": os.path.join(tmp_dir, "test_verifier.pkl")}) + owwModel = openwakeword.Model( + wakeword_models=[os.path.join("openwakeword", "resources", "models", "alexa_v0.1.onnx")], + inference_framework="onnx", + custom_verifier_models={"alexa_v0.1": os.path.join(tmp_dir, "test_verifier.pkl")}, + custom_verifier_threshold=0.0 + ) owwModel.predict(np.random.randint(-1000, 1000, 1280).astype(np.int16)) From 7056d28a3e441238a6561b3dc6bf1de6d74f081e Mon Sep 17 00:00:00 2001 From: dscripka Date: Fri, 25 Aug 2023 22:07:12 -0400 Subject: [PATCH 4/5] Fixed bugs in handling of variable input data sizes and adjusted tests --- openwakeword/model.py | 2 +- openwakeword/utils.py | 29 +++++++++++++++++------------ 
tests/test_models.py | 31 +++++++++++++++++++------------ 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/openwakeword/model.py b/openwakeword/model.py index a48450e..46f603a 100755 --- a/openwakeword/model.py +++ b/openwakeword/model.py @@ -97,7 +97,7 @@ def __init__( raise ValueError("Could not find pretrained model for model name '{}'".format(i)) else: wakeword_models[ndx] = matching_model[0] - wakeword_model_names.append(matching_model[0].split(os.path.sep)[-1]) + wakeword_model_names.append(i) # Create attributes to store models and metadata self.models = {} diff --git a/openwakeword/utils.py b/openwakeword/utils.py index 1ad391e..c4f9b15 100644 --- a/openwakeword/utils.py +++ b/openwakeword/utils.py @@ -396,23 +396,28 @@ def _buffer_raw_data(self, x): def _streaming_features(self, x): # Add raw audio data to buffer, temporarily storing extra frames if not an even number of 80 ms chunks processed_samples = 0 + if self.raw_data_remainder.shape[0] != 0: x = np.concatenate((self.raw_data_remainder, x)) + self.raw_data_remainder = np.empty(0) - if x.shape[0] < 1280 and self.accumulated_samples == 0: - self._buffer_raw_data(x) - self.accumulated_samples += len(x) - - elif (x.shape[0] >= 1280 and self.accumulated_samples == 0) or \ - (self.accumulated_samples != 0 and self.accumulated_samples + x.shape[0] >= 1280): + if self.accumulated_samples + x.shape[0] >= 1280: remainder = (self.accumulated_samples + x.shape[0]) % 1280 - x_even_chunks = x[0:x.shape[0] - remainder] - self._buffer_raw_data(x_even_chunks) - self.accumulated_samples += len(x_even_chunks) - self.raw_data_remainder = x[x.shape[0] - remainder:] + if remainder != 0: + x_even_chunks = x[0:-remainder] + self._buffer_raw_data(x_even_chunks) + self.accumulated_samples += len(x_even_chunks) + self.raw_data_remainder = x[-remainder:] + elif remainder == 0: + self._buffer_raw_data(x) + self.accumulated_samples += x.shape[0] + self.raw_data_remainder = np.empty(0) + else: + 
self.accumulated_samples += x.shape[0] + self._buffer_raw_data(x) - # Only calculate melspectrogram once minimum samples area accumulated - if self.accumulated_samples >= 1280: + # Only calculate melspectrogram once minimum samples are accumulated + if self.accumulated_samples >= 1280 and self.accumulated_samples % 1280 == 0: self._streaming_melspectrogram(self.accumulated_samples) # Calculate new audio embeddings/features based on update melspectrograms diff --git a/tests/test_models.py b/tests/test_models.py index f3cd09d..04ef066 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -57,30 +57,37 @@ def test_load_models_by_path(self): def test_predict_with_different_frame_sizes(self): # Test with binary model - owwModel = openwakeword.Model(wakeword_models=[ + owwModel1 = openwakeword.Model(wakeword_models=[ + os.path.join("openwakeword", "resources", "models", "alexa_v0.1.onnx") + ], inference_framework="onnx") + + owwModel2 = openwakeword.Model(wakeword_models=[ os.path.join("openwakeword", "resources", "models", "alexa_v0.1.onnx") ], inference_framework="onnx") # Prediction on random data with integer multiples of standard chunk size (1280 samples) - owwModel.predict(np.random.randint(-1000, 1000, 1280).astype(np.int16)) - owwModel.predict(np.random.randint(-1000, 1000, 1280*2).astype(np.int16)) + predictions1 = owwModel1.predict_clip(os.path.join("tests", "data", "alexa_test.wav"), chunk_size=1280) + predictions2 = owwModel2.predict_clip(os.path.join("tests", "data", "alexa_test.wav"), chunk_size=1280*2) + np.testing.assert_approx_equal(max([i['alexa_v0.1'] for i in predictions1]), max([i['alexa_v0.1'] for i in predictions2]), 5) # Prediction on data with a chunk size not an integer multiple of 1280 - owwModel.predict(np.random.randint(-1000, 1000, 1024).astype(np.int16)) - owwModel.predict(np.random.randint(-1000, 1000, 1024*2).astype(np.int16)) + predictions1 = owwModel1.predict_clip(os.path.join("tests", "data", "alexa_test.wav"), 
chunk_size=1024) + predictions2 = owwModel2.predict_clip(os.path.join("tests", "data", "alexa_test.wav"), chunk_size=1024*2) + np.testing.assert_approx_equal(max([i['alexa_v0.1'] for i in predictions1]), max([i['alexa_v0.1'] for i in predictions2]), 5) # Test with multiclass model - owwModel = openwakeword.Model(wakeword_models=[ - os.path.join("openwakeword", "resources", "models", "timer_v0.1.onnx") - ], inference_framework="onnx") + owwModel1 = openwakeword.Model(wakeword_models=["timer"], inference_framework="onnx") + owwModel2 = openwakeword.Model(wakeword_models=["timer"], inference_framework="onnx") # Prediction on random data with integer multiples of standard chunk size (1280 samples) - owwModel.predict(np.random.randint(-1000, 1000, 1280).astype(np.int16)) - owwModel.predict(np.random.randint(-1000, 1000, 1280*2).astype(np.int16)) + predictions1 = owwModel1.predict_clip(os.path.join("tests", "data", "alexa_test.wav"), chunk_size=1280) + predictions2 = owwModel2.predict_clip(os.path.join("tests", "data", "alexa_test.wav"), chunk_size=1280*2) + assert abs(max([i['1_minute_timer'] for i in predictions1]) - max([i['1_minute_timer'] for i in predictions2])) < 0.00001 # Prediction on data with a chunk size not an integer multiple of 1280 - owwModel.predict(np.random.randint(-1000, 1000, 1024).astype(np.int16)) - owwModel.predict(np.random.randint(-1000, 1000, 1024*2).astype(np.int16)) + predictions1 = owwModel1.predict_clip(os.path.join("tests", "data", "alexa_test.wav"), chunk_size=1024) + predictions2 = owwModel2.predict_clip(os.path.join("tests", "data", "alexa_test.wav"), chunk_size=1024*2) + assert abs(max([i['1_minute_timer'] for i in predictions1]) - max([i['1_minute_timer'] for i in predictions2])) < 0.00001 def test_exception_handling_for_inference_framework(self): with mock.patch.dict(sys.modules, {'onnxruntime': None}): From ee0a31891e7c7e9c712e546f457654e01ee1e897 Mon Sep 17 00:00:00 2001 From: dscripka Date: Fri, 25 Aug 2023 22:25:40 -0400 Subject: 
[PATCH 5/5] Adjusted tests to remove dependencies on optional libraries (fixes #32) --- tests/test_models.py | 61 ++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index 04ef066..c38ecb8 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -30,6 +30,7 @@ import openwakeword import os import sys +import logging import numpy as np from pathlib import Path import collections @@ -172,31 +173,37 @@ def test_models_with_speex_noise_cancellation(self): assert 1 == 1 else: # Load model with defaults - owwModel = openwakeword.Model(enable_speex_noise_suppression=True) - - # Get clips for each model (assumes that test clips will have the model name in the filename) - test_dict = {} - for mdl_name in owwModel.models.keys(): - all_clips = [str(i) for i in Path(os.path.join("tests", "data")).glob("*.wav")] - test_dict[mdl_name] = [i for i in all_clips if mdl_name in i] - - # Predict - for model, clips in test_dict.items(): - for clip in clips: - # Get predictions for reach frame in the clip - predictions = owwModel.predict_clip(clip) - owwModel.reset() # reset after each clip to ensure independent results - - # Make predictions dictionary flatter - predictions_flat = collections.defaultdict(list) - [predictions_flat[key].append(i[key]) for i in predictions for key in i.keys()] - - # Check scores against default threshold (0.5) - for key in predictions_flat.keys(): - if key in clip: - assert max(predictions_flat[key]) >= 0.5 - else: - assert max(predictions_flat[key]) < 0.5 + try: + owwModel = openwakeword.Model(enable_speex_noise_suppression=True) + + # Get clips for each model (assumes that test clips will have the model name in the filename) + test_dict = {} + for mdl_name in owwModel.models.keys(): + all_clips = [str(i) for i in Path(os.path.join("tests", "data")).glob("*.wav")] + test_dict[mdl_name] = [i for i in all_clips if mdl_name in i] + + # Predict + for 
model, clips in test_dict.items(): + for clip in clips: + # Get predictions for each frame in the clip + predictions = owwModel.predict_clip(clip) + owwModel.reset() # reset after each clip to ensure independent results + + # Make predictions dictionary flatter + predictions_flat = collections.defaultdict(list) + [predictions_flat[key].append(i[key]) for i in predictions for key in i.keys()] + + # Check scores against default threshold (0.5) + for key in predictions_flat.keys(): + if key in clip: + assert max(predictions_flat[key]) >= 0.5 + else: + assert max(predictions_flat[key]) < 0.5 + except ImportError: + logging.warning("Attemped to test Speex noise cancelling functionality, but the 'speexdsp_ns' library was not installed!" + " If you want these tests to be run, install this library as shown in the openwakeword documentation." + ) + assert 1 == 1 def test_models_with_vad(self): # Load model with defaults @@ -264,8 +271,8 @@ def test_get_parent_model_from_prediction_label(self): def test_get_positive_prediction_frames(self): owwModel = openwakeword.Model(wakeword_models=[ - os.path.join("openwakeword", "resources", "models", "alexa_v0.1.tflite") - ], inference_framework="tflite") + os.path.join("openwakeword", "resources", "models", "alexa_v0.1.onnx") + ], inference_framework="onnx") clip = os.path.join("tests", "data", "alexa_test.wav") features = owwModel._get_positive_prediction_frames(clip)