From c63384489e48040f0a7f30a2e5d3ba4bb047c6c7 Mon Sep 17 00:00:00 2001
From: dscripka <david.scripka@gmail.com>
Date: Sun, 11 Feb 2024 12:04:41 -0500
Subject: [PATCH 1/4] Added basic debounce logic for model.predict

---
 openwakeword/model.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/openwakeword/model.py b/openwakeword/model.py
index 6ae820c..97d1303 100755
--- a/openwakeword/model.py
+++ b/openwakeword/model.py
@@ -227,7 +227,8 @@ def reset(self):
         """Reset the prediction buffer"""
         self.prediction_buffer = defaultdict(partial(deque, maxlen=30))
 
-    def predict(self, x: np.ndarray, patience: dict = {}, threshold: dict = {}, timing: bool = False):
+    def predict(self, x: np.ndarray, patience: dict = {},
+                threshold: dict = {}, debounce_time: float = 0.0, timing: bool = False):
         """Predict with all of the wakeword models on the input audio frames
 
         Args:
@@ -242,9 +243,11 @@ def predict(self, x: np.ndarray, patience: dict = {}, threshold: dict = {}, timi
                              model names and the values are the number of frames. Can reduce false-positive
                              detections at the cost of a lower true-positive rate.
                              By default, this behavior is disabled.
-            threshold (dict): The threshold values to use when the `patience` behavior is enabled.
+            threshold (dict): The threshold values to use when the `patience` or `debounce_time` behavior is enabled.
                               Must be provided as an a dictionary where the keys are the
                               model names and the values are the thresholds.
+            debounce_time (float): The time (in seconds) to wait before returning another non-zero prediction
+                                   after a non-zero prediction. Can prevent multiple detections of the same wake-word.
             timing (bool): Whether to return timing information of the models. Can be useful to debug and
                            assess how efficiently models are running on the current hardware.
 
@@ -333,16 +336,22 @@ def predict(self, x: np.ndarray, patience: dict = {}, threshold: dict = {}, timi
                 timing_dict["models"][mdl] = time.time() - model_start
 
         # Update scores based on thresholds or patience arguments
-        if patience != {}:
+        if patience != {} or debounce_time > 0:
             if threshold == {}:
                 raise ValueError("Error! When using the `patience` argument, threshold "
                                  "values must be provided via the `threshold` argument!")
+            if patience != {} and debounce_time > 0:
+                raise ValueError("Error! The `patience` and `debounce_time` arguments cannot be used together!")
             for mdl in predictions.keys():
                 parent_model = self.get_parent_model_from_label(mdl)
                 if parent_model in patience.keys():
                     scores = np.array(self.prediction_buffer[mdl])[-patience[parent_model]:]
                     if (scores >= threshold[parent_model]).sum() < patience[parent_model]:
                         predictions[mdl] = 0.0
+                if debounce_time > 0:
+                    n_frames = int(debounce_time*1000/80)
+                    if (np.array(self.prediction_buffer[mdl])[-n_frames:] >= threshold[parent_model]).sum() > 0:
+                        predictions[mdl] = 0.0
 
         # (optionally) get voice activity detection scores and update model scores
         if self.vad_threshold > 0:

From 68e88c1350113a1e70afc8cb64e8d9d120db619f Mon Sep 17 00:00:00 2001
From: dscripka <david.scripka@gmail.com>
Date: Sun, 11 Feb 2024 12:08:03 -0500
Subject: [PATCH 2/4] Added/fixed reset methods

---
 openwakeword/model.py |  4 +++-
 openwakeword/utils.py | 10 +++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/openwakeword/model.py b/openwakeword/model.py
index 97d1303..8f2ef42 100755
--- a/openwakeword/model.py
+++ b/openwakeword/model.py
@@ -224,8 +224,10 @@ def get_parent_model_from_label(self, label):
         return parent_model
 
     def reset(self):
-        """Reset the prediction buffer"""
+        """Reset the prediction and audio feature buffers. Useful for re-initializing the model, though it may be
+        inefficient if called too frequently."""
         self.prediction_buffer = defaultdict(partial(deque, maxlen=30))
+        self.preprocessor.reset()
 
     def predict(self, x: np.ndarray, patience: dict = {},
                 threshold: dict = {}, debounce_time: float = 0.0, timing: bool = False):
diff --git a/openwakeword/utils.py b/openwakeword/utils.py
index 8da8048..4964706 100644
--- a/openwakeword/utils.py
+++ b/openwakeword/utils.py
@@ -160,7 +160,7 @@ def tflite_embedding_predict(x):
 
             self.embedding_model_predict = tflite_embedding_predict
 
-        # Create databuffers
+        # Create databuffers with empty/random data
         self.raw_data_buffer: Deque = deque(maxlen=sr*10)
         self.melspectrogram_buffer = np.ones((76, 32))  # n_frames x num_features
         self.melspectrogram_max_len = 10*97  # 97 is the number of frames in 1 second of 16hz audio
@@ -169,6 +169,14 @@ def tflite_embedding_predict(x):
         self.feature_buffer = self._get_embeddings(np.random.randint(-1000, 1000, 16000*4).astype(np.int16))
         self.feature_buffer_max_len = 120  # ~10 seconds of feature buffer history
 
+    def reset(self):
+        """Reset the internal buffers"""
+        self.raw_data_buffer.clear()
+        self.melspectrogram_buffer = np.ones((76, 32))
+        self.accumulated_samples = 0
+        self.raw_data_remainder = np.empty(0)
+        self.feature_buffer = self._get_embeddings(np.random.randint(-1000, 1000, 16000*4).astype(np.int16))
+
     def _get_melspectrogram(self, x: Union[np.ndarray, List], melspec_transform: Callable = lambda x: x/10 + 2):
         """
         Function to compute the mel-spectrogram of the provided audio samples.

From 528f4bff2cb78f3aae594c6da127a99f312ae5cf Mon Sep 17 00:00:00 2001
From: dscripka <david.scripka@gmail.com>
Date: Sun, 11 Feb 2024 12:45:59 -0500
Subject: [PATCH 3/4] tests for debounce functionality

---
 openwakeword/model.py | 27 +++++++++++++++++----------
 tests/test_models.py  | 19 +++++++++++++++++++
 2 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/openwakeword/model.py b/openwakeword/model.py
index 8f2ef42..6029963 100755
--- a/openwakeword/model.py
+++ b/openwakeword/model.py
@@ -327,11 +327,10 @@ def predict(self, x: np.ndarray, patience: dict = {},
                             )[0][-1]
                             predictions[cls] = verifier_prediction
 
-            # Update prediction buffer, and zero predictions for first 5 frames during model initialization
+            # Zero predictions for first 5 frames during model initialization
             for cls in predictions.keys():
                 if len(self.prediction_buffer[cls]) < 5:
                     predictions[cls] = 0.0
-                self.prediction_buffer[cls].append(predictions[cls])
 
             # Get timing information
             if timing:
@@ -346,14 +345,22 @@ def predict(self, x: np.ndarray, patience: dict = {},
                 raise ValueError("Error! The `patience` and `debounce_time` arguments cannot be used together!")
             for mdl in predictions.keys():
                 parent_model = self.get_parent_model_from_label(mdl)
-                if parent_model in patience.keys():
-                    scores = np.array(self.prediction_buffer[mdl])[-patience[parent_model]:]
-                    if (scores >= threshold[parent_model]).sum() < patience[parent_model]:
-                        predictions[mdl] = 0.0
-                if debounce_time > 0:
-                    n_frames = int(debounce_time*1000/80)
-                    if (np.array(self.prediction_buffer[mdl])[-n_frames:] >= threshold[parent_model]).sum() > 0:
-                        predictions[mdl] = 0.0
+                if predictions[mdl] != 0.0:
+                    if parent_model in patience.keys():
+                        scores = np.array(self.prediction_buffer[mdl])[-patience[parent_model]:]
+                        if (scores >= threshold[parent_model]).sum() < patience[parent_model]:
+                            predictions[mdl] = 0.0
+                    elif debounce_time > 0:
+                        if parent_model in threshold.keys():
+                            n_frames = int(np.ceil(debounce_time/(n_prepared_samples/16000)))
+                            recent_predictions = np.array(self.prediction_buffer[mdl])[-n_frames:]
+                            if predictions[mdl] >= threshold[parent_model] and \
+                               (recent_predictions >= threshold[parent_model]).sum() > 0:
+                                predictions[mdl] = 0.0
+
+        # Update prediction buffer
+        for mdl in predictions.keys():
+            self.prediction_buffer[mdl].append(predictions[mdl])
 
         # (optionally) get voice activity detection scores and update model scores
         if self.vad_threshold > 0:
diff --git a/tests/test_models.py b/tests/test_models.py
index e728065..fb6defd 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -208,6 +208,25 @@ def test_models_with_speex_noise_cancellation(self):
                                 )
                 assert 1 == 1
 
+    def test_models_with_debounce(self):
+        # Load model with defaults
+        owwModel = openwakeword.Model()
+
+        # Get test clip
+        os.path.join("tests", "data", "alexa_test.wav")
+
+        # Predict with chunks of 1280 with and without debounce
+        predictions = owwModel.predict_clip(os.path.join("tests", "data", "alexa_test.wav"),
+                                            debounce_time=0, threshold={"alexa_v0.1": 0.5})
+        scores = np.array([i['alexa'] for i in predictions])
+
+        predictions = owwModel.predict_clip(os.path.join("tests", "data", "alexa_test.wav"),
+                                            debounce_time=1.25, threshold={"alexa": 0.5})
+        scores_with_debounce = np.array([i['alexa'] for i in predictions])
+        print(scores, scores_with_debounce)
+        assert (scores >= 0.5).sum() > 1
+        assert (scores_with_debounce >= 0.5).sum() == 1
+
     def test_models_with_vad(self):
         # Load model with defaults
         owwModel = openwakeword.Model(vad_threshold=0.5)

From e9eade7aacb67154a1bde1b0e766f881eb73fcc6 Mon Sep 17 00:00:00 2001
From: dscripka <david.scripka@gmail.com>
Date: Sun, 11 Feb 2024 15:08:27 -0500
Subject: [PATCH 4/4] Added tests for reset methods

---
 tests/test_models.py | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/tests/test_models.py b/tests/test_models.py
index fb6defd..b3907ff 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -39,6 +39,7 @@
 import pickle
 import tempfile
 import mock
+import wave
 
 # Download models needed for tests
 openwakeword.utils.download_models()
@@ -212,9 +213,6 @@ def test_models_with_debounce(self):
         # Load model with defaults
         owwModel = openwakeword.Model()
 
-        # Get test clip
-        os.path.join("tests", "data", "alexa_test.wav")
-
         # Predict with chunks of 1280 with and without debounce
         predictions = owwModel.predict_clip(os.path.join("tests", "data", "alexa_test.wav"),
                                             debounce_time=0, threshold={"alexa_v0.1": 0.5})
@@ -227,6 +225,32 @@ def test_models_with_debounce(self):
         assert (scores >= 0.5).sum() > 1
         assert (scores_with_debounce >= 0.5).sum() == 1
 
+    def test_model_reset(self):
+        # Load the model
+        owwModel = openwakeword.Model()
+
+        # Get test clip and load it
+        clip = os.path.join("tests", "data", "alexa_test.wav")
+        with wave.open(clip, mode='rb') as f:
+            data = np.frombuffer(f.readframes(f.getnframes()), dtype=np.int16)
+
+        # Predict frame by frame
+        for i in range(0, len(data), 1280):
+            prediction = owwModel.predict(data[i:i+1280])
+            if prediction['alexa'] > 0.5:
+                break
+
+        # Assert that next prediction is still > 0.5
+        prediction = owwModel.predict(data[i:i+1280])
+        assert prediction['alexa'] > 0.5
+
+        # Reset the model
+        owwModel.reset()
+
+        # Assert that next prediction is < 0.5
+        prediction = owwModel.predict(data[i:i+1280])
+        assert prediction['alexa'] < 0.5
+
     def test_models_with_vad(self):
         # Load model with defaults
         owwModel = openwakeword.Model(vad_threshold=0.5)