From ae66e911d7baeab683fdb344b0871df31dd360f6 Mon Sep 17 00:00:00 2001
From: Daniel Rasmussen
Date: Fri, 19 Apr 2024 17:06:05 -0300
Subject: [PATCH] Correctly set output_size when hidden_cell=None

---
 CHANGES.rst                    |  8 ++++++++
 keras_lmu/layers.py            | 22 ++++++++++++++++++++++
 keras_lmu/tests/test_layers.py | 22 ++++++++++++++++++----
 3 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 2425b45..e633490 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -24,6 +24,14 @@ Release history
 
 *Compatible with TensorFlow 2.6 - 2.16*
 
+**Added**
+
+- Added an ``input_d`` parameter to ``LMUCell``. This only needs to be specified
+  when ``hidden_cell=None`` and ``input_to_hidden=True``; in that scenario it is
+  required in order to accurately set ``LMUCell.output_size``. (`#56`_)
+
+.. _#56: https://github.com/nengo/keras-lmu/pull/56
+
 0.7.0 (July 20, 2023)
 =====================
 
diff --git a/keras_lmu/layers.py b/keras_lmu/layers.py
index 63783d0..22b2003 100644
--- a/keras_lmu/layers.py
+++ b/keras_lmu/layers.py
@@ -101,6 +101,10 @@ class to create a recurrent Keras layer to process the whole sequence. Calling
         Dropout rate on input connections.
     recurrent_dropout : float
         Dropout rate on ``memory_to_memory`` connection.
+    input_d : Optional[int]
+        Size of the last axis of the input signals. This only needs to be specified
+        if ``hidden_cell=None`` and ``input_to_hidden=True``; otherwise the input
+        dimensionality can be inferred dynamically.
 
     References
     ----------
@@ -132,6 +136,7 @@ def __init__(
         bias_regularizer=None,
         dropout=0,
         recurrent_dropout=0,
+        input_d=None,
         seed=None,
         **kwargs,
     ):
@@ -155,6 +160,7 @@
         self.bias_regularizer = bias_regularizer
         self.dropout = dropout
         self.recurrent_dropout = recurrent_dropout
+        self.input_d = input_d
         self.seed = seed
         if tf_version >= version.parse("2.16.0"):
             self.seed_generator = keras.random.SeedGenerator(seed)
@@ -178,6 +184,15 @@ def __init__(
                 )
 
             self.hidden_output_size = self.memory_d * self.order
+
+            if self.input_to_hidden:
+                if self.input_d is None:
+                    raise ValueError(
+                        "input_d must be specified when setting input_to_hidden=True "
+                        "with hidden_cell=None"
+                    )
+                self.hidden_output_size += self.input_d
+
             self.hidden_state_size = []
         elif hasattr(self.hidden_cell, "state_size"):
             self.hidden_output_size = self.hidden_cell.output_size
@@ -272,6 +287,12 @@ def build(self, input_shape):
 
         super().build(input_shape)
 
+        if self.input_d is not None and input_shape[-1] != self.input_d:
+            raise ValueError(
+                f"Input dimensionality ({input_shape[-1]}) does not match expected "
+                f"dimensionality ({self.input_d})"
+            )
+
         enc_d = input_shape[-1]
         if self.hidden_to_memory:
             enc_d += self.hidden_output_size
@@ -484,6 +505,7 @@ def get_config(self):
                 "bias_regularizer": self.bias_regularizer,
                 "dropout": self.dropout,
                 "recurrent_dropout": self.recurrent_dropout,
+                "input_d": self.input_d,
                 "seed": self.seed,
             }
         )
diff --git a/keras_lmu/tests/test_layers.py b/keras_lmu/tests/test_layers.py
index 3e77096..ab5bfb1 100644
--- a/keras_lmu/tests/test_layers.py
+++ b/keras_lmu/tests/test_layers.py
@@ -308,6 +308,12 @@ def test_validation_errors():
     with pytest.raises(ValueError, match="Unrecognized conv mode"):
         layers.LMUFeedforward(1, 2, 3, None, conv_mode="raw_bad")
 
+    with pytest.raises(ValueError, match="input_d must be specified"):
+        layers.LMUCell(1, 2, 3, None, input_d=None, input_to_hidden=True)
+
+    with pytest.raises(ValueError, match="does not match expected dimensionality"):
+        layers.LMUCell(1, 2, 3, None, input_d=1).build((1, 1, 2))
+
 
 @pytest.mark.parametrize(
     "should_use_feedforward, hidden_to_memory, memory_to_memory, trainable_theta",
@@ -381,7 +387,8 @@ def test_hidden_types(hidden_cell, feedforward, rng):
 
 @pytest.mark.parametrize("feedforward", (True, False))
 @pytest.mark.parametrize("hidden_cell", (None, keras.layers.Dense))
-def test_connection_params(feedforward, hidden_cell):
+@pytest.mark.parametrize("input_to_hidden", (True, False))
+def test_connection_params(feedforward, hidden_cell, input_to_hidden):
     input_shape = (32, 7 if feedforward else None, 6)
 
     x = keras.Input(batch_shape=input_shape)
@@ -391,12 +398,13 @@ def test_connection_params(feedforward, hidden_cell):
         "order": 3,
         "theta": 4,
         "hidden_cell": hidden_cell if hidden_cell is None else hidden_cell(units=5),
-        "input_to_hidden": hidden_cell is not None,
+        "input_to_hidden": input_to_hidden,
     }
     if not feedforward:
         lmu_args["hidden_to_memory"] = hidden_cell is not None
         lmu_args["memory_to_memory"] = True
+        lmu_args["input_d"] = input_shape[-1]
 
     lmu = (
         layers.LMUCell(**lmu_args)
@@ -416,13 +424,19 @@
     )
     if hidden_cell is not None:
         assert lmu.hidden_cell.kernel.shape == (
-            lmu.memory_d * lmu.order + input_shape[-1],
+            lmu.memory_d * lmu.order + (input_shape[-1] if input_to_hidden else 0),
            lmu.hidden_cell.units,
         )
     assert y.shape == (
         input_shape[0],
-        lmu.memory_d * lmu.order if hidden_cell is None else lmu.hidden_cell.units,
+        (
+            (lmu.memory_d * lmu.order + (input_shape[-1] if input_to_hidden else 0))
+            if hidden_cell is None
+            else lmu.hidden_cell.units
+        ),
     )
+    if not feedforward:
+        assert lmu.output_size == y.shape[-1]
 
 
 @pytest.mark.parametrize(
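
Usage note (not part of the patch): a minimal sketch of how the new ``input_d``
argument would be used once this patch is applied, assuming ``keras_lmu`` is
installed. The sizes are chosen purely for illustration; the expected
``output_size`` mirrors the assertion added to ``test_connection_params`` above.

    from keras_lmu import layers

    # input_d only needs to be given when hidden_cell=None and
    # input_to_hidden=True, since in that case the cell's output is the memory
    # state concatenated with the input, and output_size must be known before
    # the cell is built.
    cell = layers.LMUCell(
        memory_d=1,
        order=4,
        theta=5,
        hidden_cell=None,
        input_to_hidden=True,
        input_d=7,  # size of the last axis of the inputs (illustrative)
    )

    # output_size = memory_d * order + input_d = 1 * 4 + 7
    assert cell.output_size == 11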