Correctly set output_size when hidden_cell=None
drasmuss committed Jun 18, 2024
1 parent 4d7fe22 commit ae66e91
Showing 3 changed files with 48 additions and 4 deletions.
8 changes: 8 additions & 0 deletions CHANGES.rst
@@ -24,6 +24,14 @@ Release history

*Compatible with TensorFlow 2.6 - 2.16*

**Added**

- Added an ``input_d`` parameter to ``LMUCell``. This only needs to be specified
  when ``hidden_cell=None`` and ``input_to_hidden=True``; in that scenario it is
  required in order to accurately set ``LMUCell.output_size``. (`#56`_)

.. _#56: https://github.com/nengo/keras-lmu/pull/56

0.7.0 (July 20, 2023)
=====================

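To put the changelog entry above in context, here is a minimal usage sketch (not part of the commit; the sizes are arbitrary, and the import style follows the tests below). With ``hidden_cell=None`` and ``input_to_hidden=True``, supplying ``input_d`` lets the cell report its output size as ``memory_d * order + input_d``:

    import keras
    from keras_lmu import layers

    # hidden_cell=None with input_to_hidden=True: the input is concatenated onto
    # the memory output, so the cell needs input_d to know its output size up front.
    cell = layers.LMUCell(
        memory_d=2,
        order=8,
        theta=100,
        hidden_cell=None,
        input_to_hidden=True,
        input_d=10,
    )

    out = keras.layers.RNN(cell)(keras.Input(shape=(None, 10)))
    print(out.shape)  # (None, 26), i.e. memory_d * order + input_d

Omitting ``input_d`` in this configuration raises the "input_d must be specified" error added in ``layers.py`` below.
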
22 changes: 22 additions & 0 deletions keras_lmu/layers.py
@@ -101,6 +101,10 @@ class to create a recurrent Keras layer to process the whole sequence. Calling
        Dropout rate on input connections.
    recurrent_dropout : float
        Dropout rate on ``memory_to_memory`` connection.
    input_d : Optional[int]
        Size of the last axis of the input signals. This only needs to be specified
        if ``hidden_cell=None`` and ``input_to_hidden=True``; otherwise the input
        dimensionality can be inferred dynamically.

    References
    ----------
@@ -132,6 +136,7 @@ def __init__(
        bias_regularizer=None,
        dropout=0,
        recurrent_dropout=0,
        input_d=None,
        seed=None,
        **kwargs,
    ):
@@ -155,6 +160,7 @@ def __init__(
        self.bias_regularizer = bias_regularizer
        self.dropout = dropout
        self.recurrent_dropout = recurrent_dropout
        self.input_d = input_d
        self.seed = seed
        if tf_version >= version.parse("2.16.0"):
            self.seed_generator = keras.random.SeedGenerator(seed)
@@ -178,6 +184,15 @@ def __init__(
                )

            self.hidden_output_size = self.memory_d * self.order

            if self.input_to_hidden:
                if self.input_d is None:
                    raise ValueError(
                        "input_d must be specified when setting input_to_hidden=True "
                        "with hidden_cell=None"
                    )
                self.hidden_output_size += self.input_d

            self.hidden_state_size = []
        elif hasattr(self.hidden_cell, "state_size"):
            self.hidden_output_size = self.hidden_cell.output_size
@@ -272,6 +287,12 @@ def build(self, input_shape):

        super().build(input_shape)

        if self.input_d is not None and input_shape[-1] != self.input_d:
            raise ValueError(
                f"Input dimensionality ({input_shape[-1]}) does not match expected "
                f"dimensionality ({self.input_d})"
            )

        enc_d = input_shape[-1]
        if self.hidden_to_memory:
            enc_d += self.hidden_output_size
@@ -484,6 +505,7 @@ def get_config(self):
"bias_regularizer": self.bias_regularizer,
"dropout": self.dropout,
"recurrent_dropout": self.recurrent_dropout,
"input_d": self.input_d,
"seed": self.seed,
}
)
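As a quick sanity check of the ``get_config`` change (a hypothetical snippet, not from the commit), the new ``input_d`` entry should appear in the cell's config dictionary:

    from keras_lmu import layers

    cell = layers.LMUCell(1, 2, 3, None, input_to_hidden=True, input_d=4)
    assert cell.get_config()["input_d"] == 4
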
22 changes: 18 additions & 4 deletions keras_lmu/tests/test_layers.py
@@ -308,6 +308,12 @@ def test_validation_errors():
with pytest.raises(ValueError, match="Unrecognized conv mode"):
layers.LMUFeedforward(1, 2, 3, None, conv_mode="raw_bad")

with pytest.raises(ValueError, match="input_d must be specified"):
layers.LMUCell(1, 2, 3, None, input_d=None, input_to_hidden=True)

with pytest.raises(ValueError, match="does not match expected dimensionality"):
layers.LMUCell(1, 2, 3, None, input_d=1).build((1, 1, 2))


@pytest.mark.parametrize(
"should_use_feedforward, hidden_to_memory, memory_to_memory, trainable_theta",
@@ -381,7 +387,8 @@ def test_hidden_types(hidden_cell, feedforward, rng):

@pytest.mark.parametrize("feedforward", (True, False))
@pytest.mark.parametrize("hidden_cell", (None, keras.layers.Dense))
def test_connection_params(feedforward, hidden_cell):
@pytest.mark.parametrize("input_to_hidden", (True, False))
def test_connection_params(feedforward, hidden_cell, input_to_hidden):
    input_shape = (32, 7 if feedforward else None, 6)

    x = keras.Input(batch_shape=input_shape)
@@ -391,12 +398,13 @@ def test_connection_params(feedforward, hidden_cell):
"order": 3,
"theta": 4,
"hidden_cell": hidden_cell if hidden_cell is None else hidden_cell(units=5),
"input_to_hidden": hidden_cell is not None,
"input_to_hidden": input_to_hidden,
}

if not feedforward:
lmu_args["hidden_to_memory"] = hidden_cell is not None
lmu_args["memory_to_memory"] = True
lmu_args["input_d"] = input_shape[-1]

    lmu = (
        layers.LMUCell(**lmu_args)
@@ -416,13 +424,19 @@ def test_connection_params(feedforward, hidden_cell):
    )
    if hidden_cell is not None:
        assert lmu.hidden_cell.kernel.shape == (
            lmu.memory_d * lmu.order + input_shape[-1],
            lmu.memory_d * lmu.order + (input_shape[-1] if input_to_hidden else 0),
            lmu.hidden_cell.units,
        )
    assert y.shape == (
        input_shape[0],
        lmu.memory_d * lmu.order if hidden_cell is None else lmu.hidden_cell.units,
        (
            (lmu.memory_d * lmu.order + (input_shape[-1] if input_to_hidden else 0))
            if hidden_cell is None
            else lmu.hidden_cell.units
        ),
    )
    if not feedforward:
        assert lmu.output_size == y.shape[-1]


@pytest.mark.parametrize(
