diff --git a/configs/experiment/m2/feat_locsconfs_residualLinear.yaml b/configs/experiment/m2/feat_locsconfs_residualLinear.yaml
new file mode 100644
index 000000000..99479fe74
--- /dev/null
+++ b/configs/experiment/m2/feat_locsconfs_residualLinear.yaml
@@ -0,0 +1,136 @@
+# @package _global_
+
+defaults:
+  - override /data: ptg
+  - override /model: ptg
+  - override /callbacks: default
+  - override /trainer: gpu
+  - override /paths: default
+  #- override /logger: aim
+  - override /logger: csv
+
+# all parameters below will be merged with parameters from default configurations set above
+# this allows you to overwrite only specified parameters
+
+# Change this name to something descriptive and unique for this experiment.
+# This will differentiate the run logs and outputs from other experiments
+# that may have been run under the configured task_name.
+# Setting this value influences:
+# - the name of the directory under `${paths.root_dir}/logs/` in which training
+#   run files are stored.
+# Default is "train" set in the "configs/train.yaml" file.
+#task_name:
+
+# simply provide checkpoint path to resume training
+#ckpt_path: null
+
+tags: ["m2", "ms_tcn", "debug"]
+
+seed: 12345
+
+trainer:
+  min_epochs: 50
+  max_epochs: 500
+  log_every_n_steps: 1
+
+model:
+  _target_: tcn_hpl.models.components.ms_tcs_net.MultiStageModelResidual
+  num_classes: 9 # number of activity classification classes
+  linear_single_stage: true
+  num_layers: 24 # window_size - 1, for largest residual
+  do_stage_residual: true
+  do_stage_softmax: false
+  compile: false
+  scheduler:
+    # Code change to track train/loss instead of val/loss.
+    factor: 0.9
+    patience: 10
+  net:
+    # Length of feature vector for a single frame.
+    # Currently derived from the parameterization of the dataset vectorizer.
+    dim: 102
+
+data:
+  coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json"
+  coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json"
+  coco_train_poses: "${paths.coco_file_root}/TRAIN-pose_estimations.coco.json"
+
+  coco_validation_activities: "${paths.coco_file_root}/VALIDATION-activity_truth.coco.json"
+  coco_validation_objects: "${paths.coco_file_root}/VALIDATION-object_detections.coco.json"
+  coco_validation_poses: "${paths.coco_file_root}/VALIDATION-pose_estimations.coco.json"
+
+  coco_test_activities: "${paths.coco_file_root}/TEST-activity_truth.coco.json"
+  coco_test_objects: "${paths.coco_file_root}/TEST-object_detections.coco.json"
+  coco_test_poses: "${paths.coco_file_root}/TEST-pose_estimations.coco.json"
+
+  batch_size: 512
+  num_workers: 16
+  target_framerate: 15 # BBN Hololens2 framerate
+  epoch_sample_factor: 1 # 1x the dataset size in iterations per epoch for train/val
+
+  train_dataset:
+    window_size: 25
+    window_label_idx: ${model.pred_frame_index}
+    vectorize:
+      _target_: tcn_hpl.data.vectorize.locs_and_confs.LocsAndConfs
+      top_k: 1
+      num_classes: 7
+      use_joint_confs: True
+      use_pixel_norm: True
+      use_joint_obj_offsets: False
+      background_idx: 0
+    # Augmentations on windows of frame data before performing vectorization.
+    transform_frame_data:
+      transforms:
+        - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform
+          # These parameters are a fudge for now to experiment. Window presence
+          # looks qualitatively right with what we're seeing live.
+          frame_rate: ${data.target_framerate}
+          dets_throughput_mean: 14.5
+          pose_throughput_mean: 10
+          dets_latency: 0
+          pose_latency: 0.1
+          dets_throughput_std: 0.2
+          pose_throughput_std: 0.2
+          fixed_pattern: false
+        - _target_: tcn_hpl.data.frame_data_aug.rotate_scale_translate_jitter.FrameDataRotateScaleTranslateJitter
+          translate: 0.05
+          scale: [0.9, 1.1]
+          rotate: [-5, 5]
+          det_loc_jitter: 0.02
+          det_wh_jitter: 0.02
+          pose_kp_loc_jitter: 0.005
+          dets_score_jitter: 0.
+          pose_score_jitter: 0.
+          pose_kp_score_jitter: 0.
+  val_dataset:
+    # Augmentations on windows of frame data before performing vectorization.
+    # Sharing the dropout transform with the training dataset so that
+    # stream-processing dropout is simulated the same way.
+    transform_frame_data:
+      transforms:
+        - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform
+          # Mirror training hparams, except use a fixed dropout pattern.
+          frame_rate: ${data.target_framerate}
+          dets_throughput_mean: 14.5
+          pose_throughput_mean: 10
+          dets_latency: 0
+          pose_latency: 0.1
+          dets_throughput_std: 0.2
+          pose_throughput_std: 0.2
+          fixed_pattern: true
+  # The test dataset is usually configured the same as val, unless a different
+  # set of transforms should be used during test/prediction.
+
+paths:
+  # Base directory for training outputs.
+  root_dir: "/home/local/KHQ/cameron.johnson/code/TCN_HPL/tcn_hpl/train-TCN-M2_bbn_hololens/training_root"
+
+  # Convenience variable pointing to where your train/val/test split COCO file
+  # datasets are stored.
+  coco_file_root: ${paths.root_dir}
+
+#logger:
+#  aim:
+#    experiment: ${task_name}
+#    capture_terminal_logs: true
diff --git a/tcn_hpl/models/components/ms_tcs_net.py b/tcn_hpl/models/components/ms_tcs_net.py
index c9508f80a..718f18746 100644
--- a/tcn_hpl/models/components/ms_tcs_net.py
+++ b/tcn_hpl/models/components/ms_tcs_net.py
@@ -56,10 +56,103 @@ def forward(self, x, mask):
         # Bring it back to input shape [batch_size, feat_dim, window_size]
         x = einops.rearrange(re_x, "b w d -> b d w")
+        # The input here is not softmaxed because dim 1 holds feature inputs,
+        # not predictions.
         out = self.stage1(x, mask)
+        # out shape: (batch_size, num_classes, window_size)
         outputs = out.unsqueeze(0)
         for s in self.stages:
             out = s(F.softmax(out, dim=1) * mask[:, None, :], mask)
+            # out shape: (batch_size, num_classes, window_size)
             outputs = torch.cat((outputs, out.unsqueeze(0)), dim=0)
 
         return outputs
+
+
+class MultiStageModelResidual(nn.Module):
+    """
+    Similar to the MultiStageModel class, except that the output of each
+    stage after the first is added to the output of the previous stage
+    (a residual connection between stages).
+
+    """
+    def __init__(
+        self,
+        fc_sequence_dims: Sequence[int],
+        fc_sequence_dropout_p: float,
+        num_stages: int,
+        num_layers: int,
+        num_f_maps: int,
+        dim: int,
+        num_classes: int,
+        linear_single_stage: bool = True,
+        do_stage_residual: bool = True,
+        do_stage_softmax: bool = False,
+    ):
+        """Initialize a `MultiStageModelResidual` module.
+
+        :param fc_sequence_dims: Create N*2 linear layers with u-net-like skip
+            connections connecting inputs and outputs of the same dimensions.
+            If an empty sequence is provided, then no FC layers are created.
+        :param fc_sequence_dropout_p: P-value for drop-out layers utilized in
+            the FC u-net block.
+        :param num_stages: Number of State Model Layers.
+        :param num_layers: Number of Layers within each State Model.
+        :param num_f_maps: Feature size within the state model.
+        :param dim: Feature size between state models.
+        :param num_classes: Number of output classes.
+        :param linear_single_stage: Use the `SingleStageModelLinear` class for
+            single stage layers, otherwise use `SingleStageModel`.
+        :param do_stage_residual: Enable adding previous stage output to
+            successive stage outputs. Default True.
+        :param do_stage_softmax: Enable performing a softmax operation on
+            previous stage outputs before input to successive stages. This
+            only affects the input to a stage and does not affect the optional
+            residual stage addition enabled via `do_stage_residual`. Default
+            False.
+        """
+        super(MultiStageModelResidual, self).__init__()
+
+        # One FC sequence that is applied to a single frame's feature vector.
+        self.frame_fc = LinearSkipBlock([dim] + list(fc_sequence_dims), fc_sequence_dropout_p)
+
+        stage_class = SingleStageModel
+        if linear_single_stage:
+            stage_class = SingleStageModelLinear
+
+        self.stage1 = stage_class(num_layers, num_f_maps, dim, num_classes)
+        self.stages = nn.ModuleList(
+            [
+                stage_class(num_layers, num_f_maps, num_classes, num_classes)
+                for s in range(num_stages - 1)
+            ]
+        )
+
+        self.do_stage_residual = do_stage_residual
+        self.do_stage_softmax = do_stage_softmax
+
+    def forward(self, x, mask):
+        # x shape: [batch_size, feat_dim, window_size]
+        # mask shape: [batch_size, window_size]
+
+        # Shape [batch_size, window_size, feat_dim]
+        re_x = einops.rearrange(x, "b d w -> b w d")
+        re_x = self.frame_fc(re_x)
+        # Bring it back to input shape [batch_size, feat_dim, window_size]
+        x = einops.rearrange(re_x, "b w d -> b d w")
+
+        # The input here is not softmaxed because dim 1 holds feature inputs,
+        # not predictions.
+        out = self.stage1(x, mask)
+        # out shape: (batch_size, num_classes, window_size)
+        outputs = out.unsqueeze(0)
+        for s in self.stages:
+            s_in = out
+            if self.do_stage_softmax:
+                s_in = F.softmax(s_in, dim=1)
+            s_out = s(s_in * mask[:, None, :], mask)
+            if self.do_stage_residual:
+                s_out = out + s_out
+            out = s_out  # update the temp "out" var for the next loop iteration
+            # out shape: (batch_size, num_classes, window_size)
+            outputs = torch.cat((outputs, out.unsqueeze(0)), dim=0)
+
+        return outputs
@@ -158,6 +251,36 @@ def forward(self, x, mask):
         return out
 
 
+class SingleStageModelLinear(nn.Module):
+    """
+    Version of SingleStageModel where the dilation of successive layers
+    increases linearly instead of exponentially.
+
+    Input to the forward method should be shape (batch, dim, window_size).
+    """
+    def __init__(self, num_layers, num_f_maps, dim, num_classes):
+        super(SingleStageModelLinear, self).__init__()
+        self.conv_1x1 = nn.Conv1d(dim, num_f_maps, 1)
+        self.layers = nn.ModuleList(
+            [
+                copy.deepcopy(DilatedResidualLayer(1 + i, num_f_maps, num_f_maps))
+                for i in range(num_layers)
+            ]
+        )
+        self.conv_out = nn.Conv1d(num_f_maps, num_classes, 1)
+
+    def forward(self, x, mask):
+
+        out = self.conv_1x1(x)
+        for layer in self.layers:
+            # The dilated residual layers already add their output to their
+            # input, so there is no need to do that again here.
+            out = layer(out, mask)
+        out = self.conv_out(out) * mask[:, None, :]
+
+        return out
+
+
 class DilatedResidualLayer(nn.Module):
     def __init__(self, dilation, in_channels, out_channels):
         super(DilatedResidualLayer, self).__init__()
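Reviewer note: below is a minimal, hypothetical smoke-test sketch of the new residual model, not part of this change. The `fc_sequence_dims`, `fc_sequence_dropout_p`, `num_stages`, and `num_f_maps` values are assumptions standing in for base-config defaults that are not shown in this diff; `dim`, `num_layers`, `num_classes`, and the window size mirror the experiment config above.

```python
# Hypothetical smoke test for MultiStageModelResidual (values marked "assumed"
# are placeholders for base-config defaults not included in this diff).
import torch

from tcn_hpl.models.components.ms_tcs_net import MultiStageModelResidual

model = MultiStageModelResidual(
    fc_sequence_dims=[],         # assumed: no FC u-net block before the stages
    fc_sequence_dropout_p=0.25,  # assumed dropout p-value
    num_stages=4,                # assumed number of stages
    num_layers=24,               # window_size - 1, per the experiment config
    num_f_maps=64,               # assumed stage-internal feature width
    dim=102,                     # per-frame feature length (config: model.net.dim)
    num_classes=9,
    linear_single_stage=True,    # use SingleStageModelLinear (linear dilation growth)
    do_stage_residual=True,      # add previous stage output to the next stage's output
    do_stage_softmax=False,
)

x = torch.randn(2, 102, 25)  # (batch, feat_dim, window_size)
mask = torch.ones(2, 25)     # (batch, window_size); 1 = valid frame
outputs = model(x, mask)
# One prediction tensor per stage, stacked on dim 0:
# (num_stages, batch, num_classes, window_size)
print(outputs.shape)
```

If the repo follows its usual lightning-hydra layout, the experiment itself would presumably be launched with something like `python train.py experiment=m2/feat_locsconfs_residualLinear` (entry-point name assumed, not confirmed by this diff).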