diff --git a/configs/experiment/m2/feat_locsconfs_residualLinear.yaml b/configs/experiment/m2/feat_locsconfs_residualLinear.yaml
new file mode 100644
index 000000000..99479fe74
--- /dev/null
+++ b/configs/experiment/m2/feat_locsconfs_residualLinear.yaml
@@ -0,0 +1,136 @@
+# @package _global_
+
+defaults:
+  - override /data: ptg
+  - override /model: ptg
+  - override /callbacks: default
+  - override /trainer: gpu
+  - override /paths: default
+  #- override /logger: aim
+  - override /logger: csv
+
+# all parameters below will be merged with parameters from default configurations set above
+# this allows you to overwrite only specified parameters
+
+# Change this name to something descriptive and unique for this experiment.
+# This will differentiate the run logs and outputs from other experiments
+# that may have been run under the configured task_name.
+# Setting this value influences:
+# - the name of the directory under `${paths.root_dir}/logs/` in which training
+#   run files are stored.
+# Default is "train" set in the "configs/train.yaml" file.
+#task_name:
+
+# simply provide checkpoint path to resume training
+#ckpt_path: null
+
+tags: ["m2", "ms_tcn", "debug"]
+
+seed: 12345
+
+trainer:
+  min_epochs: 50
+  max_epochs: 500
+  log_every_n_steps: 1
+
+model:
+  _target_: tcn_hpl.models.components.ms_tcs_net.MultiStageModelResidual
+  num_classes: 9 # number of activity classification classes
+  linear_single_stage: true
+  num_layers: 24 # window_size - 1, for largest residual
+  do_stage_residual: true
+  do_stage_softmax: false
+  compile: false
+  scheduler:
+    # Code change to track train/loss instead of val/loss.
+    factor: 0.9
+    patience: 10
+  net:
+    # Length of feature vector for a single frame.
+    # Currently derived from the parameterization of the dataset vectorizer.
+    dim: 102
+
+data:
+  coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json"
+  coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json"
+  coco_train_poses: "${paths.coco_file_root}/TRAIN-pose_estimations.coco.json"
+
+  coco_validation_activities: "${paths.coco_file_root}/VALIDATION-activity_truth.coco.json"
+  coco_validation_objects: "${paths.coco_file_root}/VALIDATION-object_detections.coco.json"
+  coco_validation_poses: "${paths.coco_file_root}/VALIDATION-pose_estimations.coco.json"
+
+  coco_test_activities: "${paths.coco_file_root}/TEST-activity_truth.coco.json"
+  coco_test_objects: "${paths.coco_file_root}/TEST-object_detections.coco.json"
+  coco_test_poses: "${paths.coco_file_root}/TEST-pose_estimations.coco.json"
+
+  batch_size: 512
+  num_workers: 16
+  target_framerate: 15 # BBN Hololens2 framerate
+  epoch_sample_factor: 1 # 1x the dataset size in iterations per epoch for train/val
+
+  train_dataset:
+    window_size: 25
+    window_label_idx: ${model.pred_frame_index}
+    vectorize:
+      _target_: tcn_hpl.data.vectorize.locs_and_confs.LocsAndConfs
+      top_k: 1
+      num_classes: 7
+      use_joint_confs: True
+      use_pixel_norm: True
+      use_joint_obj_offsets: False
+      background_idx: 0
+    # Augmentations on windows of frame data before performing vectorization.
+    transform_frame_data:
+      transforms:
+        - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform
+          # These parameters are a fudge for now to experiment. Window presence
+          # looks qualitatively right with what we're seeing live.
+          frame_rate: ${data.target_framerate}
+          dets_throughput_mean: 14.5
+          pose_throughput_mean: 10
+          dets_latency: 0
+          pose_latency: 0.1
+          dets_throughput_std: 0.2
+          pose_throughput_std: 0.2
+          fixed_pattern: false
+        - _target_: tcn_hpl.data.frame_data_aug.rotate_scale_translate_jitter.FrameDataRotateScaleTranslateJitter
+          translate: 0.05
+          scale: [0.9, 1.1]
+          rotate: [-5, 5]
+          det_loc_jitter: 0.02
+          det_wh_jitter: 0.02
+          pose_kp_loc_jitter: 0.005
+          dets_score_jitter: 0.
+          pose_score_jitter: 0.
+          pose_kp_score_jitter: 0.
+  val_dataset:
+    # Augmentations on windows of frame data before performing vectorization.
+    # Sharing the dropout transform with the training dataset so that
+    # stream-processing dropout is simulated the same way.
+    transform_frame_data:
+      transforms:
+        - _target_: tcn_hpl.data.frame_data_aug.window_frame_dropout.DropoutFrameDataTransform
+          # Mirror training hparams, except use a fixed dropout pattern.
+          frame_rate: ${data.target_framerate}
+          dets_throughput_mean: 14.5
+          pose_throughput_mean: 10
+          dets_latency: 0
+          pose_latency: 0.1
+          dets_throughput_std: 0.2
+          pose_throughput_std: 0.2
+          fixed_pattern: true
+  # The test dataset is usually configured the same as val, unless a different
+  # set of transforms should be used during test/prediction.
+
+paths:
+  # Base directory for training outputs.
+  root_dir: "/home/local/KHQ/cameron.johnson/code/TCN_HPL/tcn_hpl/train-TCN-M2_bbn_hololens/training_root"
+
+  # Convenience variable pointing to where your train/val/test split COCO file
+  # datasets are stored.
+  coco_file_root: ${paths.root_dir}
+
+#logger:
+#  aim:
+#    experiment: ${task_name}
+#    capture_terminal_logs: true
diff --git a/tcn_hpl/models/components/ms_tcs_net.py b/tcn_hpl/models/components/ms_tcs_net.py
index c9508f80a..718f18746 100644
--- a/tcn_hpl/models/components/ms_tcs_net.py
+++ b/tcn_hpl/models/components/ms_tcs_net.py
@@ -56,10 +56,103 @@ def forward(self, x, mask):
         # Bring it back to input shape [batch_size, feat_dim, window_size]
         x = einops.rearrange(re_x, "b w d -> b d w")
+        # The input here is not softmaxed because dim 1 holds feature inputs,
+        # not predictions.
         out = self.stage1(x, mask)
+        # out shape: (batch_size, num_classes, window_size)
         outputs = out.unsqueeze(0)
         for s in self.stages:
             out = s(F.softmax(out, dim=1) * mask[:, None, :], mask)
+            # out shape: (batch_size, num_classes, window_size)
             outputs = torch.cat((outputs, out.unsqueeze(0)), dim=0)
 
         return outputs
+
+
+class MultiStageModelResidual(nn.Module):
+    """
+    Similar to the MultiStageModel class, except that the output of each
+    stage after the first is added to the output of the previous stage
+    (a residual connection between stages).
+
+    """
+    def __init__(
+        self,
+        fc_sequence_dims: Sequence[int],
+        fc_sequence_dropout_p: float,
+        num_stages: int,
+        num_layers: int,
+        num_f_maps: int,
+        dim: int,
+        num_classes: int,
+        linear_single_stage: bool = True,
+        do_stage_residual: bool = True,
+        do_stage_softmax: bool = False,
+    ):
+        """Initialize a `MultiStageModelResidual` module.
+
+        :param fc_sequence_dims: Create N*2 linear layers with u-net-like skip
+            connections connecting inputs and outputs of the same dimensions.
+            If an empty sequence is provided, then no FC layers are created.
+        :param fc_sequence_dropout_p: P-value for drop-out layers utilized in
+            the FC u-net block.
+        :param num_stages: Number of State Model Layers.
+        :param num_layers: Number of Layers within each State Model.
+        :param num_f_maps: Feature size within the state model.
+        :param dim: Feature size between state models.
+        :param num_classes: Number of output classes.
+        :param linear_single_stage: Use the `SingleStageModelLinear` class for
+            single stage layers, otherwise use `SingleStageModel`.
+        :param do_stage_residual: Enable adding previous stage output to
+            successive stage outputs. Default True.
+        :param do_stage_softmax: Enable performing a softmax operation on
+            previous stage outputs before input to successive stages. This
+            only affects the input to a stage and does not affect the optional
+            residual stage addition enabled via `do_stage_residual`. Default
+            False.
+        """
+        super(MultiStageModelResidual, self).__init__()
+
+        # One FC sequence that is applied to a single frame's feature vector.
+        self.frame_fc = LinearSkipBlock([dim] + list(fc_sequence_dims), fc_sequence_dropout_p)
+
+        stage_class = SingleStageModel
+        if linear_single_stage:
+            stage_class = SingleStageModelLinear
+
+        self.stage1 = stage_class(num_layers, num_f_maps, dim, num_classes)
+        self.stages = nn.ModuleList(
+            [
+                stage_class(num_layers, num_f_maps, num_classes, num_classes)
+                for s in range(num_stages - 1)
+            ]
+        )
+
+        self.do_stage_residual = do_stage_residual
+        self.do_stage_softmax = do_stage_softmax
+
+    def forward(self, x, mask):
+        # x shape: [batch_size, feat_dim, window_size]
+        # mask shape: [batch_size, window_size]
+
+        # Shape [batch_size, window_size, feat_dim]
+        re_x = einops.rearrange(x, "b d w -> b w d")
+        re_x = self.frame_fc(re_x)
+        # Bring it back to input shape [batch_size, feat_dim, window_size]
+        x = einops.rearrange(re_x, "b w d -> b d w")
+
+        # The input here is not softmaxed because dim 1 holds feature inputs,
+        # not predictions.
+        out = self.stage1(x, mask)
+        # out shape: (batch_size, num_classes, window_size)
+        outputs = out.unsqueeze(0)
+        for s in self.stages:
+            s_in = out
+            if self.do_stage_softmax:
+                s_in = F.softmax(s_in, dim=1)
+            s_out = s(s_in * mask[:, None, :], mask)
+            if self.do_stage_residual:
+                s_out = out + s_out
+            out = s_out  # update the temp "out" var for the next loop iteration
+            # out shape: (batch_size, num_classes, window_size)
+            outputs = torch.cat((outputs, out.unsqueeze(0)), dim=0)
+
+        return outputs
@@ -158,6 +251,36 @@ def forward(self, x, mask):
         return out
 
 
+class SingleStageModelLinear(nn.Module):
+    """
+    Version of SingleStageModel where the dilation of successive layers
+    increases linearly instead of exponentially.
+
+    Input to the forward method should be shape (batch, dim, window_size).
+    """
+    def __init__(self, num_layers, num_f_maps, dim, num_classes):
+        super(SingleStageModelLinear, self).__init__()
+        self.conv_1x1 = nn.Conv1d(dim, num_f_maps, 1)
+        self.layers = nn.ModuleList(
+            [
+                copy.deepcopy(DilatedResidualLayer(1 + i, num_f_maps, num_f_maps))
+                for i in range(num_layers)
+            ]
+        )
+        self.conv_out = nn.Conv1d(num_f_maps, num_classes, 1)
+
+    def forward(self, x, mask):
+
+        out = self.conv_1x1(x)
+        for layer in self.layers:
+            # The dilated residual layers already add their output to their
+            # input, so there is no need to do that again here.
+            out = layer(out, mask)
+        out = self.conv_out(out) * mask[:, None, :]
+
+        return out
+
+
 class DilatedResidualLayer(nn.Module):
     def __init__(self, dilation, in_channels, out_channels):
         super(DilatedResidualLayer, self).__init__()
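Reviewer note: below is a minimal, hypothetical smoke-test sketch of the new residual model, not part of this change. The `fc_sequence_dims`, `fc_sequence_dropout_p`, `num_stages`, and `num_f_maps` values are assumptions standing in for base-config defaults that are not shown in this diff; `dim`, `num_layers`, `num_classes`, and the window size mirror the experiment config above.

```python
# Hypothetical smoke test for MultiStageModelResidual (values marked "assumed"
# are placeholders for base-config defaults not included in this diff).
import torch

from tcn_hpl.models.components.ms_tcs_net import MultiStageModelResidual

model = MultiStageModelResidual(
    fc_sequence_dims=[],         # assumed: no FC u-net block before the stages
    fc_sequence_dropout_p=0.25,  # assumed dropout p-value
    num_stages=4,                # assumed number of stages
    num_layers=24,               # window_size - 1, per the experiment config
    num_f_maps=64,               # assumed stage-internal feature width
    dim=102,                     # per-frame feature length (config: model.net.dim)
    num_classes=9,
    linear_single_stage=True,    # use SingleStageModelLinear (linear dilation growth)
    do_stage_residual=True,      # add previous stage output to the next stage's output
    do_stage_softmax=False,
)

x = torch.randn(2, 102, 25)  # (batch, feat_dim, window_size)
mask = torch.ones(2, 25)     # (batch, window_size); 1 = valid frame
outputs = model(x, mask)
# One prediction tensor per stage, stacked on dim 0:
# (num_stages, batch, num_classes, window_size)
print(outputs.shape)
```

If the repo follows its usual lightning-hydra layout, the experiment itself would presumably be launched with something like `python train.py experiment=m2/feat_locsconfs_residualLinear` (entry-point name assumed, not confirmed by this diff).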