From efee4906d09738715a756ef50dd334dc3b39eb1e Mon Sep 17 00:00:00 2001
From: Paul Tunison
Date: Wed, 6 Nov 2024 20:57:55 -0500
Subject: [PATCH] Optimization and edge case fixes to LocsConfs vectorizer

---
 tcn_hpl/data/tcn_dataset.py              |  22 ++--
 tcn_hpl/data/vectorize/locs_and_confs.py | 135 ++++++++++++-----------
 2 files changed, 86 insertions(+), 71 deletions(-)

diff --git a/tcn_hpl/data/tcn_dataset.py b/tcn_hpl/data/tcn_dataset.py
index 701d2193b..e89599ac7 100644
--- a/tcn_hpl/data/tcn_dataset.py
+++ b/tcn_hpl/data/tcn_dataset.py
@@ -513,7 +513,7 @@ def test_dataset_for_input(
         top_k = 1,
         num_classes = 7,
         use_joint_confs = True,
-        use_pixel_norm = True,
+        use_pixel_norm = False,
         use_joint_obj_offsets = False,
         background_idx = 0,
     )
@@ -554,7 +554,7 @@ def test_dataset_for_input(
 
     # Test that we can iterate over the dataset using a DataLoader with
     # shuffling.
-    batch_size = 1  # 16384  # 512  # 16
+    batch_size = 32  # 512
     data_loader = DataLoader(
         dataset,
         batch_size=batch_size,
@@ -576,7 +576,11 @@ def test_dataset_for_input(
     logger.info(f"Windows per-second: {count / duration}")
 
     # Test creating online mode with subset of data from above.
-    dset_online = TCNDataset(window_size=window_size, vectorize=vectorize)
+    dset_online = TCNDataset(
+        window_size=window_size,
+        vectorize=vectorize,
+        transform_frame_data=transform_frame_data,
+    )
     dset_online.load_data_online(dataset._frame_data[:window_size])  # noqa
     assert len(dset_online) == 1, "Online dataset should be size 1"
     _ = dset_online[0]
@@ -587,11 +591,13 @@ def test_dataset_for_input(
     except IndexError:
         failed_index_error = False
     assert not failed_index_error, "Should have had an index error at [1]"
-    assert (  # noqa
-        dataset[0][0] == dset_online[0][0]
-    ).all(), (
-        "Online should have produced same window matrix as offline version."
-    )
+    # With augmentation, this can no longer be expected because of random
+    # variation per access.
+    # assert (  # noqa
+    #     dataset[0][0] == dset_online[0][0]
+    # ).all(), (
+    #     "Online should have produced same window matrix as offline version."
+    # )
 
 
 if __name__ == "__main__":
diff --git a/tcn_hpl/data/vectorize/locs_and_confs.py b/tcn_hpl/data/vectorize/locs_and_confs.py
index 5a7308ba8..00ec274ba 100644
--- a/tcn_hpl/data/vectorize/locs_and_confs.py
+++ b/tcn_hpl/data/vectorize/locs_and_confs.py
@@ -1,6 +1,9 @@
+from typing import List
+
 import numpy as np
 from numpy import typing as npt
 
+from tcn_hpl.data.frame_data import FrameObjectDetections
 from tcn_hpl.data.vectorize._interface import Vectorize, FrameData
 
 
@@ -34,7 +37,7 @@ def __init__(
         use_joint_confs: bool = True,
         use_pixel_norm: bool = True,
         use_joint_obj_offsets: bool = False,
-        background_idx: int = 0
+        background_idx: int = 0,
     ):
         super().__init__()
 
@@ -45,25 +48,26 @@ def __init__(
         self._use_joint_obj_offsets = use_joint_obj_offsets
         self._background_idx = background_idx
 
-    # Get the top "k" object indexes for each object
     @staticmethod
-    def get_top_k_indexes_of_one_obj_type(f_dets, k, label_ind):
+    def get_top_k_indexes_of_one_obj_type(
+        f_dets: FrameObjectDetections,
+        k: int,
+        label_ind: int,
+    ) -> List[int]:
         """
         Find all instances of a label index in object detections.
         Then sort them and return the top K.
-        Inputs:
-        - object_dets:
         """
-        labels = f_dets.labels
         scores = f_dets.scores
         # Get all labels of an obj type
-        filtered_idxs = [i for i, e in enumerate(labels) if e == label_ind]
-        if not filtered_idxs:
-            return None
+        filtered_idxs = [i for i, e in enumerate(f_dets.labels) if e == label_ind]
+        # Sort filtered indices by descending score and keep the top k.
         filtered_scores = [scores[i] for i in filtered_idxs]
-        # Sort labels by score values.
-        sorted_inds = [i[1] for i in sorted(zip(filtered_scores, filtered_idxs))]
-        return sorted_inds[:k]
+        return [
+            i[1] for i in sorted(zip(filtered_scores, filtered_idxs), reverse=True)[:k]
+        ]
 
     @staticmethod
     def append_vector(frame_feat, i, number):
@@ -93,73 +97,78 @@ def determine_vector_length(self) -> int:
             vector_length += 2 * NUM_POSE_JOINTS
         return vector_length
 
+    # I tried utilizing range assignment into frame_feat, but it was
+    # empirically not as fast as the per-element loop below when run
+    # within a torch DataLoader. E.g. instead of
+    #     for i, det_idx in enumerate(top_det_idxs):
+    #         topk_offset = obj_offset + (i * 5)
+    #         frame_feat[topk_offset + 0] = f_dets.scores[det_idx]
+    #         frame_feat[topk_offset + 1] = f_dets.boxes[det_idx][0] / w
+    #         frame_feat[topk_offset + 2] = f_dets.boxes[det_idx][1] / h
+    #         frame_feat[topk_offset + 3] = f_dets.boxes[det_idx][2] / w
+    #         frame_feat[topk_offset + 4] = f_dets.boxes[det_idx][3] / h
+    # doing
+    #     obj_end_idx = obj_offset + (len(top_det_idxs) * 5)
+    #     frame_feat[obj_offset + 0:obj_end_idx:5] = f_dets.scores[top_det_idxs]
+    #     frame_feat[obj_offset + 1:obj_end_idx:5] = f_dets.boxes[top_det_idxs, 0] / w
+    #     frame_feat[obj_offset + 2:obj_end_idx:5] = f_dets.boxes[top_det_idxs, 1] / h
+    #     frame_feat[obj_offset + 3:obj_end_idx:5] = f_dets.boxes[top_det_idxs, 2] / w
+    #     frame_feat[obj_offset + 4:obj_end_idx:5] = f_dets.boxes[top_det_idxs, 3] / h
+    # was *slower* in the context of batched computation.
     def vectorize(self, data: FrameData) -> npt.NDArray[np.float32]:
         vector_len = self.determine_vector_length()
         frame_feat = np.zeros(vector_len, dtype=np.float32)
-        # TODO: instead of carrying around this vector_ind, we should
-        # directly compute the offset of each feature we add to the TCN
-        # input vector. This would be much easier to debug.
-        vector_ind = 0
+
         if self._use_pixel_norm:
-            W = data.size[0]
-            H = data.size[1]
+            w = data.size[0]
+            h = data.size[1]
         else:
-            W = 1
-            H = 1
+            w = 1
+            h = 1
+
+        obj_num_classes = self._num_classes
+        obj_top_k = self._top_k
+
+        # Indices into the feature vector where components start
+        objs_start_offset = 0
+        pose_start_offset = obj_num_classes * obj_top_k * 5
 
         f_dets = data.object_detections
         if f_dets:
-            # Loop through all classes: populate obj conf, obj X, obj Y.
-            # Assumption: class labels are [0, 1, 2,... num_classes-1].
-            # TODO: this will break if top_k is ever > 1. Fix that.
-            for obj_ind in range(0,self._num_classes):
-                top_k_idxs = self.get_top_k_indexes_of_one_obj_type(f_dets, self._top_k, obj_ind)
-                if top_k_idxs:  # This is None if there were no detections to sort for this class
-                    for idx in top_k_idxs:
-                        # Conf
-                        frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.scores[idx])
-                        # X
-                        frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][0] / W)
-                        # Y
-                        frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][1] / H)
-                        # W
-                        frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][2] / W)
-                        # H
-                        frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][3] / H)
-                else:
-                    for _ in range(0, self._top_k * 5):
-                        # 5 Zeros
-                        frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, 0)
-        else:
-            # No detections, fill in appropriate amount of zeros.
-            for _ in range(self._num_classes * self._top_k * 5):
-                frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, 0)
+            for obj_ind in range(obj_num_classes):
+                obj_offset = objs_start_offset + (obj_ind * obj_top_k * 5)
+                top_det_idxs = self.get_top_k_indexes_of_one_obj_type(
+                    f_dets, obj_top_k, obj_ind
+                )
+                for i, det_idx in enumerate(top_det_idxs):
+                    topk_offset = obj_offset + (i * 5)
+                    frame_feat[topk_offset + 0] = f_dets.scores[det_idx]
+                    frame_feat[topk_offset + 1] = f_dets.boxes[det_idx][0] / w
+                    frame_feat[topk_offset + 2] = f_dets.boxes[det_idx][1] / h
+                    frame_feat[topk_offset + 3] = f_dets.boxes[det_idx][2] / w
+                    frame_feat[topk_offset + 4] = f_dets.boxes[det_idx][3] / h
+                # If fewer than top_k indices were returned, the vector was
+                # already initialized to zero, so there is nothing else to do.
 
         f_poses = data.poses
         if f_poses:
             # Find most confident body detection
             confident_pose_idx = np.argmax(f_poses.scores)
-            num_joints = f_poses.joint_positions.shape[1]
-            frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.scores[confident_pose_idx])
-
-            for joint_ind in range(0, num_joints):
-                # Conf
+            frame_feat[pose_start_offset] = f_poses.scores[confident_pose_idx]
+            pose_kp_offset = pose_start_offset + 1
+            for joint_ind in range(NUM_POSE_JOINTS):
+                joint_offset = pose_kp_offset + (joint_ind * 3)
                 if self._use_joint_confs:
-                    frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.joint_scores[confident_pose_idx][joint_ind])
-                # X
-                frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.joint_positions[confident_pose_idx][joint_ind][0] / W)
-                # Y
-                frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.joint_positions[confident_pose_idx][joint_ind][1] / H)
-        else:
-            if self._use_joint_confs:
-                rows_per_joint = 3
-            else:
-                rows_per_joint = 2
-            for _ in range(NUM_POSE_JOINTS * rows_per_joint + 1):
-                frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, 0)
-
-        assert vector_ind == vector_len
+                    frame_feat[joint_offset] = f_poses.joint_scores[
+                        confident_pose_idx, joint_ind
+                    ]
+                frame_feat[joint_offset + 1] = (
+                    f_poses.joint_positions[confident_pose_idx, joint_ind, 0] / w
+                )
+                frame_feat[joint_offset + 2] = (
+                    f_poses.joint_positions[confident_pose_idx, joint_ind, 1] / h
+                )
 
         return frame_feat
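
For reference, below is a minimal standalone sketch of the two behaviors this
patch establishes: descending top-k selection of per-class detections, and the
fixed-offset layout of the frame feature vector. The names top_k_indexes and
vector_layout, and the example joint count, are hypothetical stand-ins for
illustration only; they are not part of tcn_hpl.

from typing import List, Tuple


def top_k_indexes(
    scores: List[float], labels: List[int], k: int, label_ind: int
) -> List[int]:
    """Indices of the k highest-scoring detections of one class.

    Mirrors the patched get_top_k_indexes_of_one_obj_type: reverse=True
    fixes the previous ascending-order sort, and returning an empty list
    (rather than the old None) lets the caller iterate unconditionally.
    """
    filtered_idxs = [i for i, e in enumerate(labels) if e == label_ind]
    filtered_scores = [scores[i] for i in filtered_idxs]
    return [i for _, i in sorted(zip(filtered_scores, filtered_idxs), reverse=True)[:k]]


def vector_layout(num_classes: int, top_k: int, num_pose_joints: int) -> Tuple[int, int]:
    """(pose-start offset, total length) of the frame feature vector.

    Assumes use_joint_confs=True: detections occupy num_classes * top_k
    slots of 5 values each (conf, x, y, w, h), followed by one pose
    confidence and 3 values per joint (conf, x, y), matching the offsets
    computed in the patched vectorize().
    """
    pose_start = num_classes * top_k * 5
    return pose_start, pose_start + 1 + 3 * num_pose_joints


if __name__ == "__main__":
    # Class-3 detections sit at indices 0 and 2; index 2 scores higher.
    print(top_k_indexes([0.2, 0.9, 0.7], [3, 1, 3], k=2, label_ind=3))  # [2, 0]
    # No class-5 detections at all: an empty list, not None.
    print(top_k_indexes([0.2, 0.9, 0.7], [3, 1, 3], k=2, label_ind=5))  # []
    # E.g. 7 classes, top_k=1, 22 joints: pose block starts at 35, 102 total.
    print(vector_layout(num_classes=7, top_k=1, num_pose_joints=22))  # (35, 102)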