Optimization and edge case fixes to LocsConfs vectorizer
Purg committed Nov 7, 2024
1 parent 925296f commit efee490
Showing 2 changed files with 86 additions and 71 deletions.
22 changes: 14 additions & 8 deletions tcn_hpl/data/tcn_dataset.py
@@ -513,7 +513,7 @@ def test_dataset_for_input(
         top_k = 1,
         num_classes = 7,
         use_joint_confs = True,
-        use_pixel_norm = True,
+        use_pixel_norm = False,
         use_joint_obj_offsets = False,
         background_idx = 0,
     )
@@ -554,7 +554,7 @@ def test_dataset_for_input(
 
     # Test that we can iterate over the dataset using a DataLoader with
     # shuffling.
-    batch_size = 1  # 16384 # 512 # 16
+    batch_size = 32  # 512
     data_loader = DataLoader(
         dataset,
         batch_size=batch_size,
@@ -576,7 +576,11 @@ def test_dataset_for_input(
     logger.info(f"Windows per-second: {count / duration}")
 
     # Test creating online mode with subset of data from above.
-    dset_online = TCNDataset(window_size=window_size, vectorize=vectorize)
+    dset_online = TCNDataset(
+        window_size=window_size,
+        vectorize=vectorize,
+        transform_frame_data=transform_frame_data,
+    )
     dset_online.load_data_online(dataset._frame_data[:window_size])  # noqa
     assert len(dset_online) == 1, "Online dataset should be size 1"
     _ = dset_online[0]
@@ -587,11 +591,13 @@ def test_dataset_for_input(
     except IndexError:
         failed_index_error = False
     assert not failed_index_error, "Should have had an index error at [1]"
-    assert (  # noqa
-        dataset[0][0] == dset_online[0][0]
-    ).all(), (
-        "Online should have produced same window matrix as offline version."
-    )
+    # With augmentation, this can no longer be expected because of random
+    # variation per access.
+    # assert (  # noqa
+    #     dataset[0][0] == dset_online[0][0]
+    # ).all(), (
+    #     "Online should have produced same window matrix as offline version."
+    # )
 
 
 if __name__ == "__main__":
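
For context on the "Windows per-second" log line above, the measurement is the usual count-over-duration pattern around the DataLoader loop. A self-contained sketch of it; a TensorDataset with invented window shapes stands in for the real TCNDataset so the snippet runs on its own:

```python
# Hypothetical throughput harness; the shapes (1024 windows of 25 frames x 87
# features) are invented for illustration.
import time
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(1024, 25, 87))
batch_size = 32
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

start = time.monotonic()
count = 0
for (batch,) in data_loader:
    count += batch.shape[0]  # windows consumed in this batch
duration = time.monotonic() - start
print(f"Windows per-second: {count / duration}")
```
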
135 changes: 72 additions & 63 deletions tcn_hpl/data/vectorize/locs_and_confs.py
@@ -1,6 +1,9 @@
+from typing import List
+
 import numpy as np
 from numpy import typing as npt
 
+from tcn_hpl.data.frame_data import FrameObjectDetections
 from tcn_hpl.data.vectorize._interface import Vectorize, FrameData
 
 
@@ -34,7 +37,7 @@ def __init__(
         use_joint_confs: bool = True,
         use_pixel_norm: bool = True,
         use_joint_obj_offsets: bool = False,
-        background_idx: int = 0
+        background_idx: int = 0,
     ):
         super().__init__()
 
@@ -45,25 +48,26 @@ def __init__(
         self._use_joint_obj_offsets = use_joint_obj_offsets
         self._background_idx = background_idx
 
     # Get the top "k" object indexes for each object
     @staticmethod
-    def get_top_k_indexes_of_one_obj_type(f_dets, k, label_ind):
+    def get_top_k_indexes_of_one_obj_type(
+        f_dets: FrameObjectDetections,
+        k: int,
+        label_ind: int,
+    ) -> List[int]:
         """
         Find all instances of a label index in object detections.
         Then sort them and return the top K.
-        Inputs:
-        - object_dets:
         """
-        labels = f_dets.labels
         scores = f_dets.scores
         # Get all labels of an obj type
-        filtered_idxs = [i for i, e in enumerate(labels) if e == label_ind]
-        if not filtered_idxs:
-            return None
+        filtered_idxs = [i for i, e in enumerate(f_dets.labels) if e == label_ind]
+        # Sort filtered indices return by highest score
         filtered_scores = [scores[i] for i in filtered_idxs]
         # Sort labels by score values.
-        sorted_inds = [i[1] for i in sorted(zip(filtered_scores, filtered_idxs))]
-        return sorted_inds[:k]
+        return [
+            i[1] for i in sorted(zip(filtered_scores, filtered_idxs), reverse=True)[:k]
+        ]
 
     @staticmethod
     def append_vector(frame_feat, i, number):
@@ -93,73 +97,78 @@ def determine_vector_length(self) -> int:
             vector_length += 2 * NUM_POSE_JOINTS
         return vector_length
 
-
     def vectorize(self, data: FrameData) -> npt.NDArray[np.float32]:
+        # I tried utilizing range assignment into frame_feat, but this was
+        # empirically not as fast as this method in the context of being run
+        # within a torch DataLoader.
+        # E.g. instead of
+        #     for i, det_idx in enumerate(top_det_idxs):
+        #         topk_offset = obj_offset + (i * 5)
+        #         frame_feat[topk_offset + 0] = f_dets.scores[det_idx]
+        #         frame_feat[topk_offset + 1] = f_dets.boxes[det_idx][0] / w
+        #         frame_feat[topk_offset + 2] = f_dets.boxes[det_idx][1] / h
+        #         frame_feat[topk_offset + 3] = f_dets.boxes[det_idx][2] / w
+        #         frame_feat[topk_offset + 4] = f_dets.boxes[det_idx][3] / h
+        # doing:
+        #     obj_end_idx = obj_offset + (len(top_det_idxs) * 5)
+        #     frame_feat[obj_offset + 0:obj_end_idx:5] = f_dets.scores[top_det_idxs]
+        #     frame_feat[obj_offset + 1:obj_end_idx:5] = f_dets.boxes[top_det_idxs, 0] / w
+        #     frame_feat[obj_offset + 2:obj_end_idx:5] = f_dets.boxes[top_det_idxs, 1] / h
+        #     frame_feat[obj_offset + 3:obj_end_idx:5] = f_dets.boxes[top_det_idxs, 2] / w
+        #     frame_feat[obj_offset + 4:obj_end_idx:5] = f_dets.boxes[top_det_idxs, 3] / h
+        # Was *slower* in the context of batched computation.
+
         vector_len = self.determine_vector_length()
         frame_feat = np.zeros(vector_len, dtype=np.float32)
-        # TODO: instead of carrying around this vector_ind, we should
-        #       directly compute the offset of each feature we add to the TCN
-        #       input vector. This would be much easier to debug.
-        vector_ind = 0
 
         if self._use_pixel_norm:
-            W = data.size[0]
-            H = data.size[1]
+            w = data.size[0]
+            h = data.size[1]
         else:
-            W = 1
-            H = 1
+            w = 1
+            h = 1
 
+        obj_num_classes = self._num_classes
+        obj_top_k = self._top_k
+
+        # Indices into the feature vector where components start
+        objs_start_offset = 0
+        pose_start_offset = obj_num_classes * obj_top_k * 5
+
         f_dets = data.object_detections
         if f_dets:
-            # Loop through all classes: populate obj conf, obj X, obj Y.
-            # Assumption: class labels are [0, 1, 2,... num_classes-1].
-            # TODO: this will break if top_k is ever > 1. Fix that.
-            for obj_ind in range(0,self._num_classes):
-                top_k_idxs = self.get_top_k_indexes_of_one_obj_type(f_dets, self._top_k, obj_ind)
-                if top_k_idxs:  # This is None if there were no detections to sort for this class
-                    for idx in top_k_idxs:
-                        # Conf
-                        frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.scores[idx])
-                        # X
-                        frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][0] / W)
-                        # Y
-                        frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][1] / H)
-                        # W
-                        frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][2] / W)
-                        # H
-                        frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][3] / H)
-                else:
-                    for _ in range(0, self._top_k * 5):
-                        # 5 Zeros
-                        frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, 0)
-        else:
-            # No detections, fill in appropriate amount of zeros.
-            for _ in range(self._num_classes * self._top_k * 5):
-                frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, 0)
+            for obj_ind in range(obj_num_classes):
+                obj_offset = objs_start_offset + (obj_ind * obj_top_k * 5)
+                top_det_idxs = self.get_top_k_indexes_of_one_obj_type(
+                    f_dets, obj_top_k, obj_ind
+                )
+                for i, det_idx in enumerate(top_det_idxs):
+                    topk_offset = obj_offset + (i * 5)
+                    frame_feat[topk_offset + 0] = f_dets.scores[det_idx]
+                    frame_feat[topk_offset + 1] = f_dets.boxes[det_idx][0] / w
+                    frame_feat[topk_offset + 2] = f_dets.boxes[det_idx][1] / h
+                    frame_feat[topk_offset + 3] = f_dets.boxes[det_idx][2] / w
+                    frame_feat[topk_offset + 4] = f_dets.boxes[det_idx][3] / h
+                # If there are less than top_k indices returned, the vector was
+                # already initialized to zero so nothing else to do.
 
         f_poses = data.poses
        if f_poses:
             # Find most confident body detection
             confident_pose_idx = np.argmax(f_poses.scores)
-            num_joints = f_poses.joint_positions.shape[1]
-            frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.scores[confident_pose_idx])
 
-            for joint_ind in range(0, num_joints):
-                # Conf
+            frame_feat[pose_start_offset] = f_poses.scores[confident_pose_idx]
+            pose_kp_offset = pose_start_offset + 1
+            for joint_ind in range(NUM_POSE_JOINTS):
+                joint_offset = pose_kp_offset + (joint_ind * 3)
-                if self._use_joint_confs:
-                    frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.joint_scores[confident_pose_idx][joint_ind])
-                # X
-                frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.joint_positions[confident_pose_idx][joint_ind][0] / W)
-                # Y
-                frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.joint_positions[confident_pose_idx][joint_ind][1] / H)
-        else:
-            if self._use_joint_confs:
-                rows_per_joint = 3
-            else:
-                rows_per_joint = 2
-            for _ in range(NUM_POSE_JOINTS * rows_per_joint + 1):
-                frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, 0)
-
-        assert vector_ind == vector_len
+                frame_feat[joint_offset] = f_poses.joint_scores[
+                    confident_pose_idx, joint_ind
+                ]
+                frame_feat[joint_offset + 1] = (
+                    f_poses.joint_positions[confident_pose_idx, joint_ind, 0] / w
+                )
+                frame_feat[joint_offset + 2] = (
+                    f_poses.joint_positions[confident_pose_idx, joint_ind, 1] / h
+                )
 
         return frame_feat
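
A note on the edge-case fix: the reworked get_top_k_indexes_of_one_obj_type now always returns a list, sorted by descending score and possibly empty, instead of returning None when a class has no detections, which is why the caller's None check could be dropped. A standalone sketch of that selection logic, using plain Python lists as hypothetical stand-ins for a FrameObjectDetections instance:

```python
# Hypothetical stand-in data; in the vectorizer these come from a
# FrameObjectDetections instance (f_dets.labels, f_dets.scores).
labels = [0, 2, 2, 1, 2]
scores = [0.9, 0.3, 0.8, 0.5, 0.6]

def top_k_for_label(labels, scores, k, label_ind):
    filtered_idxs = [i for i, e in enumerate(labels) if e == label_ind]
    filtered_scores = [scores[i] for i in filtered_idxs]
    # reverse=True puts the highest score first; sorting and slicing an empty
    # list is safe, so a label with no detections yields [] rather than None.
    return [i[1] for i in sorted(zip(filtered_scores, filtered_idxs), reverse=True)[:k]]

print(top_k_for_label(labels, scores, 2, 2))  # [2, 4] (scores 0.8, then 0.6)
print(top_k_for_label(labels, scores, 2, 3))  # [] (no detections, no None check needed)
```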

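The rewritten vectorize() drops the append_vector cursor in favor of offsets computed directly from the configuration, which is what the new objs_start_offset and pose_start_offset variables encode. A worked sketch of that offset arithmetic, assuming the test configuration above (num_classes=7, top_k=1, use_joint_confs=True) and COCO-style 17 joints; the actual value of NUM_POSE_JOINTS is not shown in this diff:

```python
# Offset arithmetic mirroring the new vectorize() layout. The value of
# NUM_POSE_JOINTS is an assumption (17, COCO-style); the rest follows the
# test configuration shown in tcn_dataset.py above.
num_classes = 7
top_k = 1
NUM_POSE_JOINTS = 17  # assumption

# Each object slot holds 5 values: [score, x, y, w, h].
objs_start_offset = 0
pose_start_offset = num_classes * top_k * 5            # 35
obj_offset_class3 = objs_start_offset + 3 * top_k * 5  # class 3, slot 0 -> 15

# Pose block: 1 pose score, then (conf, x, y) per joint.
pose_kp_offset = pose_start_offset + 1                 # 36
joint5_conf_offset = pose_kp_offset + 5 * 3            # joint 5 conf -> 51
vector_len = pose_start_offset + 1 + 3 * NUM_POSE_JOINTS  # 36 + 51 = 87
print(pose_start_offset, obj_offset_class3, joint5_conf_offset, vector_len)
```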
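The comment block kept at the top of vectorize() records the empirical result that motivated the per-element writes: strided range assignment into frame_feat was slower when run under a torch DataLoader. A hypothetical micro-benchmark (array shapes invented for illustration) for reproducing that comparison on small arrays, where the temporaries created by fancy indexing can dominate:

```python
# Hypothetical micro-benchmark: per-element writes vs. strided range
# assignment for a 7-detection, 5-values-per-slot feature block.
import timeit
import numpy as np

rng = np.random.default_rng(0)
scores = rng.random(7).astype(np.float32)
boxes = rng.random((7, 4)).astype(np.float32)
idxs = np.arange(7)
feat = np.zeros(7 * 5, dtype=np.float32)
w = h = 1.0

def per_element():
    for i, det_idx in enumerate(idxs):
        off = i * 5
        feat[off + 0] = scores[det_idx]
        feat[off + 1] = boxes[det_idx][0] / w
        feat[off + 2] = boxes[det_idx][1] / h
        feat[off + 3] = boxes[det_idx][2] / w
        feat[off + 4] = boxes[det_idx][3] / h

def strided():
    end = len(idxs) * 5
    feat[0:end:5] = scores[idxs]
    feat[1:end:5] = boxes[idxs, 0] / w
    feat[2:end:5] = boxes[idxs, 1] / h
    feat[3:end:5] = boxes[idxs, 2] / w
    feat[4:end:5] = boxes[idxs, 3] / h

print("per-element:", timeit.timeit(per_element, number=10_000))
print("strided:    ", timeit.timeit(strided, number=10_000))
```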