Adding simple 'locsAndConfs' TCN input vector
cameron-a-johnson committed Nov 4, 2024
1 parent 22866fc commit 8eeb729
Showing 5 changed files with 292 additions and 18 deletions.
18 changes: 10 additions & 8 deletions configs/data/ptg.yaml
@@ -3,14 +3,16 @@ _target_: tcn_hpl.data.ptg_datamodule.PTGDataModule
train_dataset:
_target_: tcn_hpl.data.tcn_dataset.TCNDataset
window_size: 15
vectorizer:
_target_: tcn_hpl.data.vectorize.classic.Classic
feat_version: 6
top_k: 1
num_classes: 7
background_idx: 0
hand_left_idx: 5
hand_right_idx: 6
# No vectorizer should be specified here, as there should be no "default".
# Example of a vectorizer:
# vectorizer:
# _target_: tcn_hpl.data.vectorize.classic.Classic
# feat_version: 6
# top_k: 1
# num_classes: 7
# background_idx: 0
# hand_left_idx: 5
# hand_right_idx: 6
transform:
_target_: torchvision.transforms.Compose
transforms: []
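
Since the base data config no longer ships a default, each experiment config must now supply its own `vectorizer` block (as the new experiment file below does). For reference, a minimal sketch of how a block like the commented example above resolves into an object, assuming the repo's standard Hydra instantiation path:

from hydra.utils import instantiate
from omegaconf import OmegaConf

# Mirror of the commented-out "vectorizer:" block above.
cfg = OmegaConf.create({
    "_target_": "tcn_hpl.data.vectorize.classic.Classic",
    "feat_version": 6,
    "top_k": 1,
    "num_classes": 7,
    "background_idx": 0,
    "hand_left_idx": 5,
    "hand_right_idx": 6,
})
vectorizer = instantiate(cfg)  # -> a Classic vectorizer instance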
122 changes: 122 additions & 0 deletions configs/experiment/m2/feat_locsConfs.yaml
@@ -0,0 +1,122 @@
# @package _global_

# to execute this experiment run:
# python train.py experiment=m2/feat_locsConfs
task: "m2"
# feature_version: 6
topic: "medical"

defaults:
- override /data: ptg
- override /model: ptg
- override /callbacks: default
- override /trainer: gpu
- override /paths: default
#- override /logger: aim
- override /logger: csv

# All parameters below will be merged with parameters from the default
# configurations set above; this allows you to override only specific parameters.

# Change this name to something descriptive and unique for this experiment.
# This will keep the run logs and outputs separate from those of other
# experiments that may have been run.
# Setting this value influences:
# - the name of the directory under `${paths.root_dir}/logs/` in which
#   training run files are stored.
# Default is "train", set in the "configs/train.yaml" file.
#task_name:
task_name: cameron_locs_and_confs

# simply provide checkpoint path to resume training
#ckpt_path: null

tags: ["m2", "ms_tcn", "debug"]

seed: 12345

trainer:
min_epochs: 50
max_epochs: 500
log_every_n_steps: 1

model:
compile: false
net:
# Length of feature vector for a single frame.
# Currently derived from the vectorizer configuration and other hyperparameters.
dim: 102
num_classes: 9
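
For reference, a short sketch of the arithmetic behind `dim: 102` given the LocsAndConfs settings in the `data` section below (the per-slot layout is defined in locs_and_confs.py later in this commit; 22 joints per pose is assumed there):

num_obj_classes = 7  # data.train_dataset.vectorizer.num_classes below
top_k = 1            # detections kept per object class
num_joints = 22      # joints per pose
obj_feats = num_obj_classes * top_k * 5  # conf, x, y, w, h per kept detection
pose_feats = 1 + num_joints * 3          # pose score + (conf, x, y) per joint
assert obj_feats + pose_feats == 102     # 35 + 67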

data:
coco_train_activities: "${paths.coco_file_root}/TRAIN-activity_truth.coco.json"
coco_train_objects: "${paths.coco_file_root}/TRAIN-object_detections.coco.json"
coco_train_poses: "${paths.coco_file_root}/TRAIN-pose_estimates.coco.json"

coco_validation_activities: "${paths.coco_file_root}/VALIDATION-activity_truth.coco.json"
coco_validation_objects: "${paths.coco_file_root}/VALIDATION-object_detections.coco.json"
coco_validation_poses: "${paths.coco_file_root}/VALIDATION-pose_estimates.coco.json"

coco_test_activities: "${paths.coco_file_root}/TEST-activity_truth.coco.json"
coco_test_objects: "${paths.coco_file_root}/TEST-object_detections.coco.json"
coco_test_poses: "${paths.coco_file_root}/TEST-pose_estimates.coco.json"

batch_size: 16384
num_workers: 16
target_framerate: 15 # BBN HoloLens 2 framerate
epoch_length: 200000

train_dataset:
window_size: 25
vectorizer:
_target_: tcn_hpl.data.vectorize.locs_and_confs.LocsAndConfs
top_k: 1
num_classes: 7
use_joint_confs: True
use_pixel_norm: True
use_hand_obj_offsets: False
background_idx: 0
transform:
transforms: [] # no transforms
# - _target_: tcn_hpl.data.components.augmentations.MoveCenterPts
# hand_dist_delta: 0.05
# obj_dist_delta: 0.05
# joint_dist_delta: 0.025
# im_w: 1280
# im_h: 720
# num_obj_classes: 42
# feat_version: 2
# top_k_objects: 1
# - _target_: tcn_hpl.data.components.augmentations.NormalizePixelPts
# im_w: 1280
# im_h: 720
# num_obj_classes: 42
# feat_version: 2
# top_k_objects: 1
val_dataset:
transform:
transforms: [] # no transforms
# - _target_: tcn_hpl.data.components.augmentations.NormalizePixelPts
# im_w: 1280
# im_h: 720
# num_obj_classes: 42
# feat_version: 2
# top_k_objects: 1
# Test dataset usually configured the same as val, unless there is some
# different set of transforms that should be used during test/prediction.

paths:
# root_dir: "/data/PTG/medical/training/activity_classifier/TCN_HPL/"
# root_dir: "/home/local/KHQ/paul.tunison/data/darpa-ptg/train-TCN-M2_bbn_hololens/training_root"
root_dir: "/home/local/KHQ/cameron.johnson/code/TCN_HPL/tcn_hpl/train-TCN-M2_bbn_hololens/training_root"

# Convenience variable to where your train/val/test split COCO file datasets
# are stored.
# coco_file_root: "/home/local/KHQ/paul.tunison/data/darpa-ptg/train-TCN-M2_bbn_hololens"
coco_file_root: "/home/local/KHQ/cameron.johnson/code/TCN_HPL/train-TCN-M2_bbn_hololens"

#exp_name: "tcn_training_revive"
#logger:
# aim:
# experiment: ${task_name}
# capture_terminal_logs: true
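
For reference, a sketch of composing this experiment config outside the train entrypoint, assuming Hydra >= 1.2 and that this file lives under configs/experiment/ as shown (the relative config_path depends on where the script is run from):

from hydra import compose, initialize

with initialize(version_base=None, config_path="configs"):
    cfg = compose(config_name="train", overrides=["experiment=m2/feat_locsConfs"])
print(cfg.model.net.dim)  # expected: 102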
26 changes: 16 additions & 10 deletions tcn_hpl/data/tcn_dataset.py
@@ -306,6 +306,12 @@ def load_data_offline(
)
else:
frame_dets = empty_dets

# Frame height and width should be available.
img_info = activity_coco.index.imgs[img_id]
assert "height" in img_info
assert "width" in img_info
frame_size = (img_info["width"], img_info["height"])

# Only consider annotations that actually have keypoints.
# There may be no poses on this frame.
@@ -336,7 +342,8 @@
)
else:
frame_poses = empty_pose
vid_frame_data.append(FrameData(frame_dets, frame_poses))
vid_frame_data.append(FrameData(frame_dets, frame_poses, frame_size))

# Compose a list of indices into frame_data that this video's
# worth of content resides.
@@ -639,15 +646,14 @@ def test_dataset_for_input(
pose_coco = kwcoco.CocoDataset(pose_coco)

# TODO: Some method of configuring which vectorizer to use.
from tcn_hpl.data.vectorize.classic import Classic
vectorizer = Classic(
feat_version=6,
top_k=1,
# M2/R18 object detection class indices
num_classes=7,
background_idx=0,
hand_left_idx=5,
hand_right_idx=6,
from tcn_hpl.data.vectorize.locs_and_confs import LocsAndConfs
vectorizer = LocsAndConfs(
top_k=1,
num_classes=7,
use_joint_confs=True,
use_pixel_norm=True,
use_hand_obj_offsets=False,
background_idx=0,
)

dataset = TCNDataset(window_size=window_size, vectorizer=vectorizer)
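
Following the construction above, a hypothetical smoke check of a single vectorized frame; frame_dets and frame_poses are placeholders for values built from the loaded COCO data, as in load_data_offline():

from tcn_hpl.data.vectorize._interface import FrameData
feat = vectorizer.vectorize(FrameData(frame_dets, frame_poses, (1280, 720)))
assert feat.shape == (102,)  # LocsAndConfs vector length for these settings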
3 changes: 3 additions & 0 deletions tcn_hpl/data/vectorize/_data.py
Expand Up @@ -90,6 +90,9 @@ class FrameData:
# This may be None, which implies that an object pose estimation was not
# run for this frame.
poses: tg.Optional[FramePoses]
# Length-2 tuple: the video frame's (width, height) in pixels.
size: tg.Tuple[int, int]

def __bool__(self):
"""
141 changes: 141 additions & 0 deletions tcn_hpl/data/vectorize/locs_and_confs.py
@@ -0,0 +1,141 @@
import functools
import typing as tg

import numpy as np
from numpy import typing as npt

from tcn_hpl.data.vectorize._interface import Vectorize, FrameData


class LocsAndConfs(Vectorize):
"""
Previous manual approach to vectorization.
Arguments:
feat_version: Version number of the feature to produce.
top_k: The number of top per-class examples to use in vector
construction.
"""

def __init__(
self,
top_k: int = 1,
num_classes: int = 7,
use_joint_confs: bool = True,
use_pixel_norm: bool = True,
use_hand_obj_offsets: bool = False,
background_idx: int = 0
):
super().__init__()

self._top_k = top_k
self._num_classes = num_classes
self._use_joint_confs = use_joint_confs
self._use_pixel_norm = use_pixel_norm
self._use_hand_obj_offsets = use_hand_obj_offsets
self._background_idx = background_idx

@staticmethod
def get_top_k_indexes_of_one_obj_type(f_dets, k, label_ind):
"""
Find all detections matching the given label index, sort them by
descending confidence score, and return the indices of the top `k`.
:param f_dets: FrameObjectDetections providing `labels` and `scores`.
:param k: Maximum number of detection indices to return.
:param label_ind: Object class label index to filter on.
:return: List of at most `k` detection indices, or None if no
detections of this class are present.
"""
labels = f_dets.labels
scores = f_dets.scores
# Get indices of all detections of the requested object type.
filtered_idxs = [i for i, e in enumerate(labels) if e == label_ind]
if not filtered_idxs:
return None
filtered_scores = [scores[i] for i in filtered_idxs]
# Sort detection indices by descending score and keep the top k.
sorted_inds = [i for _, i in sorted(zip(filtered_scores, filtered_idxs), reverse=True)]
return sorted_inds[:k]

@staticmethod
def append_vector(frame_feat, i, number):
"""Write `number` at index `i` of `frame_feat`; return the array and the next write index."""
frame_feat[i] = number
return frame_feat, i + 1


def vectorize(self, data: FrameData) -> npt.NDArray[np.float32]:

#########################
# Feature vector
#########################
# Length accounting, in the order written below:
#   per object class (7 for M2), top-k (=1) detections: conf, X, Y, W, H -> 7 * 5 = 35
#   casualty pose score -> 1
#   per pose joint (22): conf, X, Y -> 22 * 3 = 66
# Total: 35 + 1 + 66 = 102
vector_len = 102
frame_feat = np.zeros(vector_len)
vector_ind = 0
if self._use_pixel_norm:
W = data.size[0]
H = data.size[1]
else:
W = 1
H = 1
f_dets = data.object_detections

# Loop through all classes: populate obj conf, obj X, obj Y.
# Assumption: class labels are [0, 1, 2,... num_classes-1].
for obj_ind in range(self._num_classes):
top_k_idxs = self.get_top_k_indexes_of_one_obj_type(f_dets, self._top_k, obj_ind)
if top_k_idxs: # This is None if there were no detections to sort for this class
for idx in top_k_idxs:
# Conf
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.scores[idx])
# X
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][0] / W)
# Y
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][1] / H)
# W
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][2] / W)
# H
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_dets.boxes[idx][3] / H)
else:
# No detections of this class: pad with 5 zeros (conf, X, Y, W, H).
for _ in range(5):
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, 0)

f_poses = data.poses
if f_poses:
# Find most confident body detection
confident_pose_idx = np.argmax(f_poses.scores)
num_joints = f_poses.joint_positions.shape[1]
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.scores[confident_pose_idx])

for joint_ind in range(0, num_joints):
# Conf
if self._use_joint_confs:
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.joint_scores[confident_pose_idx][joint_ind])
# X
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.joint_positions[confident_pose_idx][joint_ind][0] / W)
# Y
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, f_poses.joint_positions[confident_pose_idx][joint_ind][1] / H)
else:
# No pose for this frame: zero-pad the pose score and every joint.
# 22 joints are assumed, matching the hard-coded vector_len above
# (f_poses may be None here, so it cannot be inspected).
num_joints = 22
rows_per_joint = 3 if self._use_joint_confs else 2
for _ in range(num_joints * rows_per_joint + 1):
frame_feat, vector_ind = self.append_vector(frame_feat, vector_ind, 0)

assert vector_ind == vector_len

frame_feat = frame_feat.ravel().astype(np.float32)  # match the declared return dtype

return frame_feat
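
Since get_top_k_indexes_of_one_obj_type only reads .labels and .scores, its behavior can be sketched with a minimal stand-in object (illustrative only; real inputs are FrameObjectDetections):

from types import SimpleNamespace

from tcn_hpl.data.vectorize.locs_and_confs import LocsAndConfs

dets = SimpleNamespace(labels=[2, 2, 5], scores=[0.4, 0.9, 0.7])
# Top-1 detection of class 2 is index 1 (score 0.9, the highest).
assert LocsAndConfs.get_top_k_indexes_of_one_obj_type(dets, 1, 2) == [1]
# No detections of class 3 at all -> None.
assert LocsAndConfs.get_top_k_indexes_of_one_obj_type(dets, 1, 3) is None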
