diff --git a/augmentations/__init__.py b/augmentations/__init__.py index eb7f3fb..4049e49 100644 --- a/augmentations/__init__.py +++ b/augmentations/__init__.py @@ -1,6 +1,5 @@ import math -import ast import logging import cv2 import random @@ -15,23 +14,37 @@ ARM_IDENTIFIERS_ORDER = ["neck", "$side$Shoulder", "$side$Elbow", "$side$Wrist"] +def __random_pass(prob): + return random.random() < prob + + def __numpy_to_dictionary(data_array: np.ndarray) -> dict: + """ + Supplementary method converting a NumPy array of body landmark data into dictionaries. The array data must match the + order of the BODY_IDENTIFIERS list. + """ + output = {} + for landmark_index, identifier in enumerate(BODY_IDENTIFIERS): output[identifier] = data_array[:, landmark_index].tolist() + return output def __dictionary_to_numpy(landmarks_dict: dict) -> np.ndarray: + """ + Supplementary method converting dictionaries of body landmark data into respective NumPy arrays. The resulting array + will match the order of the BODY_IDENTIFIERS list. + """ + output = np.empty(shape=(len(landmarks_dict["leftEar"]), len(BODY_IDENTIFIERS), 2)) + for landmark_index, identifier in enumerate(BODY_IDENTIFIERS): output[:, landmark_index, 0] = np.array(landmarks_dict[identifier])[:, 0] output[:, landmark_index, 1] = np.array(landmarks_dict[identifier])[:, 1] - return output - -def __random_pass(prob): - return random.random() < prob + return output def __rotate(origin: tuple, point: tuple, angle: float): @@ -55,10 +68,10 @@ def __rotate(origin: tuple, point: tuple, angle: float): def __preprocess_row_sign(sign: dict) -> (dict, dict): """ - + Supplementary method splitting the single-dictionary skeletal data into two dictionaries of body and hand landmarks + respectively. """ - # sign_eval = {key: ast.literal_eval(value) for key, value in sign.items()} sign_eval = sign if "nose_X" in sign_eval: @@ -76,7 +89,7 @@ def __preprocess_row_sign(sign: dict) -> (dict, dict): def __wrap_sign_into_row(body_identifiers: dict, hand_identifiers: dict) -> dict: """ - + Supplementary method for merging body and hand data into a single dictionary. """ return {**body_identifiers, **hand_identifiers} @@ -84,21 +97,49 @@ def __wrap_sign_into_row(body_identifiers: dict, hand_identifiers: dict) -> dict def augment_rotate(sign: dict, angle_range: tuple) -> dict: """ + AUGMENTATION TECHNIQUE. All the joint coordinates in each frame are rotated by a random angle up to 13 degrees with + the center of rotation lying in the center of the frame, which is equal to [0.5; 0.5]. + + :param sign: Dictionary with sequential skeletal data of the signing person + :param angle_range: Tuple containing the angle range (minimal and maximal angle in degrees) to randomly choose the + angle by which the landmarks will be rotated from + :return: Dictionary with augmented (by rotation) sequential skeletal data of the signing person """ body_landmarks, hand_landmarks = __preprocess_row_sign(sign) angle = math.radians(random.uniform(*angle_range)) - body_landmarks = {key: [__rotate((0.5, 0.5), frame, angle) for frame in value] for key, value in body_landmarks.items()} - hand_landmarks = {key: [__rotate((0.5, 0.5), frame, angle) for frame in value] for key, value in hand_landmarks.items()} + body_landmarks = {key: [__rotate((0.5, 0.5), frame, angle) for frame in value] for key, value in + body_landmarks.items()} + hand_landmarks = {key: [__rotate((0.5, 0.5), frame, angle) for frame in value] for key, value in + hand_landmarks.items()} return __wrap_sign_into_row(body_landmarks, hand_landmarks) def augment_shear(sign: dict, type: str, squeeze_ratio: tuple) -> dict: """ - + AUGMENTATION TECHNIQUE. + + - Squeeze. All the frames are squeezed from both horizontal sides. Two different random proportions up to 15% of + the original frame's width for both left and right side are cut. + + - Perspective transformation. The joint coordinates are projected onto a new plane with a spatially defined + center of projection, which simulates recording the sign video with a slight tilt. Each time, the right or left + side, as well as the proportion by which both the width and height will be reduced, are chosen randomly. This + proportion is selected from a uniform distribution on the [0; 1) interval. Subsequently, the new plane is + delineated by reducing the width at the desired side and the respective vertical edge (height) at both of its + adjacent corners. + + :param sign: Dictionary with sequential skeletal data of the signing person + :param type: Type of shear augmentation to perform (either 'squeeze' or 'perspective') + :param squeeze_ratio: Tuple containing the relative range from what the proportion of the original width will be + randomly chosen. These proportions will either be cut from both sides or used to construct the + new projection + + :return: Dictionary with augmented (by squeezing or perspective transformation) sequential skeletal data of the + signing person """ body_landmarks, hand_landmarks = __preprocess_row_sign(sign) @@ -108,7 +149,8 @@ def augment_shear(sign: dict, type: str, squeeze_ratio: tuple) -> dict: move_right = random.uniform(*squeeze_ratio) src = np.array(((0, 1), (1, 1), (0, 0), (1, 0)), dtype=np.float32) - dest = np.array(((0 + move_left, 1), (1 - move_right, 1), (0 + move_left, 0), (1 - move_right, 0)), dtype=np.float32) + dest = np.array(((0 + move_left, 1), (1 - move_right, 1), (0 + move_left, 0), (1 - move_right, 0)), + dtype=np.float32) mtx = cv2.getPerspectiveTransform(src, dest) elif type == "perspective": @@ -117,9 +159,11 @@ def augment_shear(sign: dict, type: str, squeeze_ratio: tuple) -> dict: src = np.array(((0, 1), (1, 1), (0, 0), (1, 0)), dtype=np.float32) if __random_pass(0.5): - dest = np.array(((0 + move_ratio, 1 - move_ratio), (1, 1), (0 + move_ratio, 0 + move_ratio), (1, 0)), dtype=np.float32) + dest = np.array(((0 + move_ratio, 1 - move_ratio), (1, 1), (0 + move_ratio, 0 + move_ratio), (1, 0)), + dtype=np.float32) else: - dest = np.array(((0, 1), (1 - move_ratio, 1 - move_ratio), (0, 0), (1 - move_ratio, 0 + move_ratio)), dtype=np.float32) + dest = np.array(((0, 1), (1 - move_ratio, 1 - move_ratio), (0, 0), (1 - move_ratio, 0 + move_ratio)), + dtype=np.float32) mtx = cv2.getPerspectiveTransform(src, dest) @@ -141,7 +185,17 @@ def augment_shear(sign: dict, type: str, squeeze_ratio: tuple) -> dict: def augment_arm_joint_rotate(sign: dict, probability: float, angle_range: tuple) -> dict: """ + AUGMENTATION TECHNIQUE. The joint coordinates of both arms are passed successively, and the impending landmark is + slightly rotated with respect to the current one. The chance of each joint to be rotated is 3:10 and the angle of + alternation is a uniform random angle up to +-4 degrees. This simulates slight, negligible variances in each + execution of a sign, which do not change its semantic meaning. + + :param sign: Dictionary with sequential skeletal data of the signing person + :param probability: Probability of each joint to be rotated (float from the range [0, 1]) + :param angle_range: Tuple containing the angle range (minimal and maximal angle in degrees) to randomly choose the + angle by which the landmarks will be rotated from + :return: Dictionary with augmented (by arm joint rotation) sequential skeletal data of the signing person """ body_landmarks, hand_landmarks = __preprocess_row_sign(sign) @@ -167,7 +221,8 @@ def augment_arm_joint_rotate(sign: dict, probability: float, angle_range: tuple) if to_be_rotated not in body_landmarks: continue - body_landmarks[to_be_rotated] = [__rotate(body_landmarks[landmark_origin][frame_index], frame, angle) for frame_index, frame in enumerate(body_landmarks[to_be_rotated])] + body_landmarks[to_be_rotated] = [__rotate(body_landmarks[landmark_origin][frame_index], frame, + angle) for frame_index, frame in enumerate(body_landmarks[to_be_rotated])] return __wrap_sign_into_row(body_landmarks, hand_landmarks)