Skip to content

Commit

Permalink
Use motion estimator and mask generator when using videos
Browse files Browse the repository at this point in the history
  • Loading branch information
Agustín Castro committed Feb 5, 2024
1 parent 1fdf76a commit c5968a0
Showing 1 changed file with 58 additions and 28 deletions.
86 changes: 58 additions & 28 deletions norfair/common_reference_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def set_reference(
reference: str,
footage: str,
transformation_getter: TransformationGetter = None,
mask_generator=None,
mask_generator=None,
desired_size=700,
motion_estimator=None,
):
Expand All @@ -58,11 +58,11 @@ def set_reference(
To add a point, just click a pair of points (one from the footage window, and another from the reference window) and select "Add".
To remove a point, just select the corresponding point at the bottom left corner, and select "Remove".
If either footage or reference are videos, you can jump to future frames to pick points that match.
If either footage or reference are videos, you can jump to future frames to pick points that match.
For example, to jump 215 frames in the footage, just write an integer number of frames to jump next to 'Frames to skip (footage)', and select "Skip frames".
A motion estimator can be used to relate the coordinates of the current frame you see (in either footage or reference) to coordinates in its corresponding first frame.
Once a transformation has been estimated, you can test it:
Once a transformation has been estimated, you can test it:
To test your transformation, select the 'Test' mode, pick a point in either the reference or the footage, and see the associated point in the other window.
You can keep adding more associated points until you are satisfied with the estimated transformation.
Expand All @@ -74,16 +74,16 @@ def set_reference(
Path to the footage image or video
- transformation_getter: TransformationGetter, optional
TransformationGetter defining the type of transformation you want to fix between reference and footage.
Since the transformation can be really far from identity (given that the perspectives in footage and reference can be immensely different),
TransformationGetter defining the type of transformation you want to fix between reference and footage.
Since the transformation can be really far from identity (given that the perspectives in footage and reference can be immensely different),
and also knowing that outliers shouldn't be common given that a human is picking the points, it is recommended to use a high ransac_reproj_threshold (~ 1000)
- mask_generator: optional function that creates a mask (np.ndarray) from a PIL image. This mask is then provided to the corresponding MotionEstimator to avoid
- mask_generator: optional function that creates a mask (np.ndarray) from a PIL image. This mask is then provided to the corresponding MotionEstimator to avoid
sampling points within the mask.
- desired_size: int, optional
How large you want the clickable windows in the UI to be.
- motion_estimator: MotionEstimator, optional
When using videos for either the footage or the reference, you can provide a MotionEstimator to relate the coordinates in all the frames in the video.
The motion estimator is only useful if the camera in either the video of the footage or the video of the reference can move. Otherwise, avoid using it.
Expand Down Expand Up @@ -176,15 +176,22 @@ def estimate_transformation(points):
return None

def test_transformation(
change_of_coordinates, canvas, point, original_size, canvas_size, motion_transformation=None,
change_of_coordinates,
canvas,
point,
original_size,
canvas_size,
motion_transformation=None,
):
point_in_new_coordinates = change_of_coordinates(np.array([point]))[0]

try:
point_in_new_coordinates = motion_transformation.abs_to_rel(np.array([point_in_new_coordinates]))[0]
point_in_new_coordinates = motion_transformation.abs_to_rel(
np.array([point_in_new_coordinates])
)[0]
except AttributeError:
pass

point_in_canvas_coordinates = np.multiply(
point_in_new_coordinates,
np.array(
Expand Down Expand Up @@ -256,37 +263,46 @@ def handle_annotation(event):
global reference_canvas_size
global footage_original_size
global footage_canvas_size

points[key]["marked"] = not points[key]["marked"]

if points[key]["marked"]:
points[key]["button"].configure(fg="black", highlightbackground="red")

try:
footage_point_in_rel_coords = skipper["footage"]["motion_transformation"].abs_to_rel(np.array([points[key]["footage"]]))[0]
footage_point_in_rel_coords = skipper["footage"][
"motion_transformation"
].abs_to_rel(np.array([points[key]["footage"]]))[0]
footage_point_in_rel_coords = np.multiply(
footage_point_in_rel_coords,
np.array(
[footage_canvas_size[0] / footage_original_size[0], footage_canvas_size[1] / footage_original_size[1]]
[
footage_canvas_size[0] / footage_original_size[0],
footage_canvas_size[1] / footage_original_size[1],
]
),
).astype(int)
except AttributeError:
footage_point_in_rel_coords = points[key]["footage_canvas"]
pass

try:
reference_point_in_rel_coords = skipper["reference"]["motion_transformation"].abs_to_rel(np.array([points[key]["footage"]]))[0]
reference_point_in_rel_coords = skipper["reference"][
"motion_transformation"
].abs_to_rel(np.array([points[key]["footage"]]))[0]
reference_point_in_rel_coords = np.multiply(
reference_point_in_rel_coords,
np.array(
[reference_canvas_size[0] / reference_original_size[0], reference_canvas_size[1] / reference_original_size[1]]
[
reference_canvas_size[0] / reference_original_size[0],
reference_canvas_size[1] / reference_original_size[1],
]
),
).astype(int)
except AttributeError:
reference_point_in_rel_coords = points[key]["reference_canvas"]
pass


draw_point_in_canvas(
canvas_footage, footage_point_in_rel_coords, color="red"
)
Expand Down Expand Up @@ -348,7 +364,9 @@ def handle_annotation(event):
mask = mask_generator(image)
else:
mask = None
motion_transformation = motion_estimator_footage.update(np.array(image), mask)
motion_transformation = motion_estimator_footage.update(
np.array(image), mask
)

footage_original_width = image.width
footage_original_height = image.height
Expand Down Expand Up @@ -379,14 +397,20 @@ def reference_coord_chosen_in_footage(event):
footage_point_canvas = (event.x, event.y)
draw_point_in_canvas(canvas_footage, footage_point_canvas)


footage_point = np.array(
[event.x * (footage_original_width / footage_canvas_width), event.y * (footage_original_height / footage_canvas_height)]
[
event.x * (footage_original_width / footage_canvas_width),
event.y * (footage_original_height / footage_canvas_height),
]
)
print("Footage window clicked at: ", footage_point.round(1))

try:
footage_point = skipper["footage"]["motion_transformation"].rel_to_abs(np.array([footage_point]))[0].round(1)
footage_point = (
skipper["footage"]["motion_transformation"]
.rel_to_abs(np.array([footage_point]))[0]
.round(1)
)
except AttributeError:
pass

Expand Down Expand Up @@ -420,7 +444,6 @@ def reference_coord_chosen_in_footage(event):
"current_frame_label": None,
}


motion_estimator_reference = None
motion_transformation = None
try:
Expand Down Expand Up @@ -467,18 +490,23 @@ def reference_coord_chosen_in_reference(event):
global footage_canvas_size
global skipper



reference_point_canvas = (event.x, event.y)
draw_point_in_canvas(canvas_reference, reference_point_canvas)

reference_point = np.array(
[event.x * (reference_original_width / reference_canvas_width), event.y * (reference_original_height / reference_canvas_height)]
[
event.x * (reference_original_width / reference_canvas_width),
event.y * (reference_original_height / reference_canvas_height),
]
)
print("Reference window clicked at: ", reference_point.round(1))

try:
reference_point = skipper["reference"]["motion_transformation"].rel_to_abs(np.array([reference_point]))[0].round(1)
reference_point = (
skipper["reference"]["motion_transformation"]
.rel_to_abs(np.array([reference_point]))[0]
.round(1)
)
except AttributeError:
pass

Expand Down Expand Up @@ -560,8 +588,10 @@ def handle_skip_frame(event):
mask = mask_generator(image)
else:
mask = None
motion_transformation = motion_estimator.update(np.array(image), mask)

motion_transformation = motion_estimator.update(
np.array(image), mask
)

skipper[video_type]["motion_estimator"] = motion_estimator
skipper[video_type]["motion_transformation"] = motion_transformation

Expand Down

0 comments on commit c5968a0

Please sign in to comment.