diff --git a/tests/test_unit/test_load_bboxes.py b/tests/test_unit/test_load_bboxes.py index 5b7dff6e..e6f9c57a 100644 --- a/tests/test_unit/test_load_bboxes.py +++ b/tests/test_unit/test_load_bboxes.py @@ -12,13 +12,65 @@ from movement.io import load_bboxes +# --------- @pytest.fixture() -def via_tracks_file(): - """Return the file path for a VIA tracks .csv file.""" +def single_animal_via_tracks_file(): + """Return the file path for a VIA tracks .csv file with a single animal.""" + via_sample_file_name = "VIA_single-crab_MOCA-crab-1.csv" + return pytest.DATA_PATHS.get(via_sample_file_name) + + +@pytest.fixture() +def multi_animal_via_tracks_file(): + """Return the file path for a VIA tracks .csv file with multiple + animals. + """ via_sample_file_name = "VIA_multiple-crabs_5-frames_labels.csv" return pytest.DATA_PATHS.get(via_sample_file_name) +# --------- + + +@pytest.fixture() +def df_input_via_tracks_small(request): + """Return the first 3 rows of the dataframe that results from + reading the input VIA tracks .csv filepath. + """ + df = pd.read_csv(request.param, sep=",", header=0) + return df.loc[:2, :] + + +@pytest.fixture() +def df_input_via_tracks_small_with_confidence(request): + """Return the first 3 rows of the dataframe that results from + reading the input VIA tracks .csv filepath and add some + confidence values. + """ + df = pd.read_csv(request.param, sep=",", header=0).loc[:2, :] + + return update_attribute_column( + df_input=df, + attribute_column_name="region_attributes", + dict_to_append={"confidence": "0.5"}, + ) + + +@pytest.fixture() +def df_input_via_tracks_small_with_frame_number(request): + """Return the first 3 rows of the dataframe that results from + reading the input VIA tracks .csv filepath and add some + frame numbers. + """ + df = pd.read_csv(request.param, sep=",", header=0).loc[:2, :] + + return update_attribute_column( + df_input=df, + attribute_column_name="file_attributes", + dict_to_append={"frame": "1"}, + ) + + @pytest.fixture() def valid_from_numpy_inputs_required_arrays(): """Return a dictionary with valid numpy arrays for the `from_numpy()` @@ -60,39 +112,46 @@ def valid_from_numpy_inputs_all_arrays( return valid_from_numpy_inputs_required_arrays -@pytest.fixture() -def df_input_via_tracks_small(via_tracks_file): - """Return the first 3 rows of the VIA tracks .csv file as a dataframe.""" - df = pd.read_csv(via_tracks_file, sep=",", header=0) - return df.loc[:2, :] - - -@pytest.fixture() -def df_input_via_tracks_small_with_confidence(df_input_via_tracks_small): - """Return a dataframe with the first three rows of the VIA tracks .csv file - and add confidence values to the bounding boxes. - """ - df = update_attribute_column( - df_input=df_input_via_tracks_small, - attribute_column_name="region_attributes", - dict_to_append={"confidence": "0.5"}, - ) - - return df - - -@pytest.fixture() -def df_input_via_tracks_small_with_frame_number(df_input_via_tracks_small): - """Return a dataframe with the first three rows of the VIA tracks .csv file - and add frame number values to the bounding boxes. - """ - df = update_attribute_column( - df_input=df_input_via_tracks_small, - attribute_column_name="file_attributes", - dict_to_append={"frame": "1"}, - ) - - return df +# ------------- +# def df_input_multi_animal_via_tracks_small(multi_animal_via_tracks_file): +# """Return the first 3 rows of the VIA tracks .csv file as a dataframe.""" +# df = pd.read_csv(multi_animal_via_tracks_file, sep=",", header=0) +# return df.loc[:2, :] + + +# @pytest.fixture() +# def df_input_multi_animal_via_tracks_small_w_conf( +# df_input_multi_animal_via_tracks_small, +# ): +# """Return a dataframe with the first three rows of the +# VIA tracks .csv file +# and add confidence values to the bounding boxes. +# """ +# df = update_attribute_column( +# df_input=df_input_multi_animal_via_tracks_small, +# attribute_column_name="region_attributes", +# dict_to_append={"confidence": "0.5"}, +# ) + +# return df + + +# @pytest.fixture() +# def df_input_via_tracks_small_with_frame_number( +# df_input_multi_animal_via_tracks_small, +# ): +# """Return a dataframe with the first three rows of the VIA tracks .csv +# file +# and add frame number values to the bounding boxes. +# """ +# df = update_attribute_column( +# df_input=df_input_multi_animal_via_tracks_small, +# attribute_column_name="file_attributes", +# dict_to_append={"frame": "1"}, +# ) + +# return df +# --------- def update_attribute_column(df_input, attribute_column_name, dict_to_append): @@ -152,24 +211,25 @@ def assert_dataset( ) -def assert_time_coordinates(ds, fps, start_frame): +def assert_time_coordinates(ds, fps, start_frame=None, frame_array=None): """Assert that the time coordinates are as expected, depending on - fps value and start_frame. + fps value and start_frame or time_array. start_frame takes precedence + over frame_array if both are provided. """ # scale time coordinates with 1/fps if provided scale = 1 / fps if fps else 1 + # build frame array if not provided + if start_frame is not None: + frame_array = np.array( + range(start_frame, len(ds.coords["time"].data) + start_frame) + ) + # elif not frame_array: + # frame_array = np.arange(1, len(ds.coords["time"].data) + 1) + # assert numpy array of time coordinates np.testing.assert_allclose( - ds.coords["time"].data, - np.array( - [ - f * scale - for f in range( - start_frame, len(ds.coords["time"].data) + start_frame - ) - ] - ), + ds.coords["time"].data, np.array([f * scale for f in frame_array]) ) @@ -207,26 +267,29 @@ def test_from_file(source_software, fps, use_frame_numbers_from_file): ) +@pytest.mark.parametrize( + "via_tracks_file_str", + ["multi_animal_via_tracks_file", "single_animal_via_tracks_file"], +) @pytest.mark.parametrize("fps", [None, 30, 60.0]) @pytest.mark.parametrize("use_frame_numbers_from_file", [True, False]) def test_from_via_tracks_file( - via_tracks_file, fps, use_frame_numbers_from_file + via_tracks_file_str, fps, use_frame_numbers_from_file, request ): """Test that loading tracked bounding box data from - a valid VIA tracks .csv file returns a proper Dataset - and that the time coordinates are as expected. + a valid VIA tracks .csv file returns a proper Dataset. """ + # get the input file + via_tracks_file = request.getfixturevalue(via_tracks_file_str) + # run general dataset checks ds = load_bboxes.from_via_tracks_file( - via_tracks_file, fps, use_frame_numbers_from_file + file_path=via_tracks_file, + fps=fps, + use_frame_numbers_from_file=False, ) assert_dataset(ds, via_tracks_file, "VIA-tracks", fps) - # check time coordinates are as expected - # in sample VIA tracks .csv file frame numbers start from 1 - start_frame = 1 if use_frame_numbers_from_file else 0 - assert_time_coordinates(ds, fps, start_frame) - @pytest.mark.parametrize( "valid_from_numpy_inputs", @@ -263,52 +326,89 @@ def test_from_numpy(valid_from_numpy_inputs, fps, source_software, request): @pytest.mark.parametrize( - "via_column_name, list_keys, cast_fn, expected_attribute_array", + "df_input_via_tracks_small, expected_attributes_dict", [ + ( # multi animal VIA tracks file + pytest.DATA_PATHS.get("VIA_multiple-crabs_5-frames_labels.csv"), + { + "region_shape_attributes": { + "name": np.array(["rect"] * 3), + "x_y": np.array( + [ + [526.2366942646654, 393.280914246804], + [2565, 468], + [759.6484377108334, 136.60946673708338], + ] + ).reshape(-1, 2), + "width_height": np.array( + [[46, 38], [41, 30], [29, 25]] + ).reshape(-1, 2), + }, + "region_attributes": { + "track": np.array([71, 70, 69]), + }, + }, + ), ( - "file_attributes", - ["clip"], - int, - np.array([123] * 3), # .reshape(-1, 1), + # single animal VIA tracks file + pytest.DATA_PATHS.get("VIA_single-crab_MOCA-crab-1.csv"), + { + "region_shape_attributes": { + "name": np.array(["rect"] * 3), + "x_y": np.array( + [ + [957.85, 296.708], + [919.149, 296.708], + [889.317, 303.158], + ] + ).reshape(-1, 2), + "width_height": np.array( + [ + [320.09, 153.191], + [357.984, 158.835], + [366.853, 162.867], + ] + ).reshape(-1, 2), + }, + "region_attributes": { + "track": np.ones((3,)), + }, + }, ), + ], + indirect=["df_input_via_tracks_small"], +) +@pytest.mark.parametrize( + "via_column_name, list_keys, cast_fn", + [ ( "region_shape_attributes", ["name"], str, - np.array(["rect"] * 3), # .reshape(-1, 1), ), ( "region_shape_attributes", ["x", "y"], float, - np.array( - [ - [526.2366942646654, 393.280914246804], - [2565, 468], - [759.6484377108334, 136.60946673708338], - ] - ).reshape(-1, 2), ), ( "region_shape_attributes", ["width", "height"], float, - np.array([[46, 38], [41, 30], [29, 25]]).reshape(-1, 2), ), ( "region_attributes", ["track"], int, - np.array([71, 70, 69]), # .reshape(-1, 1), ), ], ) def test_via_attribute_column_to_numpy( df_input_via_tracks_small, + expected_attributes_dict, via_column_name, list_keys, cast_fn, - expected_attribute_array, ): """Test that the function correctly extracts the desired data from the VIA attributes. @@ -320,9 +420,13 @@ def test_via_attribute_column_to_numpy( cast_fn=cast_fn, ) + expected_attribute_array = expected_attributes_dict[via_column_name][ + "_".join(list_keys) + ] assert np.array_equal(attribute_array, expected_attribute_array) +# TODO @pytest.mark.parametrize( "df_input, expected_array", [ @@ -345,6 +449,7 @@ def test_extract_confidence_from_via_tracks_df( assert np.array_equal(confidence_array, expected_array, equal_nan=True) +# TODO @pytest.mark.parametrize( "df_input, expected_array", [ @@ -370,6 +475,16 @@ def test_extract_frame_number_from_via_tracks_df( assert np.array_equal(frame_array, expected_array) +@pytest.mark.parametrize( + "via_tracks_file, frame_array_from_file", + [ + ("multi_animal_via_tracks_file", np.array(range(1, 6))), + ( + "single_animal_via_tracks_file", + np.array(list(range(0, 168, 5)) + [167]), + ), + ], +) @pytest.mark.parametrize( "fps, expected_fps, expected_time_unit", [ @@ -383,27 +498,26 @@ def test_extract_frame_number_from_via_tracks_df( @pytest.mark.parametrize("use_frame_numbers_from_file", [True, False]) def test_fps_and_time_coords( via_tracks_file, + frame_array_from_file, fps, expected_fps, expected_time_unit, use_frame_numbers_from_file, + request, ): - """Test that fps conversion is as expected and time coordinates are set - according to the expected fps. + """Test that fps conversion is as expected, and time coordinates are set + according to the input "fps" and the "use_frame_numbers_from_file" + parameters. """ + via_tracks_file = request.getfixturevalue(via_tracks_file) + + # load dataset with inputs ds = load_bboxes.from_via_tracks_file( via_tracks_file, fps=fps, use_frame_numbers_from_file=use_frame_numbers_from_file, ) - # load dataset with frame numbers from file - ds_in_frames_from_file = load_bboxes.from_via_tracks_file( - via_tracks_file, - fps=None, - use_frame_numbers_from_file=True, - ) - # check time unit assert ds.time_unit == expected_time_unit @@ -413,26 +527,39 @@ def test_fps_and_time_coords( else: assert ds.fps == expected_fps - # check time coordinates + # check loading frame numbers from file if use_frame_numbers_from_file: - start_frame = ds_in_frames_from_file.coords["time"].data[0] + assert_time_coordinates( + ds, expected_fps, frame_array=frame_array_from_file + ) else: - start_frame = 0 - assert_time_coordinates(ds, expected_fps, start_frame) + assert_time_coordinates(ds, expected_fps, start_frame=0) -def test_df_from_via_tracks_file(via_tracks_file): - """Test that the helper function correctly reads the VIA tracks .csv file - as a dataframe. +@pytest.mark.parametrize( + "via_tracks_file, expected_n_frames, expected_n_individuals", + [ + ("multi_animal_via_tracks_file", 5, 86), + ("single_animal_via_tracks_file", 35, 1), + ], +) +def test_df_from_via_tracks_file( + via_tracks_file, expected_n_frames, expected_n_individuals, request +): + """Test that the `_df_from_via_tracks_file` helper function correctly + reads the VIA tracks .csv file as a dataframe. """ + via_tracks_file = request.getfixturevalue(via_tracks_file) + df = load_bboxes._df_from_via_tracks_file( file_path=via_tracks_file, ) assert isinstance(df, pd.DataFrame) - assert len(df.frame_number.unique()) == 5 + assert len(df.frame_number.unique()) == expected_n_frames + assert len(df.ID.unique()) == expected_n_individuals assert ( - df.shape[0] == len(df.ID.unique()) * 5 + df.shape[0] == len(df.ID.unique()) * expected_n_frames ) # all individuals in all frames (even if nan) assert list(df.columns) == [ "ID", @@ -445,10 +572,16 @@ def test_df_from_via_tracks_file(via_tracks_file): ] -def test_position_numpy_array_from_via_tracks_file(via_tracks_file): +@pytest.mark.parametrize( + "via_tracks_file", + ["multi_animal_via_tracks_file", "single_animal_via_tracks_file"], +) +def test_position_numpy_array_from_via_tracks_file(via_tracks_file, request): """Test the extracted position array from the VIA tracks .csv file represents the centroid of the bbox. """ + via_tracks_file = request.getfixturevalue(via_tracks_file) + # Extract numpy arrays from VIA tracks .csv file bboxes_arrays = load_bboxes._numpy_arrays_from_via_tracks_file( via_tracks_file