From ecd0bdbb12ebe84190a3c2b84657cbc686c65405 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 4 Dec 2024 18:36:10 +0000 Subject: [PATCH] Simplify frame extraction --- movement/validators/files.py | 109 ++++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 46 deletions(-) diff --git a/movement/validators/files.py b/movement/validators/files.py index 59cbb5c7..2769054f 100644 --- a/movement/validators/files.py +++ b/movement/validators/files.py @@ -304,55 +304,15 @@ def _file_contains_valid_frame_numbers(self, attribute, value): # If 'frame' is a file_attribute for all files: # extract frame number - list_frame_numbers = [] if all(["frame" in d for d in file_attributes_dicts]): - for k_i, k in enumerate(file_attributes_dicts): - try: - list_frame_numbers.append(int(k["frame"])) - except Exception as e: - raise log_error( - ValueError, - f"{df.filename.iloc[k_i]} (row {k_i}): " - "'frame' file attribute cannot be cast as an integer. " - f"Please review the file attributes: {k}.", - ) from e - + list_frame_numbers = ( + self._extract_frame_numbers_from_file_attributes( + df, file_attributes_dicts + ) + ) # else: extract frame number from filename. else: - for f_i, f in enumerate(df["filename"]): - # try compiling the frame regexp - try: - regex_match = re.search(self.frame_regexp, f) - except re.error as e: - raise log_error( - re.error, - "The provided regular expression for the frame " - f"numbers ({self.frame_regexp}) could not be compiled." - " Please review its syntax.", - ) from e - - # try extracting the frame number from the filename using the - # compiled regexp - try: - list_frame_numbers.append(int(regex_match.group(1))) # type: ignore - except AttributeError as e: - raise log_error( - AttributeError, - f"{f} (row {f_i}): The frame regexp did not " - "return any matches and a frame number could not " - "be extracted from the filename. If included in " - "the filename, the frame number is expected as a " - "zero-padded integer before the file extension " - "(e.g. 00234.png).", - ) from e - except ValueError as e: - raise log_error( - ValueError, - f"{f} (row {f_i}): " - "The frame number extracted from the filename using " - f"the provided regexp ({self.frame_regexp}) could not " - "be cast as an integer.", - ) from e + list_frame_numbers = self._extract_frame_numbers_using_regexp(df) # Check we have as many unique frame numbers as unique image files if len(set(list_frame_numbers)) != len(df.filename.unique()): @@ -364,6 +324,63 @@ def _file_contains_valid_frame_numbers(self, attribute, value): "file. ", ) + def _extract_frame_numbers_from_file_attributes( + self, df, file_attributes_dicts + ): + """Get frame numbers from the 'frame' key under 'file_attributes'.""" + list_frame_numbers = [] + for k_i, k in enumerate(file_attributes_dicts): + try: + list_frame_numbers.append(int(k["frame"])) + except Exception as e: + raise log_error( + ValueError, + f"{df.filename.iloc[k_i]} (row {k_i}): " + "'frame' file attribute cannot be cast as an integer. " + f"Please review the file attributes: {k}.", + ) from e + return list_frame_numbers + + def _extract_frame_numbers_using_regexp(self, df): + """Get frame numbers from the file names using the provided regexp.""" + list_frame_numbers = [] + for f_i, f in enumerate(df["filename"]): + # try compiling the frame regexp + try: + regex_match = re.search(self.frame_regexp, f) + except re.error as e: + raise log_error( + re.error, + "The provided regular expression for the frame " + f"numbers ({self.frame_regexp}) could not be compiled." + " Please review its syntax.", + ) from e + + # try extracting the frame number from the filename using the + # compiled regexp + try: + list_frame_numbers.append(int(regex_match.group(1))) # type: ignore + except AttributeError as e: + raise log_error( + AttributeError, + f"{f} (row {f_i}): The frame regexp did not " + "return any matches and a frame number could not " + "be extracted from the filename. If included in " + "the filename, the frame number is expected as a " + "zero-padded integer before the file extension " + "(e.g. 00234.png).", + ) from e + except ValueError as e: + raise log_error( + ValueError, + f"{f} (row {f_i}): " + "The frame number extracted from the filename using " + f"the provided regexp ({self.frame_regexp}) could not " + "be cast as an integer.", + ) from e + + return list_frame_numbers + @path.validator def _file_contains_tracked_bboxes(self, attribute, value): """Ensure that the VIA tracks .csv contains tracked bounding boxes.