From 03ddcc7f65229a0c09841f024499c089fb3f1b4f Mon Sep 17 00:00:00 2001 From: iback Date: Thu, 12 Dec 2024 12:23:29 +0000 Subject: [PATCH] increased panoptica statistics utility --- panoptica/panoptica_statistics.py | 55 ++++++------------- panoptica/utils/processing_pair.py | 84 +++++++----------------------- 2 files changed, 36 insertions(+), 103 deletions(-) diff --git a/panoptica/panoptica_statistics.py b/panoptica/panoptica_statistics.py index 7495054..ff66f37 100644 --- a/panoptica/panoptica_statistics.py +++ b/panoptica/panoptica_statistics.py @@ -86,9 +86,7 @@ def from_file(cls, file: str): rows = [row for row in rd] header = rows[0] - assert ( - header[0] == "subject_name" - ), "First column is not subject_names, something wrong with the file?" + assert header[0] == "subject_name", "First column is not subject_names, something wrong with the file?" keys_in_order = list([tuple(c.split("-")) for c in header[1:]]) metric_names = [] @@ -129,19 +127,13 @@ def from_file(cls, file: str): return Panoptica_Statistic(subj_names=subj_names, value_dict=value_dict) def _assertgroup(self, group): - assert ( - group in self.__groupnames - ), f"group {group} not existent, only got groups {self.__groupnames}" + assert group in self.__groupnames, f"group {group} not existent, only got groups {self.__groupnames}" def _assertmetric(self, metric): - assert ( - metric in self.__metricnames - ), f"metric {metric} not existent, only got metrics {self.__metricnames}" + assert metric in self.__metricnames, f"metric {metric} not existent, only got metrics {self.__metricnames}" def _assertsubject(self, subjectname): - assert ( - subjectname in self.__subj_names - ), f"subject {subjectname} not in list of subjects, got {self.__subj_names}" + assert subjectname in self.__subj_names, f"subject {subjectname} not in list of subjects, got {self.__subj_names}" def get(self, group, metric, remove_nones: bool = False) -> list[float]: """Returns the list of values for given group and metric @@ -174,10 +166,7 @@ def get_one_subject(self, subjectname: str): """ self._assertsubject(subjectname) sidx = self.__subj_names.index(subjectname) - return { - g: {m: self.get(g, m)[sidx] for m in self.__metricnames} - for g in self.__groupnames - } + return {g: {m: self.get(g, m)[sidx] for m in self.__metricnames} for g in self.__groupnames} def get_across_groups(self, metric) -> list[float]: """Given metric, gives list of all values (even across groups!) Treat with care! @@ -206,13 +195,8 @@ def get_summary_across_groups(self) -> dict[str, ValueSummary]: summary_dict[m] = ValueSummary(value_list) return summary_dict - def get_summary_dict( - self, include_across_group: bool = True - ) -> dict[str, dict[str, ValueSummary]]: - summary_dict = { - g: {m: self.get_summary(g, m) for m in self.__metricnames} - for g in self.__groupnames - } + def get_summary_dict(self, include_across_group: bool = True) -> dict[str, dict[str, ValueSummary]]: + summary_dict = {g: {m: self.get_summary(g, m) for m in self.__metricnames} for g in self.__groupnames} if include_across_group: summary_dict["across_groups"] = self.get_summary_across_groups() return summary_dict @@ -257,10 +241,7 @@ def get_summary_figure( _type_: _description_ """ orientation = "h" if horizontal else "v" - data_plot = { - g: np.asarray(self.get(g, metric, remove_nones=True)) - for g in self.__groupnames - } + data_plot = {g: np.asarray(self.get(g, metric, remove_nones=True)) for g in self.__groupnames} if manual_metric_range is not None: assert manual_metric_range[0] < manual_metric_range[1], manual_metric_range change = (manual_metric_range[1] - manual_metric_range[0]) / 100 @@ -293,6 +274,7 @@ def make_curve_over_setups( fig: None = None, plot_dotsize: int | None = None, plot_lines: bool = True, + plot_std: bool = False, ): if groups is None: groups = list(statistics_dict.values())[0].groupnames @@ -303,9 +285,7 @@ def make_curve_over_setups( alternate_groupnames = [alternate_groupnames] # for setupname, stat in statistics_dict.items(): - assert ( - metric in stat.metricnames - ), f"metric {metric} not in statistic obj {setupname}" + assert metric in stat.metricnames, f"metric {metric} not in statistic obj {setupname}" setupnames = list(statistics_dict.keys()) convert_x_to_digit = True @@ -330,18 +310,19 @@ def make_curve_over_setups( plt.grid("major") # Y values are average metric values in that group and metric for idx, g in enumerate(groups): - Y = [ - ValueSummary(stat.get(g, metric, remove_nones=True)).avg - for stat in statistics_dict.values() - ] + Y = [ValueSummary(stat.get(g, metric, remove_nones=True)).avg for stat in statistics_dict.values()] + Ystd = [ValueSummary(stat.get(g, metric, remove_nones=True)).std for stat in statistics_dict.values()] if plot_lines: - plt.plot( + p = plt.plot( X, Y, label=g if alternate_groupnames is None else alternate_groupnames[idx], ) + if plot_std: + plt.fill_between(X, np.subtract(Y, Ystd), np.add(Y, Ystd), alpha=0.25, edgecolor=p[-1].get_color()) + if plot_dotsize is not None: plt.scatter(X, Y, s=plot_dotsize) @@ -380,9 +361,7 @@ def plot_box( if sort: df_by_spec_count = df_data.groupby(name_method).mean() df_by_spec_count = dict(df_by_spec_count[name_metric].items()) - df_data["mean"] = df_data[name_method].apply( - lambda x: df_by_spec_count[x] * (1 if orientation == "h" else -1) - ) + df_data["mean"] = df_data[name_method].apply(lambda x: df_by_spec_count[x] * (1 if orientation == "h" else -1)) df_data = df_data.sort_values(by="mean") if orientation == "v": fig = px.strip( diff --git a/panoptica/utils/processing_pair.py b/panoptica/utils/processing_pair.py index f6901b7..09d5668 100644 --- a/panoptica/utils/processing_pair.py +++ b/panoptica/utils/processing_pair.py @@ -33,9 +33,7 @@ class _ProcessingPair(ABC): _pred_labels: tuple[int, ...] n_dim: int - def __init__( - self, prediction_arr: np.ndarray, reference_arr: np.ndarray, dtype: type | None - ) -> None: + def __init__(self, prediction_arr: np.ndarray, reference_arr: np.ndarray, dtype: type | None) -> None: """Initializes the processing pair with prediction and reference arrays. Args: @@ -48,12 +46,8 @@ def __init__( self._reference_arr = reference_arr self.dtype = dtype self.n_dim = reference_arr.ndim - self._ref_labels: tuple[int, ...] = tuple( - _unique_without_zeros(reference_arr) - ) # type:ignore - self._pred_labels: tuple[int, ...] = tuple( - _unique_without_zeros(prediction_arr) - ) # type:ignore + self._ref_labels: tuple[int, ...] = tuple(_unique_without_zeros(reference_arr)) # type:ignore + self._pred_labels: tuple[int, ...] = tuple(_unique_without_zeros(prediction_arr)) # type:ignore self.crop: tuple[slice, ...] = None self.is_cropped: bool = False self.uncropped_shape: tuple[int, ...] = reference_arr.shape @@ -75,13 +69,7 @@ def crop_data(self, verbose: bool = False): self._prediction_arr = self._prediction_arr[self.crop] self._reference_arr = self._reference_arr[self.crop] - ( - print( - f"-- Cropped from {self.uncropped_shape} to {self._prediction_arr.shape}" - ) - if verbose - else None - ) + (print(f"-- Cropped from {self.uncropped_shape} to {self._prediction_arr.shape}") if verbose else None) self.is_cropped = True def uncrop_data(self, verbose: bool = False): @@ -92,22 +80,14 @@ def uncrop_data(self, verbose: bool = False): """ if self.is_cropped == False: return - assert ( - self.uncropped_shape is not None - ), "Calling uncrop_data() without having cropped first" + assert self.uncropped_shape is not None, "Calling uncrop_data() without having cropped first" prediction_arr = np.zeros(self.uncropped_shape) prediction_arr[self.crop] = self._prediction_arr self._prediction_arr = prediction_arr reference_arr = np.zeros(self.uncropped_shape) reference_arr[self.crop] = self._reference_arr - ( - print( - f"-- Uncropped from {self._reference_arr.shape} to {self.uncropped_shape}" - ) - if verbose - else None - ) + (print(f"-- Uncropped from {self._reference_arr.shape} to {self.uncropped_shape}") if verbose else None) self._reference_arr = reference_arr self.is_cropped = False @@ -117,9 +97,7 @@ def set_dtype(self, type): Args: dtype (type): Expected integer type for the arrays. """ - assert np.issubdtype( - type, int_type - ), "set_dtype: tried to set dtype to something other than integers" + assert np.issubdtype(type, int_type), "set_dtype: tried to set dtype to something other than integers" self._prediction_arr = self._prediction_arr.astype(type) self._reference_arr = self._reference_arr.astype(type) @@ -211,9 +189,7 @@ def copy(self): ) # type:ignore -def _check_array_integrity( - prediction_arr: np.ndarray, reference_arr: np.ndarray, dtype: type | None = None -): +def _check_array_integrity(prediction_arr: np.ndarray, reference_arr: np.ndarray, dtype: type | None = None): """Validates integrity between two arrays, checking shape, dtype, and consistency with `dtype`. Args: @@ -234,12 +210,8 @@ def _check_array_integrity( assert isinstance(prediction_arr, np.ndarray) and isinstance( reference_arr, np.ndarray ), "prediction and/or reference are not numpy arrays" - assert ( - prediction_arr.shape == reference_arr.shape - ), f"shape mismatch, got {prediction_arr.shape},{reference_arr.shape}" - assert ( - prediction_arr.dtype == reference_arr.dtype - ), f"dtype mismatch, got {prediction_arr.dtype},{reference_arr.dtype}" + assert prediction_arr.shape == reference_arr.shape, f"shape mismatch, got {prediction_arr.shape},{reference_arr.shape}" + # assert prediction_arr.dtype == reference_arr.dtype, f"dtype mismatch, got {prediction_arr.dtype},{reference_arr.dtype}" if dtype is not None: assert ( np.issubdtype(prediction_arr.dtype, dtype) @@ -331,15 +303,11 @@ def __init__( self.matched_instances = matched_instances if missed_reference_labels is None: - missed_reference_labels = list( - [i for i in self._ref_labels if i not in self._pred_labels] - ) + missed_reference_labels = list([i for i in self._ref_labels if i not in self._pred_labels]) self.missed_reference_labels = missed_reference_labels if missed_prediction_labels is None: - missed_prediction_labels = list( - [i for i in self._pred_labels if i not in self._ref_labels] - ) + missed_prediction_labels = list([i for i in self._pred_labels if i not in self._ref_labels]) self.missed_prediction_labels = missed_prediction_labels @property @@ -412,9 +380,7 @@ class InputType(_Enum_Compare): UNMATCHED_INSTANCE = UnmatchedInstancePair MATCHED_INSTANCE = MatchedInstancePair - def __call__( - self, prediction_arr: np.ndarray, reference_arr: np.ndarray - ) -> _ProcessingPair: + def __call__(self, prediction_arr: np.ndarray, reference_arr: np.ndarray) -> _ProcessingPair: return self.value(prediction_arr, reference_arr) @@ -432,9 +398,7 @@ def __init__(self, original_input: _ProcessingPair | None): self._original_input = original_input self._intermediatesteps: dict[str, _ProcessingPair] = {} - def add_intermediate_arr_data( - self, processing_pair: _ProcessingPair, inputtype: InputType - ): + def add_intermediate_arr_data(self, processing_pair: _ProcessingPair, inputtype: InputType): type_name = inputtype.name self.add_intermediate_data(type_name, processing_pair) @@ -444,36 +408,26 @@ def add_intermediate_data(self, key, value): @property def original_prediction_arr(self): - assert ( - self._original_input is not None - ), "Original prediction_arr is None, there are no intermediate steps" + assert self._original_input is not None, "Original prediction_arr is None, there are no intermediate steps" return self._original_input.prediction_arr @property def original_reference_arr(self): - assert ( - self._original_input is not None - ), "Original reference_arr is None, there are no intermediate steps" + assert self._original_input is not None, "Original reference_arr is None, there are no intermediate steps" return self._original_input.reference_arr def prediction_arr(self, inputtype: InputType): type_name = inputtype.name procpair = self[type_name] - assert isinstance( - procpair, _ProcessingPair - ), f"step {type_name} is not a processing pair, error" + assert isinstance(procpair, _ProcessingPair), f"step {type_name} is not a processing pair, error" return procpair.prediction_arr def reference_arr(self, inputtype: InputType): type_name = inputtype.name procpair = self[type_name] - assert isinstance( - procpair, _ProcessingPair - ), f"step {type_name} is not a processing pair, error" + assert isinstance(procpair, _ProcessingPair), f"step {type_name} is not a processing pair, error" return procpair.reference_arr def __getitem__(self, key): - assert ( - key in self._intermediatesteps - ), f"key {key} not in intermediate steps, maybe the step was skipped?" + assert key in self._intermediatesteps, f"key {key} not in intermediate steps, maybe the step was skipped?" return self._intermediatesteps[key]