From af844e563072a39f0c1ffe9b2c9634247c0d79ad Mon Sep 17 00:00:00 2001
From: iback
Date: Thu, 18 Apr 2024 14:52:45 +0000
Subject: [PATCH] added relative_volume_difference metric, where average
 positive means over-segmentation and negative the opposite. added edge case
 handling for it and refactored some metrics code to be consistent

---
 panoptica/__init__.py                         |   1 +
 panoptica/metrics/__init__.py                 |   7 +-
 panoptica/metrics/assd.py                     |  25 +++
 panoptica/metrics/cldice.py                   |   9 +-
 panoptica/metrics/dice.py                     |   9 +-
 panoptica/metrics/iou.py                      |   9 +-
 panoptica/metrics/metrics.py                  |  27 ++-
 .../metrics/relative_volume_difference.py     |  70 +++++++
 panoptica/panoptic_evaluator.py               |   7 +-
 panoptica/panoptic_result.py                  |  48 +++++
 panoptica/utils/edge_case_handling.py         |   9 +
 unit_tests/test_metrics.py                    | 176 ++++++++++++++++++
 12 files changed, 378 insertions(+), 19 deletions(-)
 create mode 100644 panoptica/metrics/relative_volume_difference.py
 create mode 100644 unit_tests/test_metrics.py

diff --git a/panoptica/__init__.py b/panoptica/__init__.py
index 9e17409..ada7cd0 100644
--- a/panoptica/__init__.py
+++ b/panoptica/__init__.py
@@ -10,3 +10,4 @@
     UnmatchedInstancePair,
     MatchedInstancePair,
 )
+from panoptica.metrics import Metric, MetricMode, MetricType
diff --git a/panoptica/metrics/__init__.py b/panoptica/metrics/__init__.py
index f0e515f..ff6a809 100644
--- a/panoptica/metrics/__init__.py
+++ b/panoptica/metrics/__init__.py
@@ -1,5 +1,5 @@
 from panoptica.metrics.assd import (
-    _average_surface_distance,
+    _compute_instance_average_symmetric_surface_distance,
     _average_symmetric_surface_distance,
 )
 from panoptica.metrics.cldice import (
@@ -10,6 +10,11 @@
     _compute_dice_coefficient,
     _compute_instance_volumetric_dice,
 )
+# overunder_segmentation.py is not added by this patch; keep its import disabled (like Metric.ST):
+# from panoptica.metrics.overunder_segmentation import (
+#     _compute_instance_segmentation_tendency, _compute_segmentation_tendency,
+# )
+from panoptica.metrics.relative_volume_difference import _compute_instance_relative_volume_difference, _compute_relative_volume_difference
 from panoptica.metrics.iou import _compute_instance_iou, _compute_iou
 from panoptica.metrics.metrics import (
     Evaluation_List_Metric,
diff --git a/panoptica/metrics/assd.py b/panoptica/metrics/assd.py
index 7f030e3..b98b407 100644
--- a/panoptica/metrics/assd.py
+++ b/panoptica/metrics/assd.py
@@ -3,6 +3,31 @@
 from scipy.ndimage._nd_image import euclidean_feature_transform
 
 
+def _compute_instance_average_symmetric_surface_distance(
+    ref_labels: np.ndarray,
+    pred_labels: np.ndarray,
+    ref_instance_idx: int | None = None,
+    pred_instance_idx: int | None = None,
+    voxelspacing=None,
+    connectivity=1,
+):
+    if ref_instance_idx is None and pred_instance_idx is None:
+        return _average_symmetric_surface_distance(
+            reference=ref_labels,
+            prediction=pred_labels,
+            voxelspacing=voxelspacing,
+            connectivity=connectivity,
+        )
+    ref_instance_mask = ref_labels == ref_instance_idx
+    pred_instance_mask = pred_labels == pred_instance_idx
+    return _average_symmetric_surface_distance(
+        reference=ref_instance_mask,
+        prediction=pred_instance_mask,
+        voxelspacing=voxelspacing,
+        connectivity=connectivity,
+    )
+
+
 def _average_symmetric_surface_distance(
     reference,
     prediction,
diff --git a/panoptica/metrics/cldice.py b/panoptica/metrics/cldice.py
index bdbe1fc..d97a637 100644
--- a/panoptica/metrics/cldice.py
+++ b/panoptica/metrics/cldice.py
@@ -18,8 +18,8 @@ def cl_score(volume: np.ndarray, skeleton: np.ndarray):
 def _compute_centerline_dice(
     ref_labels: np.ndarray,
     pred_labels: np.ndarray,
-    ref_instance_idx: int,
-    pred_instance_idx: int,
+    ref_instance_idx: int | None = None,
+    pred_instance_idx: int | None = None,
 ) -> float:
     """Compute the centerline Dice (clDice) coefficient between a specific pair of instances.
 
@@ -32,6 +32,11 @@ def _compute_centerline_dice(
     Returns:
         float: clDice coefficient
     """
+    if ref_instance_idx is None and pred_instance_idx is None:
+        return _compute_centerline_dice_coefficient(
+            reference=ref_labels,
+            prediction=pred_labels,
+        )
     ref_instance_mask = ref_labels == ref_instance_idx
     pred_instance_mask = pred_labels == pred_instance_idx
     return _compute_centerline_dice_coefficient(
diff --git a/panoptica/metrics/dice.py b/panoptica/metrics/dice.py
index 55e3b3a..ed70f06 100644
--- a/panoptica/metrics/dice.py
+++ b/panoptica/metrics/dice.py
@@ -4,8 +4,8 @@
 def _compute_instance_volumetric_dice(
     ref_labels: np.ndarray,
     pred_labels: np.ndarray,
-    ref_instance_idx: int,
-    pred_instance_idx: int,
+    ref_instance_idx: int | None = None,
+    pred_instance_idx: int | None = None,
 ) -> float:
     """
     Compute the Dice coefficient between a specific pair of instances.
@@ -25,6 +25,11 @@ def _compute_instance_volumetric_dice(
         float: Dice coefficient between the specified instances. A value between 0 and 1, where higher values
         indicate better overlap and similarity between instances.
     """
+    if ref_instance_idx is None and pred_instance_idx is None:
+        return _compute_dice_coefficient(
+            reference=ref_labels,
+            prediction=pred_labels,
+        )
     ref_instance_mask = ref_labels == ref_instance_idx
     pred_instance_mask = pred_labels == pred_instance_idx
     return _compute_dice_coefficient(
diff --git a/panoptica/metrics/iou.py b/panoptica/metrics/iou.py
index 7cfbd81..a039c49 100644
--- a/panoptica/metrics/iou.py
+++ b/panoptica/metrics/iou.py
@@ -4,8 +4,8 @@
 def _compute_instance_iou(
     reference_arr: np.ndarray,
     prediction_arr: np.ndarray,
-    ref_instance_idx: int,
-    pred_instance_idx: int,
+    ref_instance_idx: int | None = None,
+    pred_instance_idx: int | None = None,
 ) -> float:
     """
     Compute Intersection over Union (IoU) between a specific pair of reference and prediction instances.
@@ -19,6 +19,11 @@ def _compute_instance_iou(
     Returns:
         float: IoU between the specified instances.
     """
+    if ref_instance_idx is None and pred_instance_idx is None:
+        return _compute_iou(
+            reference_arr=reference_arr,
+            prediction_arr=prediction_arr,
+        )
     ref_instance_mask = reference_arr == ref_instance_idx
     pred_instance_mask = prediction_arr == pred_instance_idx
     return _compute_iou(ref_instance_mask, pred_instance_mask)
diff --git a/panoptica/metrics/metrics.py b/panoptica/metrics/metrics.py
index 960e6af..fa40a59 100644
--- a/panoptica/metrics/metrics.py
+++ b/panoptica/metrics/metrics.py
@@ -5,10 +5,12 @@
 import numpy as np
 
 from panoptica.metrics import (
-    _average_symmetric_surface_distance,
-    _compute_centerline_dice_coefficient,
-    _compute_dice_coefficient,
-    _compute_iou,
+    _compute_instance_average_symmetric_surface_distance,
+    _compute_centerline_dice,
+    _compute_instance_volumetric_dice,
+    _compute_instance_iou,
+    _compute_instance_relative_volume_difference,
+    # _compute_instance_segmentation_tendency,
 )
 from panoptica.utils.constants import _Enum_Compare, auto
 
@@ -89,10 +91,12 @@ class Metric(_Enum_Compare):
         _type_: _description_
     """
 
-    DSC = _Metric("DSC", False, _compute_dice_coefficient)
-    IOU = _Metric("IOU", False, _compute_iou)
-    ASSD = _Metric("ASSD", True, _average_symmetric_surface_distance)
-    clDSC = _Metric("clDSC", False, _compute_centerline_dice_coefficient)
+    DSC = _Metric("DSC", False, _compute_instance_volumetric_dice)
+    IOU = _Metric("IOU", False, _compute_instance_iou)
+    ASSD = _Metric("ASSD", True, _compute_instance_average_symmetric_surface_distance)
+    clDSC = _Metric("clDSC", False, _compute_centerline_dice)
+    RVD = _Metric("RVD", True, _compute_instance_relative_volume_difference)
+    # ST = _Metric("ST", False, _compute_instance_segmentation_tendency)
 
     def __call__(
         self,
@@ -166,6 +170,8 @@ class MetricMode(_Enum_Compare):
     AVG = auto()
     SUM = auto()
     STD = auto()
+    MIN = auto()
+    MAX = auto()
 
 
 class MetricType(_Enum_Compare):
@@ -287,9 +293,14 @@ def __init__(
         if is_edge_case:
             self.AVG: float | None = edge_case_result
             self.SUM: None | float = edge_case_result
+            self.MIN: None | float = edge_case_result
+            self.MAX: None | float = edge_case_result
         else:
             self.AVG = None if self.ALL is None else np.average(self.ALL)
             self.SUM = None if self.ALL is None else np.sum(self.ALL)
+            self.MIN = None if self.ALL is None or len(self.ALL) == 0 else np.min(self.ALL)
+            self.MAX = None if self.ALL is None or len(self.ALL) == 0 else np.max(self.ALL)
+
             self.STD = (
                 None
                 if self.ALL is None
diff --git a/panoptica/metrics/relative_volume_difference.py b/panoptica/metrics/relative_volume_difference.py
new file mode 100644
index 0000000..bbb131b
--- /dev/null
+++ b/panoptica/metrics/relative_volume_difference.py
@@ -0,0 +1,70 @@
+import numpy as np
+
+
+def _compute_instance_relative_volume_difference(
+    ref_labels: np.ndarray,
+    pred_labels: np.ndarray,
+    ref_instance_idx: int | None = None,
+    pred_instance_idx: int | None = None,
+) -> float:
+    """
+    Compute the relative volume difference (RVD) between a specific pair of instances.
+
+    The instance masks are obtained by comparing the label arrays with the given instance indices.
+    If both indices are None, the given arrays are compared directly.
+
+    RVD = (pred_volume - ref_volume) / ref_volume
+
+    Args:
+        ref_labels (np.ndarray): Reference instance labels.
+        pred_labels (np.ndarray): Prediction instance labels.
+        ref_instance_idx (int | None): Index of the reference instance.
+        pred_instance_idx (int | None): Index of the prediction instance.
+
+    Returns:
+        float: RVD between the specified instances. 0 means equal volume, values > 0 mean
+            over-segmentation and values < 0 mean under-segmentation.
+    """
+    if ref_instance_idx is None and pred_instance_idx is None:
+        return _compute_relative_volume_difference(
+            reference=ref_labels,
+            prediction=pred_labels,
+        )
+    ref_instance_mask = ref_labels == ref_instance_idx
+    pred_instance_mask = pred_labels == pred_instance_idx
+    return _compute_relative_volume_difference(
+        reference=ref_instance_mask,
+        prediction=pred_instance_mask,
+    )
+
+
+def _compute_relative_volume_difference(
+    reference: np.ndarray,
+    prediction: np.ndarray,
+    *args,
+) -> float:
+    """
+    Compute the relative volume difference between two binary masks.
+
+    The relative volume difference is the predicted volume of an instance in relation to the reference volume (>0 oversegmented, <0 undersegmented)
+
+    RVD = ((pred_volume-ref_volume) / ref_volume)
+
+    Args:
+        reference (np.ndarray): Reference binary mask.
+        prediction (np.ndarray): Prediction binary mask.
+
+    Returns:
+        float: Relative volume difference. 0.0 if both masks are empty, +inf if only the
+            reference is empty, otherwise a value >= -1 (0 means equal volume).
+    """
+    # Python floats avoid unsigned-integer wrap-around when pred < ref (e.g. uint8 masks)
+    reference_volume = float(np.sum(reference))
+    prediction_volume = float(np.sum(prediction))
+
+    # An empty reference makes the ratio undefined: 0.0 if both are empty, else +inf
+    if reference_volume == 0:
+        return 0.0 if prediction_volume == 0 else float("inf")
+
+    # Signed relative difference: > 0 over-segmented, < 0 under-segmented
+    return (prediction_volume - reference_volume) / reference_volume
diff --git a/panoptica/panoptic_evaluator.py b/panoptica/panoptic_evaluator.py
index 9f60481..5ea18d0 100644
--- a/panoptica/panoptic_evaluator.py
+++ b/panoptica/panoptic_evaluator.py
@@ -18,15 +18,14 @@
 
 
 class Panoptic_Evaluator:
+
     def __init__(
         self,
-        expected_input: (
-            Type[SemanticPair] | Type[UnmatchedInstancePair] | Type[MatchedInstancePair]
-        ) = MatchedInstancePair,
+        expected_input: Type[SemanticPair] | Type[UnmatchedInstancePair] | Type[MatchedInstancePair] = MatchedInstancePair,
         instance_approximator: InstanceApproximator | None = None,
         instance_matcher: InstanceMatchingAlgorithm | None = None,
         edge_case_handler: EdgeCaseHandler | None = None,
-        eval_metrics: list[Metric] = [Metric.DSC, Metric.IOU, Metric.ASSD],
+        eval_metrics: list[Metric] = [Metric.DSC, Metric.IOU, Metric.ASSD, Metric.RVD],
         decision_metric: Metric | None = None,
         decision_threshold: float | None = None,
         log_times: bool = False,
diff --git a/panoptica/panoptic_result.py b/panoptica/panoptic_result.py
index 0abbba9..c037e85 100644
--- a/panoptica/panoptic_result.py
+++ b/panoptica/panoptic_result.py
@@ -14,6 +14,7 @@
     _compute_centerline_dice_coefficient,
     _compute_dice_coefficient,
     _average_symmetric_surface_distance,
+    _compute_relative_volume_difference,
 )
 from panoptica.utils import EdgeCaseHandler
 
@@ -142,6 +143,14 @@ def __init__(
             global_bin_assd,
             long_name="Global Binary Average Symmetric Surface Distance",
         )
+        #
+        self.global_bin_rvd: float
+        self._add_metric(
+            "global_bin_rvd",
+            MetricType.GLOBAL,
+            global_bin_rvd,
+            long_name="Global Binary Relative Volume Difference",
+        )
         # endregion
         #
         # region IOU
@@ -232,6 +241,23 @@ def __init__(
             long_name="Segmentation Quality Assd Standard Deviation",
         )
         # endregion
+        #
+        # region RVD
+        self.sq_rvd: float
+        self._add_metric(
+            "sq_rvd",
+            MetricType.INSTANCE,
+            sq_rvd,
+            long_name="Segmentation Quality Relative Volume Difference",
+        )
+        self.sq_rvd_std: float
+        self._add_metric(
+            "sq_rvd_std",
+            MetricType.INSTANCE,
+            sq_rvd_std,
+            long_name="Segmentation Quality Relative Volume Difference Standard Deviation",
+        )
+        # endregion
 
         ##################
         # List Metrics #
@@ -468,6 +494,18 @@ def sq_assd_std(res: PanopticaResult):
 # endregion
 
 
+# region RVD
+def sq_rvd(res: PanopticaResult):
+    return res.get_list_metric(Metric.RVD, mode=MetricMode.AVG)
+
+
+def sq_rvd_std(res: PanopticaResult):
+    return res.get_list_metric(Metric.RVD, mode=MetricMode.STD)
+
+
+# endregion
+
+
 # region Global
 def global_bin_dsc(res: PanopticaResult):
     if res.tp == 0:
@@ -499,6 +537,16 @@ def global_bin_assd(res: PanopticaResult):
     return _average_symmetric_surface_distance(ref_binary, pred_binary)
 
 
+def global_bin_rvd(res: PanopticaResult):
+    if res.tp == 0:
+        return 0.0
+    pred_binary = res._prediction_arr.copy()
+    ref_binary = res._reference_arr.copy()
+    pred_binary[pred_binary != 0] = 1
+    ref_binary[ref_binary != 0] = 1
+    return _compute_relative_volume_difference(ref_binary, pred_binary)
+
+
 # endregion
 
 
diff --git a/panoptica/utils/edge_case_handling.py b/panoptica/utils/edge_case_handling.py
index c474557..e0ecc7e 100644
--- a/panoptica/utils/edge_case_handling.py
+++ b/panoptica/utils/edge_case_handling.py
@@ -78,6 +78,7 @@ def __str__(self) -> str:
 
 
 class EdgeCaseHandler:
+
     def __init__(
         self,
         listmetric_zeroTP_handling: dict[Metric, MetricZeroTPEdgeCaseHandling] = {
@@ -85,6 +86,10 @@ def __init__(
                 no_instances_result=EdgeCaseResult.NAN,
                 default_result=EdgeCaseResult.ZERO,
             ),
+            Metric.clDSC: MetricZeroTPEdgeCaseHandling(
+                no_instances_result=EdgeCaseResult.NAN,
+                default_result=EdgeCaseResult.ZERO,
+            ),
             Metric.IOU: MetricZeroTPEdgeCaseHandling(
                 no_instances_result=EdgeCaseResult.NAN,
                 empty_prediction_result=EdgeCaseResult.ZERO,
@@ -94,6 +99,10 @@ def __init__(
                 no_instances_result=EdgeCaseResult.NAN,
                 default_result=EdgeCaseResult.INF,
             ),
+            Metric.RVD: MetricZeroTPEdgeCaseHandling(
+                no_instances_result=EdgeCaseResult.NAN,
+                default_result=EdgeCaseResult.NAN,
+            ),
         },
         empty_list_std: EdgeCaseResult = EdgeCaseResult.NAN,
     ) -> None:
diff --git a/unit_tests/test_metrics.py b/unit_tests/test_metrics.py
new file mode 100644
index 0000000..729b321
--- /dev/null
+++ b/unit_tests/test_metrics.py
@@ -0,0 +1,176 @@
+# Call 'python -m unittest' on this folder
+# coverage run -m unittest
+# coverage report
+# coverage html
+import os
+import unittest
+
+import numpy as np
+
+from panoptica.metrics import Metric
+from panoptica.panoptic_result import MetricCouldNotBeComputedException, PanopticaResult
+from panoptica.utils.edge_case_handling import EdgeCaseHandler, EdgeCaseResult
+
+
+def case_simple_identical():
+    # trivial 100% overlap
+    prediction_arr = np.array(
+        [
+            [0, 1, 1, 1],
+            [0, 1, 1, 1],
+            [0, 1, 0, 0],
+            [0, 1, 0, 0],
+        ]
+    )
+    return prediction_arr, prediction_arr.copy()
+
+
+def case_simple_nooverlap():
+    # binary opposites
+    prediction_arr = np.array(
+        [
+            [0, 1, 0, 1],
+            [0, 1, 0, 1],
+            [0, 1, 0, 0],
+            [0, 1, 0, 0],
+        ]
+    )
+    reference_arr = 1 - prediction_arr
+    return prediction_arr, reference_arr
+
+
+def case_simple_overpredicted():
+    # reference is real subset of prediction
+    prediction_arr = np.array(
+        [
+            [0, 0, 1, 0],
+            [1, 1, 1, 0],
+            [1, 1, 1, 1],
+            [0, 1, 1, 0],
+        ]
+    )
+    reference_arr = np.array(
+        [
+            [0, 0, 0, 0],
+            [0, 1, 1, 0],
+            [0, 1, 1, 0],
+            [0, 0, 0, 0],
+        ]
+    )
+    return prediction_arr, reference_arr
+
+
+def case_simple_underpredicted():
+    # prediction is real subset of reference
+    prediction_arr = np.array(
+        [
+            [0, 0, 0, 0],
+            [0, 1, 1, 0],
+            [0, 1, 1, 0],
+            [0, 0, 0, 0],
+        ]
+    )
+    reference_arr = np.array(
+        [
+            [0, 0, 1, 0],
+            [1, 1, 1, 0],
+            [1, 1, 1, 1],
+            [0, 1, 1, 0],
+        ]
+    )
+    return prediction_arr, reference_arr
+
+
+class Test_RVD(unittest.TestCase):
+    # case_simple_identical
+    # case_simple_nooverlap
+    # case_simple_overpredicted
+    # case_simple_underpredicted
+
+    def setUp(self) -> None:
+        os.environ["PANOPTICA_CITATION_REMINDER"] = "False"
+        return super().setUp()
+
+    def test_rvd_case_simple_identical(self):
+
+        pred_arr, ref_arr = case_simple_identical()
+        rvd = Metric.RVD(reference_arr=ref_arr, prediction_arr=pred_arr)
+        self.assertEqual(rvd, 0.0)
+
+    def test_rvd_case_simple_nooverlap(self):
+
+        pred_arr, ref_arr = case_simple_nooverlap()
+        rvd = Metric.RVD(reference_arr=ref_arr, prediction_arr=pred_arr)
+        self.assertEqual(rvd, -0.4)
+
+    def test_rvd_case_simple_overpredicted(self):
+
+        pred_arr, ref_arr = case_simple_overpredicted()
+        rvd = Metric.RVD(reference_arr=ref_arr, prediction_arr=pred_arr)
+        self.assertEqual(rvd, 1.5)
+
+    def test_rvd_case_simple_underpredicted(self):
+
+        pred_arr, ref_arr = case_simple_underpredicted()
+        rvd = Metric.RVD(reference_arr=ref_arr, prediction_arr=pred_arr)
+        self.assertEqual(rvd, -0.6)
+
+
+class Test_DSC(unittest.TestCase):
+    def setUp(self) -> None:
+        os.environ["PANOPTICA_CITATION_REMINDER"] = "False"
+        return super().setUp()
+
+    def test_dsc_case_simple_identical(self):
+
+        pred_arr, ref_arr = case_simple_identical()
+        dsc = Metric.DSC(reference_arr=ref_arr, prediction_arr=pred_arr)
+        self.assertEqual(dsc, 1.0)
+
+    def test_dsc_case_simple_nooverlap(self):
+
+        pred_arr, ref_arr = case_simple_nooverlap()
+        dsc = Metric.DSC(reference_arr=ref_arr, prediction_arr=pred_arr)
+        self.assertEqual(dsc, 0.0)
+
+    def test_dsc_case_simple_overpredicted(self):
+
+        pred_arr, ref_arr = case_simple_overpredicted()
+        dsc = Metric.DSC(reference_arr=ref_arr, prediction_arr=pred_arr)
+        self.assertEqual(dsc, 0.5714285714285714)
+
+    def test_dsc_case_simple_underpredicted(self):
+
+        pred_arr, ref_arr = case_simple_underpredicted()
+        dsc = Metric.DSC(reference_arr=ref_arr, prediction_arr=pred_arr)
+        self.assertEqual(dsc, 0.5714285714285714)
+
+
+# class Test_ST(unittest.TestCase):
+#     def setUp(self) -> None:
+#         os.environ["PANOPTICA_CITATION_REMINDER"] = "False"
+#         return super().setUp()
+
+#     def test_st_case_simple_identical(self):
+#
+#         pred_arr, ref_arr = case_simple_identical()
+#         st = Metric.ST(reference_arr=ref_arr, prediction_arr=pred_arr)
+#         self.assertEqual(st, 0.0)
+
+#     def test_st_case_simple_nooverlap(self):
+#
+#         pred_arr, ref_arr = case_simple_nooverlap()
+#         st = Metric.ST(reference_arr=ref_arr, prediction_arr=pred_arr)
+#         self.assertEqual(st, -0.4)
+#
+#     def test_st_case_simple_overpredicted(self):
+#
+#         pred_arr, ref_arr = case_simple_overpredicted()
+#         st = Metric.ST(reference_arr=ref_arr, prediction_arr=pred_arr)
+#         self.assertEqual(st, 0.5714285714285714)
+#
+#     def test_st_case_simple_underpredicted(self):
+#
+#         pred_arr, ref_arr = case_simple_underpredicted()
+#         st = Metric.ST(reference_arr=ref_arr, prediction_arr=pred_arr)
+#         self.assertEqual(st, 0.5714285714285714)