Commit df8586b

Merge pull request #19 from fraunhoferportugal/dev
Tabular docs and PyPI dependency fix
ivo-facoco authored Nov 27, 2024
2 parents 2e0c160 + 43d6519 commit df8586b
Showing 9 changed files with 141 additions and 86 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -225,6 +225,6 @@ If you publish work that uses pyMDMA, please cite pyMDMA as follows:
```

## Acknowledgments
This work was funded by AISym4Med project number 101095387, supported by the European Heath and Digital Executive Agency (HADEA), granting authority under the powers delegated by the Europeam Commision. More information on this project can be found [here](https://aisym4med.eu/).
This work was funded by AISym4Med project number 101095387, supported by the European Health and Digital Executive Agency (HADEA), granting authority under the powers delegated by the European Commission. More information on this project can be found [here](https://aisym4med.eu/).

This work was supported by European funds through the Recovery and Resilience Plan, project "Center for Responsible AI", project number C645008882-00000055. Learn more about this project [here](https://centerforresponsible.ai/).
1 change: 0 additions & 1 deletion docs/image/input_val.md
@@ -2,7 +2,6 @@

## Data-based
### Quality (No-reference)
::: pymdma.image.measures.input_val.DOM
::: pymdma.image.measures.input_val.Tenengrad
::: pymdma.image.measures.input_val.TenengradRelative
::: pymdma.image.measures.input_val.EME
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -43,7 +43,7 @@ gudhi = {version = ">=3.9.0, <=4.0.0"}
scikit-learn = {version = ">1.4.0"}

# Image dependencies
pydom = {git = "https://github.com/umang-singhal/pydom.git", rev = "2554af8d0", optional = true}
# pydom = {git = "https://github.com/umang-singhal/pydom.git", rev = "2554af8d0", optional = true}
torchvision = {version = ">=0.15.2, <0.19.0", optional = true}
torchmetrics = {version = ">=1.3.2, <1.4.0", extras = ["image"], optional = true}
pycocotools = {version = ">=2.0.8", optional = true}
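Context for this change: PyPI rejects published packages whose dependency metadata contains direct Git references, which is presumably the dependency fix named in the commit title — note that the pydom requirement is commented out rather than removed. A minimal sketch of how downstream code can treat pydom as a truly optional dependency (an assumption about usage, not code from this commit):

```python
# Guarded optional import: pydom is no longer declared in pyproject.toml,
# so it may or may not be present at runtime.
try:
    import dom as _dom  # module distributed by the pydom package
except ImportError:
    _dom = None  # DOM sharpness metric becomes unavailable

def dom_available() -> bool:
    """True if the manually installed pydom package can be imported."""
    return _dom is not None
```

Users who still want the DOM metric can install the package by hand, mirroring the commented-out requirement, e.g. `pip install "git+https://github.com/umang-singhal/pydom.git@2554af8d0"`.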
159 changes: 80 additions & 79 deletions src/pymdma/image/measures/input_val/data/no_reference.py
@@ -1,7 +1,7 @@
from typing import Literal, Tuple, Union

import cv2
import dom as _dom
# import dom as _dom
import numpy as np
import torch
from PIL import Image, ImageEnhance
@@ -17,84 +17,84 @@
# TODO review documentations and attributes


class DOM(Metric):
"""Computes DOM sharpness score for an image. It is effective in detecting
motion-blur, de-focused images or inherent properties of imaging system.
**Objective**: Sharpness
Parameters
----------
width : int, optional, default=2
Width of the edge filter.
sharpness_threshold : int, optional, default=2
Threshold for considering if a pixel is sharp or not.
edge_threshold : float, optional, default=0.0001
Threshold for edge.
**kwargs : dict, optional
Additional keyword arguments for compatibility.
References
----------
Kumar et al., Sharpness estimation for document and scene images (2012).
https://ieeexplore.ieee.org/document/6460868
Code was adapted from:
pydom, Sharpness Estimation for Document and Scene Images.
https://github.com/umang-singhal/pydom
Examples
--------
>>> dom = DOM()
>>> imgs = np.random.rand(20, 100, 100, 3) # (N, H, W, C)
>>> result: MetricResult = dom.compute(imgs)
"""

reference_type = ReferenceType.NONE
evaluation_level = EvaluationLevel.INSTANCE
metric_group = MetricGroup.QUALITY

higher_is_better: bool = True
min_value: float = 0.0
max_value: float = 1.0

def __init__(
self,
width: int = 2,
sharpness_threshold: int = 2,
edge_threshold: float = 0.0001,
**kwargs,
):
super().__init__(**kwargs)
self._dom = _dom.DOM()
self.width = width
self.sharpness_threshold = sharpness_threshold
self.edge_threshold = edge_threshold

def compute(
self,
imgs: np.ndarray,
**kwargs,
) -> MetricResult:
"""Computes DOM score for an image.
Parameters
----------
imgs : {(N, H, W, C) ndarray, (N, H, W) ndarray}
List of arrays representing RGB or grayscale image of shape (H, W, C) or (H, W), respectively.
Returns
-------
result: MetricResult
DOM score for each image.
"""
scores = [
self._dom.get_sharpness(img, self.width, self.sharpness_threshold, self.edge_threshold) for img in imgs
]

return DistributionResult(
instance_level={"dtype": OutputsTypes.ARRAY, "subtype": "float", "value": scores},
)
# class DOM(Metric):
# """Computes DOM sharpness score for an image. It is effective in detecting
# motion-blur, de-focused images or inherent properties of imaging system.

# **Objective**: Sharpness

# Parameters
# ----------
# width : int, optional, default=2
# Width of the edge filter.
# sharpness_threshold : int, optional, default=2
# Threshold for considering if a pixel is sharp or not.
# edge_threshold : float, optional, default=0.0001
# Threshold for edge.
# **kwargs : dict, optional
# Additional keyword arguments for compatibility.

# References
# ----------
# Kumar et al., Sharpness estimation for document and scene images (2012).
# https://ieeexplore.ieee.org/document/6460868

# Code was adapted from:
# pydom, Sharpness Estimation for Document and Scene Images.
# https://github.com/umang-singhal/pydom

# Examples
# --------
# >>> dom = DOM()
# >>> imgs = np.random.rand(20, 100, 100, 3) # (N, H, W, C)
# >>> result: MetricResult = dom.compute(imgs)
# """

# reference_type = ReferenceType.NONE
# evaluation_level = EvaluationLevel.INSTANCE
# metric_group = MetricGroup.QUALITY

# higher_is_better: bool = True
# min_value: float = 0.0
# max_value: float = 1.0

# def __init__(
# self,
# width: int = 2,
# sharpness_threshold: int = 2,
# edge_threshold: float = 0.0001,
# **kwargs,
# ):
# super().__init__(**kwargs)
# self._dom = _dom.DOM()
# self.width = width
# self.sharpness_threshold = sharpness_threshold
# self.edge_threshold = edge_threshold

# def compute(
# self,
# imgs: np.ndarray,
# **kwargs,
# ) -> MetricResult:
# """Computes DOM score for an image.

# Parameters
# ----------
# imgs : {(N, H, W, C) ndarray, (N, H, W) ndarray}
# List of arrays representing RGB or grayscale image of shape (H, W, C) or (H, W), respectively.

# Returns
# -------
# result: MetricResult
# DOM score for each image.
# """
# scores = [
# self._dom.get_sharpness(img, self.width, self.sharpness_threshold, self.edge_threshold) for img in imgs
# ]

# return DistributionResult(
# instance_level={"dtype": OutputsTypes.ARRAY, "subtype": "float", "value": scores},
# )


class Tenengrad(Metric):
@@ -380,6 +380,7 @@ def compute(
# TODO documentation
class ExposureBrightness(Metric):
"""Computes Exposure and Brightness level Metric.
Values higher than 1 indicate overexposure, while values closer to 0 indicate underexposure.
**Objective**: Exposure and Brightness
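Earlier in this file's diff, the DOM class is commented out along with its `import dom as _dom` line, consistent with the pyproject.toml change. For reference, a minimal usage sketch of the underlying library, assuming pydom has been installed manually and that `dom.DOM.get_sharpness` keeps the positional signature used in the removed wrapper:

```python
import numpy as np
import dom as _dom  # from the manually installed pydom package

estimator = _dom.DOM()
imgs = np.random.rand(4, 100, 100, 3)  # (N, H, W, C), as in the old docstring
# Arguments mirror the removed wrapper's defaults:
# width=2, sharpness_threshold=2, edge_threshold=0.0001
scores = [estimator.get_sharpness(img, 2, 2, 0.0001) for img in imgs]
print(scores)  # one sharpness score per image; higher means sharper
```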
5 changes: 5 additions & 0 deletions src/pymdma/tabular/measures/input_val/data/privacy.py
@@ -25,6 +25,11 @@ class KAnonymityScore(Metric):
**kwargs : dict
Additional keyword arguments passed to the parent class.
References
----------
Díaz and García, A python library to check the level of anonymity of a dataset. (2022).
http://dx.doi.org/10.1038/s41597-022-01894-2
Returns
-------
MetricResult
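Background for the reference added to KAnonymityScore: a table is k-anonymous when every combination of quasi-identifier values occurs in at least k rows. A minimal sketch of that computation (`k_anonymity` is a hypothetical helper, not pymdma's implementation):

```python
import pandas as pd

def k_anonymity(df: pd.DataFrame, quasi_identifiers: list[str]) -> int:
    """Smallest equivalence-class size across the quasi-identifier columns."""
    return int(df.groupby(quasi_identifiers).size().min())

toy = pd.DataFrame({
    "age_band": ["20-30", "20-30", "30-40", "30-40"],
    "zip3":     ["150",   "150",   "150",   "150"],
})
print(k_anonymity(toy, ["age_band", "zip3"]))  # 2 -> every group has >= 2 rows
```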
25 changes: 25 additions & 0 deletions src/pymdma/tabular/measures/input_val/data/quality.py
@@ -34,6 +34,11 @@ class CorrelationScore(Metric):
**kwargs : dict
Additional keyword arguments for compatibility or future use.
References
----------
Shrestha, Detecting multicollinearity in regression analysis (2020).
http://pubs.sciepub.com/ajams/8/2/1
Returns
-------
MetricResult
@@ -148,6 +153,11 @@ class UniquenessScore(Metric):
**kwargs : dict
Additional keyword arguments for compatibility or future use.
References
----------
Sukhobok, Tabular data anomaly patterns (2017).
https://ieeexplore.ieee.org/document/8316296
Returns
-------
MetricResult
@@ -390,6 +400,11 @@ class OutlierScore(Metric):
**kwargs : dict
Additional keyword arguments passed to the parent class.
References
----------
Iglewicz, B. and Hoaglin, D. (1993) The ASQC Basic References in Quality Control: Statistical Techniques.
In: Mykytka, E.F., Eds., How to Detect and Handle Outliers, ASQC Quality Press, Milwaukee, Vol. 16
Returns
-------
MetricResult
@@ -516,6 +531,11 @@ class MissingScore(Metric):
**kwargs : dict
Additional keyword arguments passed to the parent class.
References
----------
Taleb et al., Big data quality: A quality dimensions evaluation (2016).
https://ieeexplore.ieee.org/document/7816918
Returns
-------
MetricResult
@@ -691,6 +711,11 @@ class VIFactorScore(Metric):
**kwargs : dict
Additional keyword arguments passed to the parent class.
References
----------
Marcoulides and Raykov, Evaluation of variance inflation factors in regression models using latent variable modeling methods (2019).
https://pmc.ncbi.nlm.nih.gov/articles/PMC6713981/
Returns
-------
MetricResult
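The Iglewicz and Hoaglin reference added to OutlierScore describes the modified Z-score, which replaces the mean and standard deviation with the median and the median absolute deviation (MAD); values with |M| > 3.5 are conventionally flagged. A minimal sketch of that test (an assumption about the method behind the metric, not pymdma's code):

```python
import numpy as np

def modified_z_scores(x: np.ndarray) -> np.ndarray:
    """Iglewicz-Hoaglin modified Z-score: 0.6745 * (x - median) / MAD."""
    med = np.median(x)
    mad = np.median(np.abs(x - med))
    return 0.6745 * (x - med) / mad

x = np.array([9.8, 10.1, 10.0, 9.9, 30.0])
print(np.abs(modified_z_scores(x)) > 3.5)  # only 30.0 is flagged as an outlier
```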
21 changes: 18 additions & 3 deletions src/pymdma/tabular/measures/synthesis_val/data/similarity.py
@@ -18,7 +18,7 @@ class StatisticalSimScore(Metric):
This metric assesses how closely the statistical properties of the synthetic dataset
resemble those of the real dataset, providing a fidelity measure for synthetic data generation.
**Objective**: Similarity
**Objective**: Fidelity
Parameters
----------
@@ -28,6 +28,11 @@ class StatisticalSimScore(Metric):
**kwargs : dict
Additional keyword arguments passed to the parent class.
References
----------
Yang et al., Structured evaluation of synthetic tabular data (2024).
https://arxiv.org/abs/2403.10424
Returns
-------
MetricResult
@@ -204,7 +209,7 @@ class StatisiticalDivergenceScore(Metric):
"""Computes a statistical divergence score for each column, specifically
the Jensen-Shannon (JS) and Kullback-Leibler (KL) divergence scores.
**Objective**: Similarity
**Objective**: Fidelity
Parameters
----------
@@ -216,6 +221,11 @@ class StatisiticalDivergenceScore(Metric):
**kwargs : dict
Additional keyword arguments passed to the parent class.
References
----------
Fonseca and Bacao, Tabular and latent space synthetic data generation: a literature review (2023).
https://doi.org/10.1186/s40537-023-00792-7
Returns
-------
MetricResult
@@ -400,7 +410,7 @@ class CoherenceScore(Metric):
target and synthetic datasets. A higher coherence score indicates better
fidelity between the datasets in terms of their correlation structures.
**Objective**: Similarity
**Objective**: Fidelity
Parameters
----------
@@ -413,6 +423,11 @@ class CoherenceScore(Metric):
**kwargs : dict
Additional keyword arguments passed to the parent class.
References
----------
Yang et al., Structured evaluation of synthetic tabular data (2024).
https://arxiv.org/abs/2403.10424
Returns
-------
MetricResult
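StatisiticalDivergenceScore in this diff is described as computing per-column Jensen-Shannon (JS) and Kullback-Leibler (KL) divergences; JS is symmetric and bounded, unlike raw KL. A minimal per-column sketch over histogram estimates (an assumption about the approach; `js_divergence_per_column` is a hypothetical helper, not pymdma's API):

```python
import numpy as np
from scipy.spatial.distance import jensenshannon

def js_divergence_per_column(real: np.ndarray, synth: np.ndarray, bins: int = 20) -> list[float]:
    """JS divergence between real and synthetic marginals, column by column."""
    scores = []
    for j in range(real.shape[1]):
        lo = min(real[:, j].min(), synth[:, j].min())
        hi = max(real[:, j].max(), synth[:, j].max())
        p, _ = np.histogram(real[:, j], bins=bins, range=(lo, hi), density=True)
        q, _ = np.histogram(synth[:, j], bins=bins, range=(lo, hi), density=True)
        scores.append(jensenshannon(p, q) ** 2)  # squared JS distance = divergence
    return scores

rng = np.random.default_rng(0)
real = rng.normal(size=(500, 3))
synth = rng.normal(loc=0.1, size=(500, 3))
print(js_divergence_per_column(real, synth))  # near 0 for well-matched columns
```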
10 changes: 10 additions & 0 deletions src/pymdma/tabular/measures/synthesis_val/feature/privacy.py
@@ -22,6 +22,11 @@ class NNDRPrivacy(FeatureMetric):
**kwargs : dict
Additional keyword arguments passed to the parent class.
References
----------
Liu et al., Scaling while privacy preserving: A comprehensive synthetic tabular data generation and evaluation in learning analytics (2024).
https://doi.org/10.1145/3636555.3636921
Returns
-------
MetricResult
@@ -123,6 +128,11 @@ class DCRPrivacy(FeatureMetric):
**kwargs : dict
Additional keyword arguments passed to the parent class.
References
----------
Liu et al., Scaling while privacy preserving: A comprehensive synthetic tabular data generation and evaluation in learning analytics (2024).
https://doi.org/10.1145/3636555.3636921
Returns
-------
MetricResult
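For the two privacy metrics above: distance to closest record (DCR) measures how near each synthetic row lies to its nearest real row, and the nearest-neighbour distance ratio (NNDR) divides that distance by the distance to the second-nearest real row; values near zero suggest memorized training records. A minimal sketch under those standard definitions (not pymdma's implementation):

```python
import numpy as np
from sklearn.neighbors import NearestNeighbors

rng = np.random.default_rng(0)
real = rng.normal(size=(200, 5))
synth = rng.normal(size=(100, 5))

nn = NearestNeighbors(n_neighbors=2).fit(real)
dists, _ = nn.kneighbors(synth)   # (100, 2): nearest and second-nearest distances
dcr = dists[:, 0]                 # distance to closest real record
nndr = dists[:, 0] / dists[:, 1]  # ratio in [0, 1]; ~0 flags near-copies
print(dcr.mean(), nndr.mean())
```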
2 changes: 1 addition & 1 deletion tests/test_ts_import.py
@@ -539,7 +539,7 @@ def test_distribution_shift(metric_name, sample_distribution, expected_upper, si
(synth_distance_metrics.CosineSimilarity, 0.8370494332671239),
(synth_shared_metrics.PrecisionRecallDistribution, (0.6881853042325229, 0.6920392785323591)),
(synth_shared_metrics.FrechetDistance, 0.5000000060902672),
(synth_shared_metrics.MultiScaleIntrinsicDistance, 24.476226229017197),
# (synth_shared_metrics.MultiScaleIntrinsicDistance, 24.476226229017197),
(synth_shared_metrics.Authenticity, 0.5),
(synth_shared_metrics.ImprovedPrecision, 1.0),
(synth_shared_metrics.ImprovedRecall, 0.8),
