diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7d9c5cc..ded9ee6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -153,7 +153,7 @@ repos: name: isort - sort Python imports description: Library to sort imports - repo: https://github.com/asottile/pyupgrade - rev: v3.18.0 + rev: v3.19.0 hooks: - id: pyupgrade name: pyupgrade - upgrade syntax for newer versions of the language @@ -169,7 +169,7 @@ repos: # Notebooks ########################################################################################## - repo: https://github.com/nbQA-dev/nbQA - rev: 1.8.7 + rev: 1.9.1 hooks: - id: nbqa-flake8 name: nbqa-flake8 - Python linting (notebooks) @@ -191,7 +191,7 @@ repos: # - id: nbqa-pydocstyle # additional_dependencies: [pydocstyle, toml==0.10.2] - repo: https://github.com/kynan/nbstripout - rev: 0.7.1 + rev: 0.8.1 hooks: - id: nbstripout name: nbstripout - strip outputs from notebooks @@ -270,7 +270,7 @@ repos: # Documentation ########################################################################################## - repo: https://github.com/executablebooks/mdformat - rev: 0.7.17 + rev: 0.7.19 hooks: - id: mdformat name: mdformat - Markdown formatter that can be used to enforce a consistent style in Markdown files diff --git a/CHANGELOG.md b/CHANGELOG.md index 24a3c1e..1bb30dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,11 +3,31 @@ All notable changes to this project will be documented in this file. This format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.5] - 2024-11-29 +Introduce new time-series metrics and documentation updates. 
+ +### Added + - `DTW` metric for time-series synthetic data + - `CrossCorrelation` metric for time-series synthetic data + - Time-series example notebook + - PyPI classifiers for development status and machine learning topics + - Documentation page in pyproject.toml + - `fastdtw` dependency for the `DTW` metric + +### Changed + - removed `pydom` dependency from the `DOM` metric + - DOM metric now has a local implementation + +### Fixed + - PyPI security issues due to direct external `pydom` dependency + + ## [0.1.4] - 2024-11-21 Taxonomy rework and documentation updates. ### Added - readthedocs slug in the README file + - References to tabular metrics ### Changed - Renamed `validation_type` to `validation_domain` diff --git a/DEVELOPER.md b/DEVELOPER.md index a499a97..6ae929d 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -4,11 +4,11 @@ This project uses [Conda](https://anaconda.org/anaconda/python) to manage Python There are three main modalities: `image`, `time_series`, and `tabular`. Each modality has its own folder/submodule in the `pymdma` package. The `general` and `common` modules contain the main classes definitions used in the API and on the package version of the project. -Each modality dependency is defined as an extra in the [pyproject](pyproject.toml) configuration file. Development dependencies are defined as poetry groups in the same file. More information about packaging and dependencies can be found below. +Each modality dependency is defined as an extra in the [pyproject](https://python-poetry.org/docs/pyproject/) configuration file. Development dependencies are defined as poetry groups in the same file. More information about packaging and dependencies can be found below. The `scripts` folder contains shell scripts that can be used to automate common tasks. You can find some examples of execution in this folder. Additionally, the `notebooks` folder contains Jupyter notebooks with examples of how to import and use the package. 
-We also provide a docker image to run a REST API version of the repository. The docker image is built using the [Dockerfile](Dockerfile) in the root of the repository. +We also provide a docker image to run a REST API version of the repository. The docker image is built using the [Dockerfile](https://docs.docker.com/reference/dockerfile/) in the root of the repository. A coding standard is enforced using [Black](https://github.com/psf/black), [isort](https://pypi.org/project/isort/) and [Flake8](https://flake8.pycqa.org/en/latest/). Python 3 type hinting is validated using @@ -25,7 +25,7 @@ ensure you're not pushing any passwords or sensitive information into your Bitbu Commits are rejected if the tool matches any of the configured regular expression patterns that indicate that sensitive information has been stored improperly. -We use [mkdocs](https://www.mkdocs.org) with the [Numpydocs](https://numpydoc.readthedocs.io/en/latest/format.html) style for building documentation. More information on how to build the documentation can be found below. +We use [mkdocs](https://www.mkdocs.org) with the [Numpydocs](https://numpydoc.readthedocs.io/en/latest/format.html) style for building documentation. More information on how to build the documentation can be found below. ## Prerequisites @@ -46,6 +46,7 @@ bash Miniconda3-latest-Linux-x86_64.sh ``` ## Environment Setup + We recommend you install [Conda](https://docs.conda.io/en/latest/) (or similar) to manage your Python versions in your computer. 
After which, you can create a new environment with the following commands: ```shell @@ -70,8 +71,6 @@ Alternatively, you can install the dependencies manually by running the followin (da_metrics) poetry run pre-commit install --install-hooks -t pre-commit -t commit-msg ``` - - ## Packaging and Dependencies This project uses [Conda](https://anaconda.org/anaconda/python) to manage Python virtual environments and [Poetry](https://python-poetry.org/) as the main packaging and dependency manager. @@ -83,6 +82,7 @@ poetry add ``` ### Dependency Groups + To add a dependency into a specific group, use: ``` @@ -114,33 +114,37 @@ poetry install --only ,,... ``` ### Extra Dependencies + To add an extra dependency, use: + ``` -poetry add --extras +poetry add ``` +> **Important:** You will need to update the pyproject.toml dependencies to indicate it as optional, and to specify it in the `extras` parameter of the same configuration file. For an example refer to the poetry documentation on this topic [here](https://python-poetry.org/docs/pyproject/#extras). + To install the extra dependencies, use: + ``` poetry install --extras ``` -Note that `` is the name of the extra dependencies group or a space separated list of extra dependencies. +Note that `` is the name of the extra dependencies group or a space separated list of extra dependencies. -A list of all dependencies can be found in the [pyproject.toml](pyproject.toml) configuration file. - +A list of all dependencies can be found in the [pyproject.toml](https://github.com/fraunhoferportugal/pymdma/blob/main/pyproject.toml) configuration file. ## Git Hooks We rely on [pre-commit](https://pre-commit.com) hooks to ensure that the code is properly-formatted, clean, and type-safe when it's checked in. The `run install` step described below installs the project pre-commit hooks into your repository. These hooks -are configured in [`.pre-commit-config.yaml`](/.pre-commit-config.yaml). 
After installing the development requirements +are configured in [`.pre-commit-config.yaml`](https://github.com/fraunhoferportugal/pymdma/blob/main/.pre-commit-config.yaml). After installing the development requirements and cloning the package, run ``` pre-commit install ``` -from the project root to install the hooks locally. Now before every `git commit ...` these hooks will be run to verify +from the project root to install the hooks locally. Now before every `git commit ...` these hooks will be run to verify that the linting and type checking is correct. If there are errors, the commit will fail, and you will see the changes that need to be made. Alternatively, you can run pre-commit @@ -166,11 +170,11 @@ the pre-commit hooks in the currently active environment). ### Markdown -Local links can be written as normal, but external links should be referenced at the bottom of the Markdown file for clarity. +Local links can be written as normal, but external links should be referenced at the bottom of the Markdown file for clarity. For example: ```md -Use a local link to reference the [`README.md`](../README.md) file, but an external link for [Fraunhofer AICOS][fhp-aicos]. +Use a local link to reference the `README.md` file, but an external link for [Fraunhofer AICOS][fhp-aicos]. [fhp-aicos]: https://www.fraunhofer.pt/ ``` @@ -180,7 +184,7 @@ enforced in all cases, for example with long hyperlinks. ## Testing -\[Tests are written using the `pytest` framework\]\[pytest\], with its configuration in the `pyproject.toml` file. +\[Tests are written using the `pytest` framework\][pytest], with its configuration in the `pyproject.toml` file. Note, only tests in `pymdma/tests` folders folder are run. 
To run the tests, enter the following command in your terminal: @@ -190,7 +194,7 @@ pytest -vvv ### Code coverage -\[Code coverage of Python scripts is measured using the `coverage` Python package\]\[coverage\]; its configuration +\[Code coverage of Python scripts is measured using the `coverage` Python package\][coverage]; its configuration can be found in `pyproject.toml`. To run code coverage, and view it as an HTML report, enter the following command in your terminal: @@ -208,6 +212,7 @@ make coverage-html The HTML report can be accessed at `htmlcov/index.html`. ## Generating Documentation + The documentation is written in Markdown and follows the [Numpy Style Python Docstring](https://numpydoc.readthedocs.io/en/latest/format.html) format. All documentation source files is in the `docs` folder. To build the documentation, run the following commands: ```shell @@ -218,7 +223,8 @@ make mkdocs-serve # serve the documentation locally The documentation will be built in the `docs/_build` folder. The default link to access the documentation is `http://localhost:8000`. ## Docker Encapsulation -We developed a Docker image to encapsulate the REST API server version of the repository, for internal use. The server is built using the [FastAPI](https://fastapi.tiangolo.com/) framework. A list of frozen dependencies can be found in [requirements-prod.txt](requirements/requirements-prod.txt). The image is built from the [Dockerfile](Dockerfile) in the root of the repository. + +We developed a Docker image to encapsulate the REST API server version of the repository, for internal use. The server is built using the [FastAPI](https://fastapi.tiangolo.com/) framework. A list of frozen dependencies can be found in [requirements-prod.txt](https://github.com/fraunhoferportugal/pymdma/blob/main/requirements/requirements-prod.txt). The image is built from the [Dockerfile](https://github.com/fraunhoferportugal/pymdma/blob/main/Dockerfile) in the root of the repository. 
To build the Docker image, run the following command: @@ -231,8 +237,8 @@ To run the Docker image, run the following command: ```shell docker run -d -p 8080:8000 -v ./data/:/app/data/ pymdma ``` -This will start the server on port `8080` in the host machine and mount the `data` folder in the container. The server documentation can be accessed at `http://localhost:8080/docs`. Dataset files should be placed in the data folder to be accessed by the server. You should follow the current structure of datasets in the data +This will start the server on port `8080` in the host machine and mount the `data` folder in the container. The server documentation can be accessed at `http://localhost:8080/docs`. Dataset files should be placed in the data folder to be accessed by the server. You should follow the current structure of datasets in the data ## Set private environment variables in .envrc file @@ -250,7 +256,6 @@ All variables from .env are loaded in config.py automatically. If you have [direnv](https://direnv.net/docs/installation.html) correctly configured, when entering the directory of this project through the command line interface the conda environment and the virtual environment should be automatically activated. If this does not work, try running `$ direnv allow`, cd out of the directory and then cd into the directory again; the identification of the two activated environments should appear to the left of the terminal (not always the case when using VS Code). - +You can either install this package via pip (if you want access to individual modules) or clone the repository (if you wish to contribute to the project or change the code in any way). Currently, the package supports the following modalities: `image`, `tabular`, and `time_series`. -At this moment only the installation from source is available. +You should install the package in a virtual environment to avoid conflicts with system packages. 
Please consult the official [documentation](https://docs.python.org/3/library/venv.html) for developing with virtual environments. -You should install the package in a virtual environment to avoid conflicts with other packages. Please consult the official [documentation](https://docs.python.org/3/library/venv.html) for developing with virtual environments. - - - -### 1.1 Installing from source - -If you want to clone the repository, you can do so with the following commands: - -```bash -git clone --recursive https://github.com/fraunhoferportugal/pymdma.git -cd pymdma -``` - -**(Recommended)** Install and activate conda dependencies for python version and package management: +Choose the one(s) that best suits your needs. -```bash -conda env create -f environment.yml -conda activate da_metrics -``` +> **Note:** for a minimal installation without CUDA support, you can install the package without the `cuda` dependencies. This can be done by forcing pip to install torch from the CPU index with the `--find-url=https://download.pytorch.org/whl/cpu/torch_stable.html` command. **You will not have access to the GPU-accelerated features.** -This repository can evaluate three different modalities: `image`, `tabular`, and `time_series`. If you wish to test only one data modality, you can install only the required dependencies. Before running any commands, make sure you have the latest versions of `pip` and `setuptools` installed. +### 1.1 Installing from source -After this, you can install the package with the following command: +You can install the package from source with the following command: ```bash -pip install . # install base from source +pip install "pymdma @ git+https://github.com/fraunhoferportugal/pymdma.git" ``` Depending on the data modality you want to use, you may need to install additional dependencies. 
The following commands will install the dependencies for each modality: ```bash -pip install .[image] # image dependencies -pip install .[tabular] # tabular dependencies -pip install .[time_series] # time series dependencies +pip install "pymdma[image] @ git+https://github.com/fraunhoferportugal/pymdma.git" # image dependencies +pip install "pymdma[tabular] @ git+https://github.com/fraunhoferportugal/pymdma.git" # tabular dependencies +pip install "pymdma[time_series] @ git+https://github.com/fraunhoferportugal/pymdma.git" # time series dependencies ``` -> **Note:** The previous commands install the components from the base of the repository. If you are in another directory, you should replace `.` with the path to the repository's base. - For a minimal installation, you can install the package without CUDA support by forcing pip to install torch from the CPU index with the `--find-links` command. ## 2. Execution Examples -The package provides a CLI interface for automatically evaluating folder datasets. You can also import the metrics for a specific modality and use them in your code. +The package provides a CLI interface for automatically evaluating folder datasets. You can also import the metrics for a specific modality and use them in your code. Before running any commands make sure the package was correctly installed. -## 2.1. CLI Execution +### 2.1. Importing Modality Metrics -To evaluate a dataset, you can use the CLI interface.
The following command will list the available commands: - -```bash -pymdma --help # list available commands -``` - -Following is an example of executing the evaluation of a synthetic dataset with regard to a reference dataset: - -```bash -pymdma --modality image \ - --validation_domain synth \ -    --reference_type dataset \ - --evaluation_level dataset \ -    --reference_data data/test/image/synthesis_val/reference \ - --target_data data/test/image/synthesis_val/dataset \ -    --batch_size 3 \ - --metric_category feature \ -    --output_dir reports/image_metrics/ -``` - -This will evaluate the synthetic dataset in the `data/test/image/synthesis_val/dataset` with regard to the reference dataset in `data/test/image/synthesis_val/reference`. The evaluation will be done at the dataset level and the report will be saved in the `reports/image_metrics/` folder in JSON format. Only feature metrics will be computed for this evaluation. - -## 2.2. Importing Modality Metrics - -You can also import the metrics for a specific modality and use them in your code. The following example shows how to import an image metric and use it to evaluate input images in terms of sharpness. Note that this metric only returns the sharpness value for each image (i.e. the instance level value). The dataset level value is none. +You can import the metrics for a specific modality and use them in your code. The following example shows how to import an image metric and use it to evaluate input images in terms of sharpness. Note that this metric only returns the sharpness value for each image (i.e. the instance- level value). The dataset-level value is none. 
```python from pymdma.image.measures.input_val import Tenengrad @@ -163,8 +123,8 @@ Now you can calculate the Improved Precision and Recall of the synthetic dataset ```python from pymdma.image.measures.synthesis_val import ImprovedPrecision, ImprovedRecall -ip = ImprovedPrecision() # Improved Precision metric -ir = ImprovedRecall() # Improved Recall metric +ip = ImprovedPrecision() # Improved Precision metric +ir = ImprovedRecall() # Improved Recall metric # Compute the metrics ip_result = ip.compute(ref_features, synth_features) @@ -181,6 +141,30 @@ print(f"Precision: {precision_instance} | Recall: {recall_instance}") You can find more examples of execution in the [notebooks](notebooks) folder. +### 2.2. CLI Execution + +To evaluate a dataset, you can use the CLI interface. The following command will list the available commands: + +```bash +pymdma --help # list available commands +``` + +Following is an example of executing the evaluation of a synthetic dataset with regard to a reference dataset: + +```bash +pymdma --modality image \ + --validation_domain synth \ + --reference_type dataset \ + --evaluation_level dataset \ + --reference_data data/test/image/synthesis_val/reference \ + --target_data data/test/image/synthesis_val/dataset \ + --batch_size 3 \ + --metric_category feature \ + --output_dir reports/image_metrics/ +``` + +This will evaluate the synthetic dataset in the `data/test/image/synthesis_val/dataset` with regard to the reference dataset in `data/test/image/synthesis_val/reference`. The evaluation will be done at the dataset level and the report will be saved in the `reports/image_metrics/` folder in JSON format. Only feature metrics will be computed for this evaluation. + ## Documentation Full documentation is available here: [`docs/`](docs). 
@@ -225,6 +209,7 @@ If you publish work that uses pyMDMA, please cite pyMDMA as follows: ``` ## Acknowledgments -This work was funded by AISym4Med project number 101095387, supported by the European Health and Digital Executive Agency (HADEA), granting authority under the powers delegated by the European Commision. More information on this project can be found [here](https://aisym4med.eu/). + +This work was funded by AISym4Med project number 101095387, supported by the European Health and Digital Executive Agency (HADEA), granting authority under the powers delegated by the European Commission. More information on this project can be found [here](https://aisym4med.eu/). This work was supported by European funds through the Recovery and Resilience Plan, project ”Center for Responsible AI”, project number C645008882-00000055. Learn more about this project [here](https://centerforresponsible.ai/). diff --git a/VERSION b/VERSION index 446ba66..9faa1b7 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.1.4 \ No newline at end of file +0.1.5 diff --git a/data/test/time_series/synthesis_val/dataset/g10/HR09000.hea b/data/test/time_series/synthesis_val/dataset/g10/HR09000.hea deleted file mode 100755 index 0ec4512..0000000 --- a/data/test/time_series/synthesis_val/dataset/g10/HR09000.hea +++ /dev/null @@ -1,19 +0,0 @@ -HR09000 12 500 5000 -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -40 3961 0 I -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 285 -31929 0 II -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 325 29643 0 III -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -123 14145 0 aVR -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -183 20017 0 aVL -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 305 31742 0 aVF -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 75 30470 0 V1 -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -280 856 0 V2 -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -535 6824 0 V3 -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -45 3984 0 V4 -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -245 18812 0 V5 -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -265 24315 
0 V6 -# Age: 72 -# Sex: Female -# Dx: 164934002,284470004,426783006,427084000 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/dataset/g10/HR09000.mat b/data/test/time_series/synthesis_val/dataset/g10/HR09000.mat deleted file mode 100755 index 84b33e6..0000000 Binary files a/data/test/time_series/synthesis_val/dataset/g10/HR09000.mat and /dev/null differ diff --git a/data/test/time_series/synthesis_val/reference/g1/HR00001.hea b/data/test/time_series/synthesis_val/reference/g1/HR00001.hea deleted file mode 100755 index 759b7ba..0000000 --- a/data/test/time_series/synthesis_val/reference/g1/HR00001.hea +++ /dev/null @@ -1,19 +0,0 @@ -HR00001 12 500 5000 -HR00001.mat 16x1+24 1000.0(0)/mv 16 0 -115 13047 0 I -HR00001.mat 16x1+24 1000.0(0)/mv 16 0 -50 11561 0 II -HR00001.mat 16x1+24 1000.0(0)/mv 16 0 65 -1486 0 III -HR00001.mat 16x1+24 1000.0(0)/mv 16 0 82 -12346 0 aVR -HR00001.mat 16x1+24 1000.0(0)/mv 16 0 -90 7539 0 aVL -HR00001.mat 16x1+24 1000.0(0)/mv 16 0 7 5145 0 aVF -HR00001.mat 16x1+24 1000.0(0)/mv 16 0 -65 -5719 0 V1 -HR00001.mat 16x1+24 1000.0(0)/mv 16 0 -40 -21509 0 V2 -HR00001.mat 16x1+24 1000.0(0)/mv 16 0 -5 -1304 0 V3 -HR00001.mat 16x1+24 1000.0(0)/mv 16 0 -35 -15227 0 V4 -HR00001.mat 16x1+24 1000.0(0)/mv 16 0 -35 4821 0 V5 -HR00001.mat 16x1+24 1000.0(0)/mv 16 0 -75 12159 0 V6 -# Age: 56 -# Sex: Female -# Dx: 251146004,426783006 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/reference/g1/HR00001.mat b/data/test/time_series/synthesis_val/reference/g1/HR00001.mat deleted file mode 100755 index 9886e45..0000000 Binary files a/data/test/time_series/synthesis_val/reference/g1/HR00001.mat and /dev/null differ diff --git a/data/test/time_series/synthesis_val/reference/g1/HR00002.hea b/data/test/time_series/synthesis_val/reference/g1/HR00002.hea deleted file mode 100755 index 675e51f..0000000 --- a/data/test/time_series/synthesis_val/reference/g1/HR00002.hea +++ /dev/null 
@@ -1,19 +0,0 @@ -HR00002 12 500 5000 -HR00002.mat 16x1+24 1000.0(0)/mv 16 0 -15 -17863 0 I -HR00002.mat 16x1+24 1000.0(0)/mv 16 0 120 -17160 0 II -HR00002.mat 16x1+24 1000.0(0)/mv 16 0 135 734 0 III -HR00002.mat 16x1+24 1000.0(0)/mv 16 0 -53 17529 0 aVR -HR00002.mat 16x1+24 1000.0(0)/mv 16 0 -75 -9284 0 aVL -HR00002.mat 16x1+24 1000.0(0)/mv 16 0 127 -8311 0 aVF -HR00002.mat 16x1+24 1000.0(0)/mv 16 0 -105 22419 0 V1 -HR00002.mat 16x1+24 1000.0(0)/mv 16 0 70 -2478 0 V2 -HR00002.mat 16x1+24 1000.0(0)/mv 16 0 115 -5164 0 V3 -HR00002.mat 16x1+24 1000.0(0)/mv 16 0 160 4753 0 V4 -HR00002.mat 16x1+24 1000.0(0)/mv 16 0 60 15615 0 V5 -HR00002.mat 16x1+24 1000.0(0)/mv 16 0 65 1959 0 V6 -# Age: 19 -# Sex: Male -# Dx: 426177001,426783006 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/reference/g1/HR00002.mat b/data/test/time_series/synthesis_val/reference/g1/HR00002.mat deleted file mode 100755 index 456cd4e..0000000 Binary files a/data/test/time_series/synthesis_val/reference/g1/HR00002.mat and /dev/null differ diff --git a/data/test/time_series/synthesis_val/reference/g1/HR00003.hea b/data/test/time_series/synthesis_val/reference/g1/HR00003.hea deleted file mode 100755 index 2ae20c5..0000000 --- a/data/test/time_series/synthesis_val/reference/g1/HR00003.hea +++ /dev/null @@ -1,19 +0,0 @@ -HR00003 12 500 5000 -HR00003.mat 16x1+24 1000.0(0)/mv 16 0 -35 -13900 0 I -HR00003.mat 16x1+24 1000.0(0)/mv 16 0 -70 28343 0 II -HR00003.mat 16x1+24 1000.0(0)/mv 16 0 -35 -23318 0 III -HR00003.mat 16x1+24 1000.0(0)/mv 16 0 53 -7439 0 aVR -HR00003.mat 16x1+24 1000.0(0)/mv 16 0 0 -28050 0 aVL -HR00003.mat 16x1+24 1000.0(0)/mv 16 0 -52 -30366 0 aVF -HR00003.mat 16x1+24 1000.0(0)/mv 16 0 10 -1493 0 V1 -HR00003.mat 16x1+24 1000.0(0)/mv 16 0 -50 2635 0 V2 -HR00003.mat 16x1+24 1000.0(0)/mv 16 0 -115 -15551 0 V3 -HR00003.mat 16x1+24 1000.0(0)/mv 16 0 -100 1491 0 V4 -HR00003.mat 16x1+24 1000.0(0)/mv 16 0 -75 25979 0 V5 -HR00003.mat 16x1+24 1000.0(0)/mv 
16 0 -65 21763 0 V6 -# Age: 37 -# Sex: Female -# Dx: 426783006 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/reference/g1/HR00003.mat b/data/test/time_series/synthesis_val/reference/g1/HR00003.mat deleted file mode 100755 index 8d6273d..0000000 Binary files a/data/test/time_series/synthesis_val/reference/g1/HR00003.mat and /dev/null differ diff --git a/data/test/time_series/synthesis_val/reference/g1/HR00004.hea b/data/test/time_series/synthesis_val/reference/g1/HR00004.hea deleted file mode 100755 index 9d363a5..0000000 --- a/data/test/time_series/synthesis_val/reference/g1/HR00004.hea +++ /dev/null @@ -1,19 +0,0 @@ -HR00004 12 500 5000 -HR00004.mat 16x1+24 1000.0(0)/mv 16 0 -55 -5312 0 I -HR00004.mat 16x1+24 1000.0(0)/mv 16 0 -155 19933 0 II -HR00004.mat 16x1+24 1000.0(0)/mv 16 0 -100 25209 0 III -HR00004.mat 16x1+24 1000.0(0)/mv 16 0 105 25396 0 aVR -HR00004.mat 16x1+24 1000.0(0)/mv 16 0 23 17493 0 aVL -HR00004.mat 16x1+24 1000.0(0)/mv 16 0 -127 22471 0 aVF -HR00004.mat 16x1+24 1000.0(0)/mv 16 0 25 -18092 0 V1 -HR00004.mat 16x1+24 1000.0(0)/mv 16 0 -40 5914 0 V2 -HR00004.mat 16x1+24 1000.0(0)/mv 16 0 -45 22646 0 V3 -HR00004.mat 16x1+24 1000.0(0)/mv 16 0 -305 -17099 0 V4 -HR00004.mat 16x1+24 1000.0(0)/mv 16 0 -185 3660 0 V5 -HR00004.mat 16x1+24 1000.0(0)/mv 16 0 -175 -11623 0 V6 -# Age: 24 -# Sex: Male -# Dx: 426783006 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/reference/g1/HR00004.mat b/data/test/time_series/synthesis_val/reference/g1/HR00004.mat deleted file mode 100755 index 3841b5f..0000000 Binary files a/data/test/time_series/synthesis_val/reference/g1/HR00004.mat and /dev/null differ diff --git a/data/test/time_series/synthesis_val/reference/g10/HR09000.hea b/data/test/time_series/synthesis_val/reference/g10/HR09000.hea deleted file mode 100755 index 0ec4512..0000000 --- a/data/test/time_series/synthesis_val/reference/g10/HR09000.hea +++ /dev/null @@ -1,19 
+0,0 @@ -HR09000 12 500 5000 -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -40 3961 0 I -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 285 -31929 0 II -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 325 29643 0 III -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -123 14145 0 aVR -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -183 20017 0 aVL -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 305 31742 0 aVF -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 75 30470 0 V1 -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -280 856 0 V2 -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -535 6824 0 V3 -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -45 3984 0 V4 -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -245 18812 0 V5 -HR09000.mat 16x1+24 1000.0(0)/mv 16 0 -265 24315 0 V6 -# Age: 72 -# Sex: Female -# Dx: 164934002,284470004,426783006,427084000 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/reference/g10/HR09000.mat b/data/test/time_series/synthesis_val/reference/g10/HR09000.mat deleted file mode 100755 index 84b33e6..0000000 Binary files a/data/test/time_series/synthesis_val/reference/g10/HR09000.mat and /dev/null differ diff --git a/data/test/time_series/synthesis_val/reference/g10/HR09001.hea b/data/test/time_series/synthesis_val/reference/g10/HR09001.hea deleted file mode 100755 index 3b34275..0000000 --- a/data/test/time_series/synthesis_val/reference/g10/HR09001.hea +++ /dev/null @@ -1,19 +0,0 @@ -HR09001 12 500 5000 -HR09001.mat 16x1+24 1000.0(0)/mv 16 0 -245 -13599 0 I -HR09001.mat 16x1+24 1000.0(0)/mv 16 0 -110 7041 0 II -HR09001.mat 16x1+24 1000.0(0)/mv 16 0 135 20624 0 III -HR09001.mat 16x1+24 1000.0(0)/mv 16 0 177 3072 0 aVR -HR09001.mat 16x1+24 1000.0(0)/mv 16 0 -190 -16654 0 aVL -HR09001.mat 16x1+24 1000.0(0)/mv 16 0 12 13696 0 aVF -HR09001.mat 16x1+24 1000.0(0)/mv 16 0 55 23475 0 V1 -HR09001.mat 16x1+24 1000.0(0)/mv 16 0 120 19284 0 V2 -HR09001.mat 16x1+24 1000.0(0)/mv 16 0 -40 -23886 0 V3 -HR09001.mat 16x1+24 1000.0(0)/mv 16 0 110 18254 0 V4 -HR09001.mat 16x1+24 1000.0(0)/mv 16 0 20 16013 0 V5 
-HR09001.mat 16x1+24 1000.0(0)/mv 16 0 90 18172 0 V6 -# Age: 62 -# Sex: Female -# Dx: 426783006 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/reference/g10/HR09001.mat b/data/test/time_series/synthesis_val/reference/g10/HR09001.mat deleted file mode 100755 index 1042e21..0000000 Binary files a/data/test/time_series/synthesis_val/reference/g10/HR09001.mat and /dev/null differ diff --git a/data/test/time_series/synthesis_val/reference/g10/HR09002.hea b/data/test/time_series/synthesis_val/reference/g10/HR09002.hea deleted file mode 100755 index a260dc1..0000000 --- a/data/test/time_series/synthesis_val/reference/g10/HR09002.hea +++ /dev/null @@ -1,19 +0,0 @@ -HR09002 12 500 5000 -HR09002.mat 16x1+24 1000.0(0)/mv 16 0 -260 -27919 0 I -HR09002.mat 16x1+24 1000.0(0)/mv 16 0 -170 6686 0 II -HR09002.mat 16x1+24 1000.0(0)/mv 16 0 90 -30919 0 III -HR09002.mat 16x1+24 1000.0(0)/mv 16 0 215 10209 0 aVR -HR09002.mat 16x1+24 1000.0(0)/mv 16 0 -175 -30950 0 aVL -HR09002.mat 16x1+24 1000.0(0)/mv 16 0 -40 20736 0 aVF -HR09002.mat 16x1+24 1000.0(0)/mv 16 0 145 13060 0 V1 -HR09002.mat 16x1+24 1000.0(0)/mv 16 0 -55 11636 0 V2 -HR09002.mat 16x1+24 1000.0(0)/mv 16 0 -80 11654 0 V3 -HR09002.mat 16x1+24 1000.0(0)/mv 16 0 -45 28058 0 V4 -HR09002.mat 16x1+24 1000.0(0)/mv 16 0 -25 -5071 0 V5 -HR09002.mat 16x1+24 1000.0(0)/mv 16 0 -75 23897 0 V6 -# Age: 49 -# Sex: Female -# Dx: 426783006 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/reference/g10/HR09002.mat b/data/test/time_series/synthesis_val/reference/g10/HR09002.mat deleted file mode 100755 index 83264fe..0000000 Binary files a/data/test/time_series/synthesis_val/reference/g10/HR09002.mat and /dev/null differ diff --git a/data/test/time_series/synthesis_val/reference/g10/HR09003.hea b/data/test/time_series/synthesis_val/reference/g10/HR09003.hea deleted file mode 100755 index 637271b..0000000 --- 
a/data/test/time_series/synthesis_val/reference/g10/HR09003.hea +++ /dev/null @@ -1,19 +0,0 @@ -HR09003 12 500 5000 -HR09003.mat 16x1+24 1000.0(0)/mv 16 0 155 -1138 0 I -HR09003.mat 16x1+24 1000.0(0)/mv 16 0 265 -8069 0 II -HR09003.mat 16x1+24 1000.0(0)/mv 16 0 110 -6877 0 III -HR09003.mat 16x1+24 1000.0(0)/mv 16 0 -210 4735 0 aVR -HR09003.mat 16x1+24 1000.0(0)/mv 16 0 22 3057 0 aVL -HR09003.mat 16x1+24 1000.0(0)/mv 16 0 187 -7275 0 aVF -HR09003.mat 16x1+24 1000.0(0)/mv 16 0 -210 -1002 0 V1 -HR09003.mat 16x1+24 1000.0(0)/mv 16 0 -265 -4324 0 V2 -HR09003.mat 16x1+24 1000.0(0)/mv 16 0 235 -1098 0 V3 -HR09003.mat 16x1+24 1000.0(0)/mv 16 0 835 4133 0 V4 -HR09003.mat 16x1+24 1000.0(0)/mv 16 0 510 -1304 0 V5 -HR09003.mat 16x1+24 1000.0(0)/mv 16 0 325 -14175 0 V6 -# Age: 68 -# Sex: Male -# Dx: 111975006,164884008,270492004 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/reference/g10/HR09003.mat b/data/test/time_series/synthesis_val/reference/g10/HR09003.mat deleted file mode 100755 index 3784cab..0000000 Binary files a/data/test/time_series/synthesis_val/reference/g10/HR09003.mat and /dev/null differ diff --git a/data/test/time_series/synthesis_val/reference/g10/HR09004.hea b/data/test/time_series/synthesis_val/reference/g10/HR09004.hea deleted file mode 100755 index 835e793..0000000 --- a/data/test/time_series/synthesis_val/reference/g10/HR09004.hea +++ /dev/null @@ -1,19 +0,0 @@ -HR09004 12 500 5000 -HR09004.mat 16x1+24 1000.0(0)/mv 16 0 100 -15360 0 I -HR09004.mat 16x1+24 1000.0(0)/mv 16 0 -490 3876 0 II -HR09004.mat 16x1+24 1000.0(0)/mv 16 0 -590 19228 0 III -HR09004.mat 16x1+24 1000.0(0)/mv 16 0 195 5232 0 aVR -HR09004.mat 16x1+24 1000.0(0)/mv 16 0 345 -17428 0 aVL -HR09004.mat 16x1+24 1000.0(0)/mv 16 0 -540 11278 0 aVF -HR09004.mat 16x1+24 1000.0(0)/mv 16 0 -155 4836 0 V1 -HR09004.mat 16x1+24 1000.0(0)/mv 16 0 -100 -4126 0 V2 -HR09004.mat 16x1+24 1000.0(0)/mv 16 0 -140 10475 0 V3 -HR09004.mat 16x1+24 1000.0(0)/mv 16 0 
-165 2657 0 V4 -HR09004.mat 16x1+24 1000.0(0)/mv 16 0 -145 -15225 0 V5 -HR09004.mat 16x1+24 1000.0(0)/mv 16 0 -250 2304 0 V6 -# Age: 86 -# Sex: Female -# Dx: 164865005,445118002 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/reference/g10/HR09004.mat b/data/test/time_series/synthesis_val/reference/g10/HR09004.mat deleted file mode 100755 index 9f3a3ad..0000000 Binary files a/data/test/time_series/synthesis_val/reference/g10/HR09004.mat and /dev/null differ diff --git a/data/test/time_series/synthesis_val/reference/g11/HR10000.hea b/data/test/time_series/synthesis_val/reference/g11/HR10000.hea deleted file mode 100755 index 9c0f102..0000000 --- a/data/test/time_series/synthesis_val/reference/g11/HR10000.hea +++ /dev/null @@ -1,19 +0,0 @@ -HR10000 12 500 5000 -HR10000.mat 16x1+24 1000.0(0)/mv 16 0 20 31646 0 I -HR10000.mat 16x1+24 1000.0(0)/mv 16 0 -35 16416 0 II -HR10000.mat 16x1+24 1000.0(0)/mv 16 0 -55 -15220 0 III -HR10000.mat 16x1+24 1000.0(0)/mv 16 0 8 -24064 0 aVR -HR10000.mat 16x1+24 1000.0(0)/mv 16 0 38 23562 0 aVL -HR10000.mat 16x1+24 1000.0(0)/mv 16 0 -45 779 0 aVF -HR10000.mat 16x1+24 1000.0(0)/mv 16 0 0 8881 0 V1 -HR10000.mat 16x1+24 1000.0(0)/mv 16 0 -30 26340 0 V2 -HR10000.mat 16x1+24 1000.0(0)/mv 16 0 -40 -7604 0 V3 -HR10000.mat 16x1+24 1000.0(0)/mv 16 0 -70 21911 0 V4 -HR10000.mat 16x1+24 1000.0(0)/mv 16 0 5 21378 0 V5 -HR10000.mat 16x1+24 1000.0(0)/mv 16 0 145 -3865 0 V6 -# Age: 45 -# Sex: Female -# Dx: 426783006 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/reference/g11/HR10000.mat b/data/test/time_series/synthesis_val/reference/g11/HR10000.mat deleted file mode 100755 index 8ca2097..0000000 Binary files a/data/test/time_series/synthesis_val/reference/g11/HR10000.mat and /dev/null differ diff --git a/data/test/time_series/synthesis_val/reference/g12/HR11000.hea b/data/test/time_series/synthesis_val/reference/g12/HR11000.hea deleted file mode 100755 index 
3c4dda8..0000000 --- a/data/test/time_series/synthesis_val/reference/g12/HR11000.hea +++ /dev/null @@ -1,19 +0,0 @@ -HR11000 12 500 5000 -HR11000.mat 16x1+24 1000.0(0)/mv 16 0 -30 -27985 0 I -HR11000.mat 16x1+24 1000.0(0)/mv 16 0 -60 -16989 0 II -HR11000.mat 16x1+24 1000.0(0)/mv 16 0 -30 10993 0 III -HR11000.mat 16x1+24 1000.0(0)/mv 16 0 45 22809 0 aVR -HR11000.mat 16x1+24 1000.0(0)/mv 16 0 0 -19023 0 aVL -HR11000.mat 16x1+24 1000.0(0)/mv 16 0 -45 -2421 0 aVF -HR11000.mat 16x1+24 1000.0(0)/mv 16 0 -5 -15109 0 V1 -HR11000.mat 16x1+24 1000.0(0)/mv 16 0 20 12014 0 V2 -HR11000.mat 16x1+24 1000.0(0)/mv 16 0 -125 -21530 0 V3 -HR11000.mat 16x1+24 1000.0(0)/mv 16 0 -240 -1286 0 V4 -HR11000.mat 16x1+24 1000.0(0)/mv 16 0 -135 14231 0 V5 -HR11000.mat 16x1+24 1000.0(0)/mv 16 0 -125 29062 0 V6 -# Age: 35 -# Sex: Male -# Dx: 426783006 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/reference/g12/HR11000.mat b/data/test/time_series/synthesis_val/reference/g12/HR11000.mat deleted file mode 100755 index b507f54..0000000 Binary files a/data/test/time_series/synthesis_val/reference/g12/HR11000.mat and /dev/null differ diff --git a/data/test/time_series/synthesis_val/reference/g13/HR12000.hea b/data/test/time_series/synthesis_val/reference/g13/HR12000.hea deleted file mode 100755 index 7b9a0d9..0000000 --- a/data/test/time_series/synthesis_val/reference/g13/HR12000.hea +++ /dev/null @@ -1,19 +0,0 @@ -HR12000 12 500 5000 -HR12000.mat 16x1+24 1000.0(0)/mv 16 0 705 30373 0 I -HR12000.mat 16x1+24 1000.0(0)/mv 16 0 485 -7795 0 II -HR12000.mat 16x1+24 1000.0(0)/mv 16 0 -220 27374 0 III -HR12000.mat 16x1+24 1000.0(0)/mv 16 0 -595 21176 0 aVR -HR12000.mat 16x1+24 1000.0(0)/mv 16 0 462 -31042 0 aVL -HR12000.mat 16x1+24 1000.0(0)/mv 16 0 132 9821 0 aVF -HR12000.mat 16x1+24 1000.0(0)/mv 16 0 -435 31835 0 V1 -HR12000.mat 16x1+24 1000.0(0)/mv 16 0 490 24428 0 V2 -HR12000.mat 16x1+24 1000.0(0)/mv 16 0 40 -4286 0 V3 -HR12000.mat 16x1+24 1000.0(0)/mv 16 0 
-260 -18112 0 V4 -HR12000.mat 16x1+24 1000.0(0)/mv 16 0 -370 -8573 0 V5 -HR12000.mat 16x1+24 1000.0(0)/mv 16 0 -115 -10681 0 V6 -# Age: 87 -# Sex: Female -# Dx: 10370003 -# Rx: Unknown -# Hx: Unknown -# Sx: Unknown diff --git a/data/test/time_series/synthesis_val/reference/g13/HR12000.mat b/data/test/time_series/synthesis_val/reference/g13/HR12000.mat deleted file mode 100755 index d88ba84..0000000 Binary files a/data/test/time_series/synthesis_val/reference/g13/HR12000.mat and /dev/null differ diff --git a/docs/image/input_val.md b/docs/image/input_val.md index d36e115..35d4911 100644 --- a/docs/image/input_val.md +++ b/docs/image/input_val.md @@ -2,6 +2,7 @@ ## Data-based ### Quality (No-reference) +::: pymdma.image.measures.input_val.DOM ::: pymdma.image.measures.input_val.Tenengrad ::: pymdma.image.measures.input_val.TenengradRelative ::: pymdma.image.measures.input_val.EME diff --git a/docs/installation.md b/docs/installation.md index 5521a12..7381343 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,21 +1,21 @@ # Installation -PyPI is currently unavailable. To install the package, you can install it directly from the git repository. To do so, run the following command: +To install the package, you can run the following command: -```bash -$ pip install "pymdma @ git+https://github.com/fraunhoferportugal/pymdma.git" +```shell +$ pip install pymdma ``` Depending on the data modality you are working with, you may need to install additional dependencies. We have three groups of denpendencies: `image`, `tabular` and `time_series`. As an example, to work with image data, you will need to run the following command: -```bash -$ pip install "pymdma[image] @ git+https://github.com/fraunhoferportugal/pymdma.git" +```shell +$ pip install pymdma[image] ``` -You can also install multiple modalities by passing the desired modalities as a comma-separated list. 
For example, to install both image and tabular modalities, you can run the following command: +Alternatively, you can install multiple modalities by passing the desired modalities as a comma-separated list. For example, to install both image and tabular modalities, you can run the following command: -```bash -$ pip install "pymdma[image,tabular] @ git+https://github.com/fraunhoferportugal/pymdma.git" +```shell +$ pip install pymdma[image,tabular] ``` ## Minimal Version (CPU) @@ -23,5 +23,5 @@ $ pip install "pymdma[image,tabular] @ git+https://github.com/fraunhoferportugal For a minimal installation (without GPU support), you can install the package with CPU version of torch, which will skip the installation of CUDA dependencies. To do so, run the following command: ```bash -$ pip install pymdma[...] @ git+https://github.com/fraunhoferportugal/pymdma.git --find-url=https://download.pytorch.org/whl/cpu/torch_stable.html +$ pip install pymdma[...] --find-url=https://download.pytorch.org/whl/cpu/torch_stable.html ``` diff --git a/docs/resources/pymdma_schema_1.png b/docs/resources/pymdma_schema_1.png index e6ffbe5..6f7f355 100644 Binary files a/docs/resources/pymdma_schema_1.png and b/docs/resources/pymdma_schema_1.png differ diff --git a/docs/time_series/synth_val.md b/docs/time_series/synth_val.md index 5c8c481..392e227 100644 --- a/docs/time_series/synth_val.md +++ b/docs/time_series/synth_val.md @@ -20,3 +20,9 @@ These metrics require a preprocessing of the data, to extract the features that ### Privacy ::: pymdma.time_series.measures.synthesis_val.Authenticity + +## Data-based +### Quality +::: pymdma.time_series.measures.synthesis_val.DTW +::: pymdma.time_series.measures.synthesis_val.CrossCorrelation + diff --git a/notebooks/time_series_examples.ipynb b/notebooks/time_series_examples.ipynb new file mode 100644 index 0000000..fef7d95 --- /dev/null +++ b/notebooks/time_series_examples.ipynb @@ -0,0 +1,352 @@ +{ + "cells": [ + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install \"pymdma[time_series] @ https://github.com/fraunhoferportugal/pymdma.git\" --find-links \"https://download.pytorch.org/whl/cpu/torch_stable.html\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load data that simulates both real and synthetic samples for metric computation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "from pymdma.time_series.input_layer import TimeSeriesInputLayer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "parent_dir = os.path.dirname(os.getcwd())\n", + "\n", + "validation_domain = \"synthesis_val\"\n", + "reference_type = \"dataset\"\n", + "target_data_path = Path(parent_dir + \"/data/test/time_series/synthesis_val/dataset/\")\n", + "reference_data_path = Path(parent_dir + \"/data/test/time_series/synthesis_val/reference/\")\n", + "batch_size = 5\n", + "\n", + "ts_input_layer = TimeSeriesInputLayer(\n", + " validation_domain == validation_domain,\n", + " reference_type=reference_type,\n", + " target_data=target_data_path,\n", + " reference_data=reference_data_path,\n", + " batch_size=batch_size,\n", + ")\n", + "\n", + "\n", + "# Get raw data for input validation\n", + "ref_data, target_data = ts_input_layer.get_full_samples()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " Explore data shapes and plot Lead I of a real ECG tracing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Acess shape\n", + "shape_ref = ref_data.shape\n", + "shape_target = target_data.shape\n", + "\n", + "print(\n", + " f\"Reference/Real data Shape: {shape_ref} | {shape_ref[0]} ECG tracings, each {shape_ref[1]} samples long with {shape_ref[2]} channels\"\n", + ")\n", + "print(\n", + " f\"Target/Synthetic data Shape: {shape_target} | {shape_target[0]} tracings, each {shape_target[1]} samples long with {shape_target[2]} channels\"\n", + ")\n", + "\n", + "# Plot Lead I of a Real ECG Signal\n", + "plt.plot(ref_data[0, :, 0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_instances_score(signals: list[np.ndarray], metric: str, scores: list[float], n_cols: int = 5):\n", + " n_rows = len(signals) // n_cols\n", + " fig, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols * 4, n_rows * 3))\n", + " for ax, signal, score in zip(axs.flat, signals, scores):\n", + " ax.plot(signal[:, 0]) # ploting only Lead I of the ECG signal\n", + " ax.set_title(f\"{metric}: {score:.2f}\")\n", + " ax.axis(\"off\")\n", + " ax.set_aspect(\"auto\")\n", + " # Add a title to the entire figure\n", + " fig.suptitle(\"ECG Signals with SNR Annotation (All Leads Considered, Lead I Shown)\", fontsize=16)\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Input Validation\n", + "\n", + "\n", + "In the time series modality, the `pymdma` package offers one type of input validation, **no-reference**, where the signal is validated independently, without requiring a reference signal.\n", + "\n", + "This section demonstrates how to use the input validation functions with the signal-to-noise ratio (`SNR`) as an example." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pymdma.time_series.measures.input_val.data.quality import SNR\n", + "\n", + "snr = SNR()\n", + "snr_result = snr.compute(ref_data) # compute the metric\n", + "_dataset_level, instance_level = snr_result.value # fetch the instance level results\n", + "\n", + "\n", + "plot_instances_score(ref_data, \"SNR\", instance_level, n_cols=5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ploting the metric results\n", + "\n", + "We provide a simple method in the `MetricResult` class to easily plot the results of the metrics. The method `plot()` will plot the results of the metrics in the format specified by the `plot_params` attribute in the `MetricResult` class. The `plot_params` attribute is a dictionary that contains the parameters to be used in the plot. If this attribute is not set, the method will default to a bar plot.\n", + "\n", + "You can provide a title for the plot when calling this method, as well as an axis is which you wish to plot the results (helpfull when plotting multiple metrics in the same plot). In addition, you can provide a set of `plot_params` to be used directly by matplotlib's plotting functions.\n", + "\n", + "> **Note**: You also have access to the values of the metrics via the `values` attribute in the `MetricResult` class. 
You can use these values to plot the results using your own plotting functions.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "snr_result.plot(\"Signal to Noise Ratio\") # plot the results from the result object\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Synthetic Validation\n", + "\n", + "The automatic evaluation of synthetically generated signals is a common practice in the field of generative AI, and is crucial for the assessment of the quality of large synthetic datasets. This is usually done by comparing the synthetic signals to a set of reference signals by considering the similarity between the distributions of the two sets. In this section, we will demonstrate how to use the `pymdma` package to evaluate the quality of synthetic signals." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get features for synthetic data quality metrics computation\n", + "ref_features, target_features = ts_input_layer.get_embeddings(\"tsfel\")\n", + "\n", + "print(\"Reference features shape:\", ref_features.shape)\n", + "print(\"Synthetic features shape:\", target_features.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Feature Space Visualization: UMAP Analysis of Real vs Synthetic Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from umap import UMAP\n", + "\n", + "umap = UMAP(n_neighbors=3, n_components=2, random_state=10, n_jobs=1)\n", + "real_feats_2d = umap.fit_transform(ref_features)\n", + "fake_feats_2d = umap.transform(target_features)\n", + "\n", + "plt.figure(figsize=(5, 5))\n", + "plt.scatter(real_feats_2d[:, 0], real_feats_2d[:, 1], s=20, label=\"Real Samples\")\n", + "plt.scatter(fake_feats_2d[:, 0], fake_feats_2d[:, 1], s=20, label=\"Fake Samples\")\n", + 
"plt.title(\"UMAP Features Visualization | Real vs Synthetic\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Compute Improved Precision and Improved Recall (Dataset-level and Instance-level)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pymdma.time_series.measures.synthesis_val import ImprovedPrecision, ImprovedRecall\n", + "\n", + "ip = ImprovedPrecision(k=2)\n", + "ir = ImprovedRecall(k=2)\n", + "\n", + "ip_result = ip.compute(ref_features, target_features)\n", + "ir_result = ir.compute(ref_features, target_features)\n", + "\n", + "precision_dataset, precision_instance = ip_result.value\n", + "recall_dataset, recall_instance = ir_result.value\n", + "\n", + "print(f\"Dataset-level Precision: {precision_dataset:.2f} | Dataset-level Recall: {recall_dataset:.2f}\")\n", + "print(f\"Instance-level Precision: {precision_instance[:20]} | Instance-level Recall: {recall_instance[:20]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Plot Precise and Imprecise samples according to Improved Precision" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_instances_grid(signals: list[np.ndarray], n_cols: int = 25):\n", + " n_rows = len(signals) // n_cols\n", + " fig, axs = plt.subplots(n_rows, n_cols, figsize=(n_cols * 3, n_rows * 3))\n", + " fig.subplots_adjust(hspace=0, wspace=0)\n", + " for ax, signal in zip(axs.flat, signals):\n", + " ax.plot(signal[:, 0]) # ploting only Lead I\n", + " ax.get_xaxis().set_ticks([])\n", + " ax.get_yaxis().set_ticks([])\n", + " ax.axis(\"off\")\n", + " ax.set_aspect(\"auto\")\n", + " return fig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "precision_instance = np.array(precision_instance)\n", + "imprecise_idx = 
np.argwhere(precision_instance < 1).flatten()\n", + "precise_idx = np.argwhere(precision_instance >= 1).flatten()\n", + "\n", + "precise_samples = [target_data[i] for i in precise_idx]\n", + "imprecise_samples = [target_data[i] for i in imprecise_idx]\n", + "\n", + "precise_fig = plot_instances_grid(precise_samples, n_cols=5)\n", + "precise_fig.suptitle(\"Lead I of Precise Signals (All Leads Considered)\", fontsize=15)\n", + "plt.show()\n", + "\n", + "imprecise_fig = plot_instances_grid(imprecise_samples, n_cols=5)\n", + "imprecise_fig.suptitle(\"Lead I of Imprecise Signals (All Leads Considered)\", fontsize=15)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Synthetic Valitation using Distance Metrics\n", + "\n", + "In distance metrics such as Frechet Distance, Wasserstein Distance, and Maximum Mean Discrepancy (MMD), besides the metric value alone the `pymdma` package also computes two additional statistics: the dispersion ratio and the distance ratio.\n", + "\n", + "- **dispersion ratio**: computes the ratio of the distance between fake samples and the distance between real samples, providing insight into the variability of the generated data compared to the original data.\n", + "- **distance ratio**: computes the ratio of the distance between real and fake samples and the distance of between real samples, indicating the dissimilarity between the two datasets in comparison to the internal variation within the real samples.\n", + "\n", + "An example of the Wasserstein distance value, along with the corresponding ratios, is provided above.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pymdma.time_series.measures.synthesis_val import WassersteinDistance\n", + "\n", + "WD = WassersteinDistance()\n", + "wd_result = WD.compute(ref_features, target_features)\n", + "\n", + "wd_dataset, _ = wd_result.value\n", + "stats_dataset, _ = 
wd_result.stats\n", + "\n", + "\n", + "dispersion_ratio = stats_dataset[\"dispersion_ratio\"]\n", + "distance_ratio = stats_dataset[\"distance_ratio\"]\n", + "print(\"Dataset-level information:\")\n", + "print(f\"\\t{'Wasserstein Distance':<25}{wd_dataset:.2f}\")\n", + "print(f\"\\t{'Distance Ratio':<25}{distance_ratio:.2f}\")\n", + "print(f\"\\t{'Dispersion Ratio':<25}{dispersion_ratio:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These values indicate that the distance between real and fake samples was 2.54 times greater than the distance between real samples, and that the variability among fake samples was 3.28 times higher than the variability between real samples. These ratios provide a more intuitive interpretation than the distance metric value alone, offering a clearer comparison of the variation between real and synthetic data." + ] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.20" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pyproject.toml b/pyproject.toml index b5da2c6..8f64293 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,24 +2,27 @@ # https://github.com/microsoft/vscode-python/blob/master/CHANGELOG.md#enhancements-1 [tool.poetry] name = "pymdma" -version = "0.1.4" +version = "0.1.5" description = "Multimodal Data Metrics for Auditing real and synthetic data" authors = ["Fraunhofer AICOS "] maintainers = [ - "Marília Barandas ", "Ivo Façoco ", "Joana Rebelo ", - "Pedro Matias " + "Pedro Matias ", + "Marília Barandas ", ] classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", "Operating System :: OS Independent", + "Development Status :: 5 - Production/Stable", + "Topic :: 
Scientific/Engineering :: Artificial Intelligence", + "Topic :: Scientific/Engineering :: Information Analysis", ] readme = "README.md" license = "LGPL-3.0-or-later" package-mode = true -#homepage = "" +documentation = "https://pymdma.readthedocs.io/en/latest/" repository = "https://github.com/fraunhoferportugal/pymdma" packages = [ { include = "pymdma", from = "src"} @@ -43,7 +46,6 @@ gudhi = {version = ">=3.9.0, <=4.0.0"} scikit-learn = {version = ">1.4.0"} # Image dependencies -# pydom = {git = "https://github.com/umang-singhal/pydom.git", rev = "2554af8d0", optional = true} torchvision = {version = ">=0.15.2, <0.19.0", optional = true} torchmetrics = {version = ">=1.3.2, <1.4.0", extras = ["image"], optional = true} pycocotools = {version = ">=2.0.8", optional = true} @@ -54,6 +56,7 @@ torch-fidelity = {version = ">=0.3.0, <0.4.0", optional = true} tsfel = {version = ">0.1.7", optional = true} wfdb = {version = ">=4.1.2, <5.0.0", optional = true} statsmodels = {version = ">=0.14.4, <0.15.0", optional = true} +fastdtw = {version = "^0.3.4", optional = true} # Text Dependencies TODO # accelerate = {version = "^0.24.0", optional = true} # datasets = {version = "^2.14.6", optional = true} @@ -72,14 +75,14 @@ word2number = {version = ">=1.1.0, <1.5.0", optional = true} [tool.poetry.extras] -image = ["pydom", "torchvision", "torchmetrics", "pycocotools", "opencv-python", "torch-fidelity"] +image = ["torchvision", "torchmetrics", "pycocotools", "opencv-python", "torch-fidelity"] tabular = ["numba", "pandas", "pycanon", "scipy", "spacy", "transformers", "umap-learn", "word2number", "statsmodels"] -time_series = ["tsfel", "wfdb", "statsmodels"] +time_series = ["tsfel", "wfdb", "statsmodels", "fastdtw"] # text = ["accelerate", "datasets", "nltk", "sentence-transformers", "transformers", "python-multipart"] all = [ - "pydom", "torchvision", "torchmetrics", "pycocotools", "opencv-python", "torch-fidelity", + "torchvision", "torchmetrics", "pycocotools", "opencv-python", 
"torch-fidelity", "numba", "pandas", "pycanon", "scipy", "spacy", "transformers", "umap-learn", "word2number", - "tsfel", "wfdb", "statsmodels" + "tsfel", "wfdb", "statsmodels", "fastdtw" # "accelerate", "datasets", "nltk", "sentence-transformers", "transformers", "python-multipart" ] @@ -101,7 +104,6 @@ pytest = "^7.4.3" pytest-cov = "^4.1.0" commitizen = "^3.17.0" safety = "^2.3.5" -# cz-bitbucket-jira-conventional = {git = "https://bitbucket.fraunhofer.pt/scm/is2020/mlops-commit-drafter.git", rev = "v1.0.3"} httpx = "^0.27.0" twine = "^5.1.1" @@ -109,7 +111,6 @@ twine = "^5.1.1" optional = true [tool.poetry.group.docs.dependencies] pydocstyle = "^6.3.0" -# TODO (telmo.baptista): Add support for Sphinx. Sphinx is only supported by Python 3.9+, so we have to manage the minimal version required before creation. mkdocs = "^1.6.1" mkdocstrings = {extras = ["python"], version = "^0.26.1"} mkdocs-gen-files = "^0.5.0" @@ -128,16 +129,6 @@ nbconvert = "^7.11.0" [tool.poetry.scripts] pymdma = "pymdma.cli:main" -# [[tool.poetry.source]] -# name = "torch-cu118" -# url = "https://download.pytorch.org/whl/cu118" -# priority = "explicit" - - -# [[tool.poetry.source]] -# name = "torch-cpu" -# url = "https://download.pytorch.org/whl/cpu" -# priority = "explicit" [tool.black] line-length = 120 @@ -287,5 +278,3 @@ skip = [".git"] [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" -# requires = ["setuptools"] -# build-backend = "setuptools.build_meta" diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt deleted file mode 100644 index ff6d58a..0000000 --- a/requirements/requirements-dev.txt +++ /dev/null @@ -1,57 +0,0 @@ -anyio==3.7.1 ; python_version >= "3.9" and python_version < "3.13" -argcomplete==3.4.0 ; python_version >= "3.9" and python_version < "3.13" -black==23.12.1 ; python_version >= "3.9" and python_version < "3.13" -certifi==2024.7.4 ; python_version >= "3.9" and python_version < "3.13" -cfgv==3.4.0 
; python_version >= "3.9" and python_version < "3.13" -charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "3.13" -click==8.1.7 ; python_version >= "3.9" and python_version < "3.13" -colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" -commitizen==3.28.0 ; python_version >= "3.9" and python_version < "3.13" -coverage[toml]==7.6.0 ; python_version >= "3.9" and python_version < "3.13" -decli==0.6.2 ; python_version >= "3.9" and python_version < "3.13" -distlib==0.3.8 ; python_version >= "3.9" and python_version < "3.13" -dparse==0.6.3 ; python_version >= "3.9" and python_version < "3.13" -exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11" -filelock==3.15.4 ; python_version >= "3.9" and python_version < "3.13" -flake8==6.1.0 ; python_version >= "3.9" and python_version < "3.13" -h11==0.14.0 ; python_version >= "3.9" and python_version < "3.13" -httpcore==1.0.5 ; python_version >= "3.9" and python_version < "3.13" -httpx==0.27.0 ; python_version >= "3.9" and python_version < "3.13" -identify==2.6.0 ; python_version >= "3.9" and python_version < "3.13" -idna==3.7 ; python_version >= "3.9" and python_version < "3.13" -importlib-metadata==8.2.0 ; python_version >= "3.9" and python_version < "3.10" -iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.13" -isort==5.13.2 ; python_version >= "3.9" and python_version < "3.13" -jinja2==3.1.4 ; python_version >= "3.9" and python_version < "3.13" -markupsafe==2.1.5 ; python_version >= "3.9" and python_version < "3.13" -marshmallow==3.21.3 ; python_version >= "3.9" and python_version < "3.13" -mccabe==0.7.0 ; python_version >= "3.9" and python_version < "3.13" -mypy==1.11.0 ; python_version >= "3.9" and python_version < "3.13" -mypy-extensions==1.0.0 ; python_version >= "3.9" and python_version < "3.13" -nodeenv==1.9.1 ; python_version >= "3.9" and python_version < "3.13" -packaging==24.1 ; python_version >= "3.9" and python_version < "3.13" 
-pathspec==0.12.1 ; python_version >= "3.9" and python_version < "3.13" -platformdirs==4.2.2 ; python_version >= "3.9" and python_version < "3.13" -pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.13" -pre-commit==3.7.1 ; python_version >= "3.9" and python_version < "3.13" -prompt-toolkit==3.0.36 ; python_version >= "3.9" and python_version < "3.13" -pycodestyle==2.11.1 ; python_version >= "3.9" and python_version < "3.13" -pyflakes==3.1.0 ; python_version >= "3.9" and python_version < "3.13" -pytest==7.4.4 ; python_version >= "3.9" and python_version < "3.13" -pytest-cov==4.1.0 ; python_version >= "3.9" and python_version < "3.13" -pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13" -questionary==2.0.1 ; python_version >= "3.9" and python_version < "3.13" -requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13" -ruamel-yaml==0.18.6 ; python_version >= "3.9" and python_version < "3.13" -ruamel-yaml-clib==0.2.8 ; platform_python_implementation == "CPython" and python_version < "3.13" and python_version >= "3.9" -safety==2.4.0b2 ; python_version >= "3.9" and python_version < "3.13" -setuptools==71.1.0 ; python_version >= "3.9" and python_version < "3.13" -sniffio==1.3.1 ; python_version >= "3.9" and python_version < "3.13" -termcolor==2.4.0 ; python_version >= "3.9" and python_version < "3.13" -tomli==2.0.1 ; python_version >= "3.9" and python_full_version <= "3.11.0a6" -tomlkit==0.13.0 ; python_version >= "3.9" and python_version < "3.13" -typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.13" -urllib3==2.2.2 ; python_version >= "3.9" and python_version < "3.13" -virtualenv==20.26.3 ; python_version >= "3.9" and python_version < "3.13" -wcwidth==0.2.13 ; python_version >= "3.9" and python_version < "3.13" -zipp==3.19.2 ; python_version >= "3.9" and python_version < "3.10" diff --git a/requirements/requirements-image.txt b/requirements/requirements-image.txt deleted file mode 100644 index 
319cfaa..0000000 --- a/requirements/requirements-image.txt +++ /dev/null @@ -1,6 +0,0 @@ -opencv-python>=4.10.0.84, <4.11.0.0 ; python_version >= "3.9" and python_version < "3.13" -pycocotools>=2.0.8 ; python_version >= "3.9" and python_version < "3.13" -pydom @ git+https://github.com/umang-singhal/pydom.git@2554af8d08a80658539f002eae58ece89cbcc6d4 ; python_version >= "3.9" and python_version < "3.13" -torch-fidelity>=0.3.0, <0.4.0 ; python_version >= "3.9" and python_version < "3.13" -torchmetrics[image]>=1.3.2, <2.0.0 ; python_version >= "3.9" and python_version < "3.13" -torchvision>=0.15.2, <0.19.0 ; python_version >= "3.9" and python_version < "3.13" diff --git a/requirements/requirements-notebook.txt b/requirements/requirements-notebook.txt deleted file mode 100644 index 280d39a..0000000 --- a/requirements/requirements-notebook.txt +++ /dev/null @@ -1,57 +0,0 @@ -appnope==0.1.4 ; python_version >= "3.9" and python_version < "3.13" and platform_system == "Darwin" -asttokens==2.4.1 ; python_version >= "3.9" and python_version < "3.13" -attrs==23.2.0 ; python_version >= "3.9" and python_version < "3.13" -beautifulsoup4==4.12.3 ; python_version >= "3.9" and python_version < "3.13" -bleach==6.1.0 ; python_version >= "3.9" and python_version < "3.13" -cffi==1.16.0 ; python_version >= "3.9" and python_version < "3.13" and implementation_name == "pypy" -colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32" -comm==0.2.2 ; python_version >= "3.9" and python_version < "3.13" -debugpy==1.8.2 ; python_version >= "3.9" and python_version < "3.13" -decorator==5.1.1 ; python_version >= "3.9" and python_version < "3.13" -defusedxml==0.7.1 ; python_version >= "3.9" and python_version < "3.13" -exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11" -executing==2.0.1 ; python_version >= "3.9" and python_version < "3.13" -fastjsonschema==2.20.0 ; python_version >= "3.9" and python_version < "3.13" 
-importlib-metadata==8.2.0 ; python_version >= "3.9" and python_version < "3.10" -ipykernel==6.29.5 ; python_version >= "3.9" and python_version < "3.13" -ipython==8.18.0 ; python_version >= "3.9" and python_version < "3.13" -jedi==0.19.1 ; python_version >= "3.9" and python_version < "3.13" -jinja2==3.1.4 ; python_version >= "3.9" and python_version < "3.13" -jsonschema==4.23.0 ; python_version >= "3.9" and python_version < "3.13" -jsonschema-specifications==2023.12.1 ; python_version >= "3.9" and python_version < "3.13" -jupyter-client==8.6.2 ; python_version >= "3.9" and python_version < "3.13" -jupyter-core==5.7.2 ; python_version >= "3.9" and python_version < "3.13" -jupyterlab-pygments==0.3.0 ; python_version >= "3.9" and python_version < "3.13" -markupsafe==2.1.5 ; python_version >= "3.9" and python_version < "3.13" -matplotlib-inline==0.1.7 ; python_version >= "3.9" and python_version < "3.13" -mistune==3.0.2 ; python_version >= "3.9" and python_version < "3.13" -nbclient==0.10.0 ; python_version >= "3.9" and python_version < "3.13" -nbconvert==7.16.4 ; python_version >= "3.9" and python_version < "3.13" -nbformat==5.10.4 ; python_version >= "3.9" and python_version < "3.13" -nest-asyncio==1.6.0 ; python_version >= "3.9" and python_version < "3.13" -packaging==24.1 ; python_version >= "3.9" and python_version < "3.13" -pandocfilters==1.5.1 ; python_version >= "3.9" and python_version < "3.13" -parso==0.8.4 ; python_version >= "3.9" and python_version < "3.13" -pexpect==4.9.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform != "win32" -platformdirs==4.2.2 ; python_version >= "3.9" and python_version < "3.13" -prompt-toolkit==3.0.36 ; python_version >= "3.9" and python_version < "3.13" -psutil==6.0.0 ; python_version >= "3.9" and python_version < "3.13" -ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform != "win32" -pure-eval==0.2.3 ; python_version >= "3.9" and python_version < "3.13" 
-pycparser==2.22 ; python_version >= "3.9" and python_version < "3.13" and implementation_name == "pypy" -pygments==2.18.0 ; python_version >= "3.9" and python_version < "3.13" -python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "3.13" -pywin32==306 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.9" and python_version < "3.13" -pyzmq==26.0.3 ; python_version >= "3.9" and python_version < "3.13" -referencing==0.35.1 ; python_version >= "3.9" and python_version < "3.13" -rpds-py==0.19.1 ; python_version >= "3.9" and python_version < "3.13" -six==1.16.0 ; python_version >= "3.9" and python_version < "3.13" -soupsieve==2.5 ; python_version >= "3.9" and python_version < "3.13" -stack-data==0.6.3 ; python_version >= "3.9" and python_version < "3.13" -tinycss2==1.3.0 ; python_version >= "3.9" and python_version < "3.13" -tornado==6.4.1 ; python_version >= "3.9" and python_version < "3.13" -traitlets==5.14.3 ; python_version >= "3.9" and python_version < "3.13" -typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.10" -wcwidth==0.2.13 ; python_version >= "3.9" and python_version < "3.13" -webencodings==0.5.1 ; python_version >= "3.9" and python_version < "3.13" -zipp==3.19.2 ; python_version >= "3.9" and python_version < "3.10" diff --git a/requirements/requirements-tabular.txt b/requirements/requirements-tabular.txt deleted file mode 100644 index e9128c0..0000000 --- a/requirements/requirements-tabular.txt +++ /dev/null @@ -1,15 +0,0 @@ -gudhi>=3.9.0, <= 4.0.0 ; python_version >= "3.9" and python_version < "3.13" -numba>=0.60.0, <0.80.0 ; python_version >= "3.9" and python_version < "3.13" -pandas>=2.0.0, <3.0.0 ; python_version >= "3.9" and python_version < "3.13" -pycanon==1.0.1.post2 ; python_version >= "3.9" and python_version < "3.13" -scikit-learn>1.4.0 ; python_version >= "3.9" and python_version < "3.13" -scipy>=1.6.0, <2.0.0 ; python_version >= "3.9" and python_version 
< "3.13" -spacy>=3.7.4, <4.0.0 ; python_version >= "3.9" and python_version < "3.13" -transformers>=4.43.2, <5.0.0 ; python_version >= "3.9" and python_version < "3.13" -umap-learn>=0.5.5, <0.6.0 ; python_version >= "3.9" and python_version < "3.13" -word2number>=1.1.0, <1.5.0 ; python_version >= "3.9" and python_version < "3.13" -# TODO: pycaret dependencies -# kaleido>=0.2.1, <0.4.0 ; python_version >= "3.9" and python_version < "3.13" -# pycaret>=3.3.2, <4.0.0; python_version >= "3.9" and python_version < "3.13" -# statsmodels>=0.14.4, <0.15.0 ; python_version >= "3.9" and python_version < "3.13" -# matplotlib<3.8.0 ; python_version >= "3.9" and python_version < "3.13" diff --git a/requirements/requirements-time_series.txt b/requirements/requirements-time_series.txt deleted file mode 100644 index 63fddb1..0000000 --- a/requirements/requirements-time_series.txt +++ /dev/null @@ -1,3 +0,0 @@ -statsmodels>=0.14.4, <0.15.0 ; python_version >= "3.9" and python_version < "3.13" -tsfel>0.1.7 ; python_version >= "3.9" and python_version < "3.13" -wfdb>=4.1.2, <5.0.0 ; python_version >= "3.9" and python_version < "3.13" diff --git a/requirements/requirements.txt b/requirements/requirements.txt deleted file mode 100644 index 4d2d2b0..0000000 --- a/requirements/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -loguru>=0.7.2, <0.8.0 ; python_version >= "3.9" and python_version < "3.13" -matplotlib>=3.4.3, <4.0.0 ; python_version >= "3.9" and python_version < "3.13" -numpy>=1.22.0, <2.0.0 ; python_version >= "3.9" and python_version < "3.13" -piq>=0.8.0, <1.0.0 ; python_version >= "3.9" and python_version < "3.13" -POT>=0.9.4, <0.10.0 ; python_version >= "3.9" and python_version < "3.13" -pydantic>=2.8.2, <2.9.0 ; python_version >= "3.9" and python_version < "3.13" -python-dotenv>=1.0.0, <2.0.0 ; python_version >= "3.9" and python_version < "3.13" -torch>=2.0.1, <2.5.0 ; python_version >= "3.9" and python_version < "3.13" diff --git a/src/pymdma/general/functional/distance.py 
b/src/pymdma/general/functional/distance.py index 91d3a34..7be8387 100644 --- a/src/pymdma/general/functional/distance.py +++ b/src/pymdma/general/functional/distance.py @@ -1,6 +1,6 @@ import numpy as np -import ot import torch +import ot from sklearn.metrics import pairwise_kernels from ..models.kernels import GaussianKernel, MultipleKernelMaximumMeanDiscrepancy @@ -163,6 +163,11 @@ def cos_sim_2d(x_feat, y_feat): ------- float The cosine similarity between the two sets of feature vectors. + + References + ---------- + Manning, C. D., Raghavan, P., & Schütze, H., Introduction to Information Retrieval (2008). + https://www.cambridge.org/highereducation/books/introduction-to-information-retrieval/669D108D20F556C5C30957D63B5AB65C#overview """ # Normalize the vectors in both sets to have unit length norm_x = x_feat / np.linalg.norm(x_feat, axis=1, keepdims=True) diff --git a/src/pymdma/general/functional/ratio.py b/src/pymdma/general/functional/ratio.py index 84d3e4a..6e22ef9 100644 --- a/src/pymdma/general/functional/ratio.py +++ b/src/pymdma/general/functional/ratio.py @@ -1,6 +1,6 @@ def dispersion_ratio(func, x_feat_1, x_feat_2, y_feat_1, y_feat_2): - """Calculates the ratio of the distance between real samples and the - distance between fake samples, using the metric passed in the argument + """Calculates the ratio of the distance between fake samples and the + distance between real samples, using the metric passed in the argument "func". The dispersion ratio is a measure of how well the generator model @@ -42,7 +42,7 @@ def dispersion_ratio(func, x_feat_1, x_feat_2, y_feat_1, y_feat_2): def distance_ratio(func, x_feat_1, x_feat_2, y_feat_1, y_feat_2): """Calculates the ratio of the distance between real and fake samples and - the distance of between fake samples, using the metric passed in the + the distance of between real samples, using the metric passed in the argument "func". 
The distance ratio is a measure of how well the generator model diff --git a/src/pymdma/image/measures/input_val/__init__.py b/src/pymdma/image/measures/input_val/__init__.py index f3d493c..e2f034b 100644 --- a/src/pymdma/image/measures/input_val/__init__.py +++ b/src/pymdma/image/measures/input_val/__init__.py @@ -1,7 +1,7 @@ +from pymdma.image.measures.input_val.data.dom import DOM from pymdma.image.measures.input_val.data.no_reference import ( BRISQUE, CLIPIQA, - DOM, EME, Brightness, Colorfulness, diff --git a/src/pymdma/image/measures/input_val/data/dom.py b/src/pymdma/image/measures/input_val/data/dom.py new file mode 100644 index 0000000..03c9b5c --- /dev/null +++ b/src/pymdma/image/measures/input_val/data/dom.py @@ -0,0 +1,317 @@ +"""Module for DOM sharpness. + +Adapted from: +pydom, Sharpness Estimation for Document and Scene Images. + https://github.com/umang-singhal/pydom + + +Original paper: +Kumar et al., Sharpness estimation for document and scene images (2012). + https://ieeexplore.ieee.org/document/6460868 +""" + +import cv2 +import numpy as np +from loguru import logger + +from pymdma.common.definitions import Metric +from pymdma.common.output import DistributionResult, MetricResult +from pymdma.constants import EvaluationLevel, MetricGroup, OutputsTypes, ReferenceType + + +def _dom(median_blurred: np.ndarray): + """Find DOM at each pixel. + + Parameters + ---------- + median_blurred : np.ndarray + Median filtered image. + + Returns + ------- + domx : np.ndarray + Diff of diff on x axis. + domy : np.ndarray + Diff of diff on y axis. 
+ """ + median_shift_up = np.pad(median_blurred, ((0, 2), (0, 0)), "constant")[2:, :] + median_shift_down = np.pad(median_blurred, ((2, 0), (0, 0)), "constant")[:-2, :] + domx = np.abs(median_shift_up - 2 * median_blurred + median_shift_down) + + median_shift_left = np.pad(median_blurred, ((0, 0), (0, 2)), "constant")[:, 2:] + median_shift_right = np.pad(median_blurred, ((0, 0), (2, 0)), "constant")[:, :-2] + domy = np.abs(median_shift_left - 2 * median_blurred + median_shift_right) + return domx, domy + + +def _sharpness_matrix( + median_blurred: np.ndarray, + edgex: np.ndarray, + edgey: np.ndarray, + width: int = 2, +): + """Find sharpness value at each pixel. + + Parameters + ---------- + median_blurred : np.ndarray + Median filtered grayscale image. + edgex : np.ndarray + Edge pixels in x-axis. + edgey : np.ndarray + Edge pixels in y-axis. + width : int, optional + Edge width, by default 2. + debug : bool, optional + To show intermediate results, by default False. + + Returns + ------- + Sx : np.ndarray + Sharpness value matrix computed in x-axis. + Sy : np.ndarray + Sharpness value matrix computed in y-axis. 
+ """ + # Compute dod measure on both axis + domx, domy = _dom(median_blurred) + + # Contrast on x and y axis + Cx = np.abs(median_blurred - np.pad(median_blurred, ((1, 0), (0, 0)), "constant")[:-1, :]) + Cy = np.abs(median_blurred - np.pad(median_blurred, ((0, 0), (1, 0)), "constant")[:, :-1]) + + # Filter out sharpness at pixels other than edges + Cx = np.multiply(Cx, edgex) + Cy = np.multiply(Cy, edgey) + + # initialize sharpness matriz with 0's + Sx = np.zeros(domx.shape) + Sy = np.zeros(domy.shape) + + # Compute Sx + for i in range(width, domx.shape[0] - width): + num = np.abs(domx[i - width : i + width, :]).sum(axis=0) + dn = Cx[i - width : i + width, :].sum(axis=0) + Sx[i] = [(num[k] / dn[k] if dn[k] > 1e-3 else 0) for k in range(Sx.shape[1])] + + # Compute Sy + for j in range(width, domy.shape[1] - width): + num = np.abs(domy[:, j - width : j + width]).sum(axis=1) + dn = Cy[:, j - width : j + width].sum(axis=1) + Sy[:, j] = [(num[k] / dn[k] if dn[k] > 1e-3 else 0) for k in range(Sy.shape[0])] + return Sx, Sy + + +def _sharpness_measure( + median_blurred: np.ndarray, + edgex: np.ndarray, + edgey: np.ndarray, + width: int, + sharpness_threshold: float, + epsilon: float = 1e-8, +): + """Final Sharpness Value. + + Parameters + ---------- + median_blurred : np.ndarray + Median filtered grayscale image. + width : int + Edge width. + sharpness_threshold : float + Thresold to consider if a pixel is sharp. + epsilon : float, optional + Small value to defer div by zero, by default 1e-8. + + Returns + ------- + S : float + Sharpness measure(0= sharpness_threshold) / (np.sum(edgex) + epsilon) + Ry = np.sum(Sy >= sharpness_threshold) / (np.sum(edgey) + epsilon) + return np.sqrt(Rx**2 + Ry**2) + + +def _smoothen_image(image: np.ndarray, transpose: bool = False, epsilon: float = 1e-8): + """Smmoth image with ([0.5, 0, -0.5]) 1D filter. + + Parameters + ---------- + image : np.ndarray + Grayscale image. 
+ transpose : bool, optional + To apply filter on vertical axis, by default False. + epsilon : float, optional + Small value to defer div by zero, by default 1e-8. + + Returns + ------- + image_smoothed : np.ndarray + Smoothened image. + """ + fil = np.array([0.5, 0, -0.5]) # Smoothing Filter + + # change image axis for column convolution + if transpose: + image = image.T + + # Convolve grayscale image with smoothing filter + image_smoothed = np.array([np.convolve(image[i], fil, mode="same") for i in range(image.shape[0])]) + + # change image axis after column convolution + if transpose: + image_smoothed = image_smoothed.T + + # Normalize smoothened grayscale image + return np.abs(image_smoothed) / (np.max(image_smoothed) + epsilon) + + +def _get_sharpness( + img, + width=2, + sharpness_threshold=2, + edge_threshold=0.0001, + blur: bool = False, + blur_size: tuple = (5, 5), +): + """Image Sharpness Assessment. + + Parameters + ---------- + img : str or np.ndarray + Image source or image matrix. + width : int, optional + Text edge width, by default 2. + sharpness_threshold : float, optional + Thresold to consider if a pixel is sharp, by default 2. + edge_threshold : float, optional + Thresold to consider if a pixel is an edge pixel, by default 0.0001. + debug : bool, optional + To show intermediate results, by default False. + + Returns + ------- + sharpness : float + Image sharpness measure(0 edge_threshold + edgey = smoothy > edge_threshold + + return _sharpness_measure(median_blurred, edgex, edgey, width=width, sharpness_threshold=sharpness_threshold) + + +class DOM(Metric): + """Computes DOM sharpness score for an image. It is effective in detecting + motion-blur, de-focused images or inherent properties of imaging system. + + **Objective**: Sharpness + + Parameters + ---------- + width : int, optional, default=2 + Width of the edge filter. + sharpness_threshold : int, optional, default=2 + Threshold for considering if a pixel is sharp or not. 
+ edge_threshold : float, optional, default=0.0001 + Threshold for edge. + **kwargs : dict, optional + Additional keyword arguments for compatibility. + + References + ---------- + Kumar et al., Sharpness estimation for document and scene images (2012). + https://ieeexplore.ieee.org/document/6460868 + + Code was adapted from: + pydom, Sharpness Estimation for Document and Scene Images. + https://github.com/umang-singhal/pydom + + Examples + -------- + >>> dom = DOM() + >>> imgs = np.random.rand(20, 100, 100, 3) # (N, H, W, C) + >>> result: MetricResult = dom.compute(imgs) + """ + + reference_type = ReferenceType.NONE + evaluation_level = EvaluationLevel.INSTANCE + metric_group = MetricGroup.QUALITY + + higher_is_better: bool = True + min_value: float = 0.0 + max_value: float = 1.0 + + def __init__( + self, + width: int = 2, + sharpness_threshold: int = 2, + edge_threshold: float = 0.0001, + blur: bool = False, + blur_size: tuple = (5, 5), + **kwargs, + ): + super().__init__(**kwargs) + self.width = width + self.sharpness_threshold = sharpness_threshold + self.edge_threshold = edge_threshold + self.blur = blur + self.blur_size = blur_size + if blur: + logger.warning("Applying Gaussian Blur to the images may lead to non-deterministic results.") + + def compute( + self, + imgs: np.ndarray, + **kwargs, + ) -> MetricResult: + """Computes DOM score for an image. + + Parameters + ---------- + imgs : {(N, H, W, C) ndarray, (N, H, W) ndarray} + List of arrays representing RGB or grayscale image of shape (H, W, C) or (H, W), respectively. + + Returns + ------- + result: MetricResult + DOM score for each image. 
+ """ + scores = [ + _get_sharpness( + img, + self.width, + self.sharpness_threshold, + self.edge_threshold, + self.blur, + self.blur_size, + ) + for img in imgs + ] + + return DistributionResult( + instance_level={"dtype": OutputsTypes.ARRAY, "subtype": "float", "value": scores}, + ) + + +__all__ = ["DOM"] diff --git a/src/pymdma/image/measures/input_val/data/no_reference.py b/src/pymdma/image/measures/input_val/data/no_reference.py index f676360..bb2713e 100644 --- a/src/pymdma/image/measures/input_val/data/no_reference.py +++ b/src/pymdma/image/measures/input_val/data/no_reference.py @@ -1,7 +1,6 @@ from typing import Literal, Tuple, Union import cv2 -# import dom as _dom import numpy as np import torch from PIL import Image, ImageEnhance @@ -14,88 +13,6 @@ from ....utils.processing import image_resize -# TODO review documentations and attributes - - -# class DOM(Metric): -# """Computes DOM sharpness score for an image. It is effective in detecting -# motion-blur, de-focused images or inherent properties of imaging system. - -# **Objective**: Sharpness - -# Parameters -# ---------- -# width : int, optional, default=2 -# Width of the edge filter. -# sharpness_threshold : int, optional, default=2 -# Threshold for considering if a pixel is sharp or not. -# edge_threshold : float, optional, default=0.0001 -# Threshold for edge. -# **kwargs : dict, optional -# Additional keyword arguments for compatibility. - -# References -# ---------- -# Kumar et al., Sharpness estimation for document and scene images (2012). -# https://ieeexplore.ieee.org/document/6460868 - -# Code was adapted from: -# pydom, Sharpness Estimation for Document and Scene Images. 
-# https://github.com/umang-singhal/pydom - -# Examples -# -------- -# >>> dom = DOM() -# >>> imgs = np.random.rand(20, 100, 100, 3) # (N, H, W, C) -# >>> result: MetricResult = dom.compute(imgs) -# """ - -# reference_type = ReferenceType.NONE -# evaluation_level = EvaluationLevel.INSTANCE -# metric_group = MetricGroup.QUALITY - -# higher_is_better: bool = True -# min_value: float = 0.0 -# max_value: float = 1.0 - -# def __init__( -# self, -# width: int = 2, -# sharpness_threshold: int = 2, -# edge_threshold: float = 0.0001, -# **kwargs, -# ): -# super().__init__(**kwargs) -# self._dom = _dom.DOM() -# self.width = width -# self.sharpness_threshold = sharpness_threshold -# self.edge_threshold = edge_threshold - -# def compute( -# self, -# imgs: np.ndarray, -# **kwargs, -# ) -> MetricResult: -# """Computes DOM score for an image. - -# Parameters -# ---------- -# imgs : {(N, H, W, C) ndarray, (N, H, W) ndarray} -# List of arrays representing RGB or grayscale image of shape (H, W, C) or (H, W), respectively. - -# Returns -# ------- -# result: MetricResult -# DOM score for each image. -# """ -# scores = [ -# self._dom.get_sharpness(img, self.width, self.sharpness_threshold, self.edge_threshold) for img in imgs -# ] - -# return DistributionResult( -# instance_level={"dtype": OutputsTypes.ARRAY, "subtype": "float", "value": scores}, -# ) - class Tenengrad(Metric): """Computes Tenengrad score for an image. Sharpness measure based on the @@ -379,8 +296,8 @@ def compute( # TODO documentation class ExposureBrightness(Metric): - """Computes Exposure and Brightness level Metric. - Values higher than 1 indicate overexposure, while values closer to 0 indicate underexposure. + """Computes Exposure and Brightness level Metric. Values higher than 1 + indicate overexposure, while values closer to 0 indicate underexposure. 
**Objective**: Exposure and Brightness @@ -814,7 +731,6 @@ def compute( __all__ = [ - "DOM", "Tenengrad", "TenengradRelative", "EME", diff --git a/src/pymdma/time_series/input_layer.py b/src/pymdma/time_series/input_layer.py index f3698ea..e63dee5 100644 --- a/src/pymdma/time_series/input_layer.py +++ b/src/pymdma/time_series/input_layer.py @@ -58,6 +58,7 @@ def _get_data_files_path(data_src: Union[List[str], Path]) -> List[Path]: else: raise AssertionError(f"Unsupported file extension: {item.suffix} (file: {item})") elif item.is_dir(): + item = Path(item) # Recursively search for data files in subdirectories for sig_file in item.iterdir(): if sig_file.is_file() and sig_file.suffix in SUPPORTED_FILES: @@ -214,3 +215,24 @@ def batched_samples(self) -> Generator[Tuple[np.ndarray], None, None]: # self.instance_ids.extend(list(sig_ids)) yield ref_sigs, sim_sigs + + def get_full_samples(self): + # only reference signals for no reference metrics + if self.reference_type == ReferenceType.NONE: + full_no_ref_signals = [] + for no_ref_signals, _labels, _sig_ids in self.target_loader: + full_no_ref_signals.extend(no_ref_signals) + return np.array(full_no_ref_signals) + else: # full reference + # iterate through both dataloaders and return all signals + full_ref_sigs = [] + full_sim_sigs = [] + ref_iter = iter(self.reference_loader) + sim_iter = iter(self.target_loader) + for _ in range(len(self.reference_loader)): + ref_sigs, _, _ = next(ref_iter) + sim_sigs, _, _sig_ids = next(sim_iter) + full_ref_sigs.extend(ref_sigs) + full_sim_sigs.extend(sim_sigs) + + return np.array(full_ref_sigs), np.array(full_sim_sigs) diff --git a/src/pymdma/time_series/measures/input_val/data/quality.py b/src/pymdma/time_series/measures/input_val/data/quality.py index 3797dcf..c682d1f 100644 --- a/src/pymdma/time_series/measures/input_val/data/quality.py +++ b/src/pymdma/time_series/measures/input_val/data/quality.py @@ -82,6 +82,11 @@ class SNR(Metric): **kwargs : dict, optional Additional 
keyword arguments for compatibility. + References + ---------- + Smith, S. W., The Scientist and Engineer's Guide to Digital Signal Processing (1997). + https://dl.acm.org/doi/10.5555/281875 + Examples -------- >>> snr = SNR() diff --git a/src/pymdma/time_series/measures/synthesis_val/__init__.py b/src/pymdma/time_series/measures/synthesis_val/__init__.py index ba6c8a7..57096e9 100644 --- a/src/pymdma/time_series/measures/synthesis_val/__init__.py +++ b/src/pymdma/time_series/measures/synthesis_val/__init__.py @@ -1,3 +1,4 @@ +from pymdma.time_series.measures.synthesis_val.data.reference import DTW, CrossCorrelation from pymdma.time_series.measures.synthesis_val.feature._shared import ( Authenticity, Coverage, @@ -24,4 +25,6 @@ "GeometryScore", "MultiScaleIntrinsicDistance", "PrecisionRecallDistribution", + "DTW", + "CrossCorrelation", ] diff --git a/src/pymdma/time_series/measures/synthesis_val/data/__init__.py b/src/pymdma/time_series/measures/synthesis_val/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pymdma/time_series/measures/synthesis_val/data/reference.py b/src/pymdma/time_series/measures/synthesis_val/data/reference.py new file mode 100644 index 0000000..8a9e605 --- /dev/null +++ b/src/pymdma/time_series/measures/synthesis_val/data/reference.py @@ -0,0 +1,226 @@ +from typing import List, Literal + +import numpy as np +from fastdtw import fastdtw + +from pymdma.common.definitions import Metric +from pymdma.common.output import MetricResult +from pymdma.constants import EvaluationLevel, MetricGroup, OutputsTypes, ReferenceType + + +class DTW(Metric): + """Computes the Dynamic Time Warping (DTW) distance between two sets of + time-series signals, evaluating the similarity between corresponding + channels in the target and reference signals. The DTW distance is computed + by comparing each target signal with every reference signal, with lower DTW + values indicating greater similarity. 
For each signal pair, the DTW + distance is calculated across all channels, with the mean of the distances + being taken across all channels and instances. This process yields both + instance-level and dataset-level DTW metrics. + + **Objective**: Fidelity, Diversity + + Parameters + ---------- + **kwargs : dict, optional + Additional keyword arguments for compatibility with the Metric framework. + + References + ---------- + Salvador, S., & Chan, P., FastDTW: Toward Accurate Dynamic Time (2004). + https://dl.acm.org/doi/10.5555/1367985.1367993 + + Examples + -------- + >>> dtw = DTW() + >>> reference_sigs = np.random.rand(64, 1000, 12) # (N, L, C) + >>> target_imgs = np.random.rand(64, 1000, 12) # (N, L, C) + >>> result: MetricResult = dtw.compute(reference_sigs, target_sigs) + """ + + reference_type = ReferenceType.DATASET + evaluation_level = [EvaluationLevel.INSTANCE, EvaluationLevel.DATASET] + metric_group = MetricGroup.QUALITY + + higher_is_better: bool = False + min_value: float = 0.0 + max_value: float = np.inf + + def __init__( + self, + **kwargs, + ): + super().__init__(**kwargs) + + def compute( + self, + reference_sigs: List[np.ndarray], + target_sigs: List[np.ndarray], + **kwargs, + ) -> MetricResult: + """Computes Dinamic Time Wrapping. + + Parameters + ---------- + reference_sigs: (N, L, C) ndarray + Signals to use as reference. + List of arrays representing a signal of shape (L, C). + target_sigs : (N, L, C) ndarray + Signals compare with reference. + List of arrays representing a signal of shape (L, C). + + Returns + ------- + result : MetricResult + Instance-level dtw. + Dataset-level dtw. 
+ """ + instance_dtw = [] + for targ in target_sigs: + for ref in reference_sigs: + dtw_values_by_chan = [] + + # Compute the dtw for each channel + for targ_channel, ref_channel in zip(targ.T, ref.T): + channel_dtw, _ = fastdtw(targ_channel, ref_channel) + + dtw_values_by_chan.append(channel_dtw) + + # Compute mean dtw across channels + mean_dtw = np.mean(dtw_values_by_chan) + + # Correlations by signal + instance_dtw.append(mean_dtw) + + # Average correlation across signals + final_dtw = np.mean(instance_dtw) + + return MetricResult( + instance_level={"dtype": OutputsTypes.ARRAY, "subtype": "float", "value": instance_dtw}, + dataset_level={"dtype": OutputsTypes.NUMERIC, "subtype": "float", "value": final_dtw}, + ) + + +class CrossCorrelation(Metric): + """Computes the Cross-Correlation between two sets of signals. + + This function calculates the cross-correlation to analyze the relationship between + corresponding channels in the target and reference signals. The computation is performed + for each signal in the target set against every signal in the reference set, using a + specified overlap mode. The computed cross-correlation for each channel can be summarized + using one of two reduction methods: 'mean'and 'max'. + + For each signal pair, the function calculates the cross-correlation values for each channel + using the specified reduction method. It then computes the mean of these values across all + channels to provide an instance-level metric and averages these results across all signal + pairs to obtain the dataset-level metric. + + **Objective**: Fidelity, Diversity + + Parameters + ---------- + mode : {'full', 'same', 'valid'}, optional + Defines how the cross-correlation is computed. Default is 'full'. + - 'full': Computes the convolution at every point of overlap, producing + an output of size (N + M - 1). Boundary effects may be present. + - 'same': Produces an output of length max(M, N), centered on the signals. 
+ - 'valid': Produces an output of length max(M, N) - min(M, N) + 1, + considering only complete overlaps. + reduction : {'mean', 'max'}, optional + Determines how the cross-correlation is summarized for each channel. Default is 'max'. + - 'mean': The average of the cross-correlation values for the channel. + - 'max': The maximum cross-correlation value for the channel. + **kwargs : dict, optional + Additional keyword arguments for customization. + + References + ---------- + Proakis and Manolakis, Digital Signal Processing: Principles, Algorithms, and Applications (1996). + https://dl.acm.org/doi/10.5555/227373 + + Examples + -------- + >>> cc = CrossCorrelation() + >>> reference_sigs = np.random.rand(64, 1000, 12) # (N, L, C) + >>> target_sigs = np.random.rand(64, 1000, 12) # (N, L, C) + >>> result: MetricResult = cc.compute(reference_sigs, target_sigs) + """ + + reference_type = ReferenceType.DATASET + evaluation_level = [EvaluationLevel.INSTANCE, EvaluationLevel.DATASET] + metric_group = MetricGroup.QUALITY + + higher_is_better: bool = True + min_value: float = -np.inf + max_value: float = np.inf + + def __init__( + self, + mode: Literal["full", "same", "valid"] = "full", + reduction: Literal["mean", "max"] = "max", + **kwargs, + ): + super().__init__(**kwargs) + assert mode in ["full", "same", "valid"], f"Unsupported mode for Cross Correlation: {mode}" + self.mode = mode + assert reduction in ["mean", "max"], f"Unsupported criteria for relative tenengrad: {reduction}" + self.reduction = reduction + + def compute( + self, + reference_sigs: List[np.ndarray], + target_sigs: List[np.ndarray], + **kwargs, + ) -> MetricResult: + """Computes Dinamic Time Wrapping. + + Parameters + ---------- + reference_sigs: (N, L1, C) ndarray + Signals to use as reference. + List of arrays representing a signal of shape (L1, C). + target_sigs :(N, L2, C) ndarray + Signals compare with reference. + List of arrays representing a signal of shape (L2, C). 
+ + Returns + ------- + result : MetricResult + Instance-level maximum cross-correlation. + Dataset-level maximum cross-correlation. + """ + instance_cross_corr = [] + + for targ in target_sigs: + for ref in reference_sigs: + reduct_corr_values_by_chan = [] + + # Compute the cross-correlation for each channel with different reductions + for targ_channel, ref_channel in zip(targ.T, ref.T): + cross_corr = np.correlate(targ_channel, ref_channel, mode=self.mode) + + if self.reduction == "max": + max_corr_idx = np.argmax(cross_corr) + max_corr_value = cross_corr[max_corr_idx] + reduct_corr_values_by_chan.append(max_corr_value) + + else: + mean_corr_value = np.mean(cross_corr) + reduct_corr_values_by_chan.append(mean_corr_value) + + # Compute the mean of the correlation across channels + cross_corr = np.mean(reduct_corr_values_by_chan) + + # Correlations by signal + instance_cross_corr.append(cross_corr) + + # Average correlation across signals + final_cross_corr = np.mean(instance_cross_corr) + + return MetricResult( + instance_level={"dtype": OutputsTypes.ARRAY, "subtype": "float", "value": instance_cross_corr}, + dataset_level={"dtype": OutputsTypes.NUMERIC, "subtype": "float", "value": final_cross_corr}, + ) + + +__all__ = ["DTW", "CrossCorrelation"] diff --git a/src/pymdma/time_series/measures/synthesis_val/feature/distance.py b/src/pymdma/time_series/measures/synthesis_val/feature/distance.py index 39f2952..31f4b61 100644 --- a/src/pymdma/time_series/measures/synthesis_val/feature/distance.py +++ b/src/pymdma/time_series/measures/synthesis_val/feature/distance.py @@ -5,7 +5,7 @@ from pymdma.common.definitions import FeatureMetric from pymdma.common.output import MetricResult from pymdma.constants import EvaluationLevel, MetricGroup, OutputsTypes, ReferenceType -from pymdma.general.functional.distance import cos_sim_2d, fast_mmd_linear, mk_mmd, mmd_kernel, wasserstein +from pymdma.general.functional.distance import cos_sim_2d, fast_mmd_linear, mmd_kernel, 
wasserstein, mk_mmd from pymdma.general.functional.ratio import dispersion_ratio, distance_ratio from pymdma.general.utils.util import features_splitting @@ -129,8 +129,7 @@ def compute(self, real_features: np.ndarray, fake_features: np.ndarray, **kwargs class MMD(FeatureMetric): """Calculate the Maximum Mean Discrepancy (MMD) using a specified kernel - function. If the chosen kernel is "multi_gaussian", the multi-kernel MMD - using gaussian kernels will be computed. + function. **Objective**: Fidelity, Diversity @@ -141,11 +140,13 @@ class MMD(FeatureMetric): If False, ratio computation is skipped. Default is True. kernel : str, optional, default='linear' The kernel function to use for calculating MMD. Options include: - 'multi_gaussian','gaussian', 'additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf', 'laplacian', 'sigmoid', 'cosine' - (Note: when using gaussian kernel, the number of samples in both datasets must be the same.) - + 'multi_gaussian', 'additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf', 'laplacian', 'sigmoid', 'cosine' **kwargs : dict, optional Additional keyword arguments for compatibility. + + Notes + ----- + When using gaussian kernel, the number of samples in both datasets must be the same References ---------- @@ -157,7 +158,7 @@ class MMD(FeatureMetric): Examples -------- - >>> mmd = MMD(kernel = 'multi_gaussian') + >>> mmd = MMD(kernel = 'linear') >>> real_features = np.random.rand(64, 48) # (n_samples, num_features) >>> fake_features = np.random.rand(64, 48) # (n_samples, num_features) >>> result: MetricResult = mmd.compute(x_feat, y_feat) @@ -248,8 +249,7 @@ def _compute_ratios(self, real_features: np.ndarray, fake_features: np.ndarray, def compute(self, real_features: np.ndarray, fake_features: np.ndarray, **kwargs) -> MetricResult: """Calculate the Maximum Mean Discrepancy (MMD) using a specified - kernel function. 
If the chosen kernel is "multi_gaussian", the multi- - kernel MMD using gaussian kernels willbe computed. + kernel function. Parameters ---------- @@ -295,6 +295,12 @@ class CosineSimilarity(FeatureMetric): **kwargs : dict, optional Additional keyword arguments for compatibility. + References + ---------- + Manning, C. D., Raghavan, P., & Schütze, H., An Introduction to Information Retrieval (2008). + https://www.cambridge.org/highereducation/books/introduction-to-information-retrieval/669D108D20F556C5C30957D63B5AB65C#overview + + Examples -------- >>> cossine_sim = MMD() diff --git a/tests/__init__.py b/tests/__init__.py index 8cb3688..94f13ea 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,5 @@ """Unit test package for time_series_metrics.""" + import os _TEST_ROOT = os.path.dirname(__file__) diff --git a/tests/conftest.py b/tests/conftest.py index 3557210..21338a4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -29,9 +29,9 @@ def test_client(): yield test_client -#################################################################################################### -###################################### Image Fixtures ############################################## -#################################################################################################### +# ################################################################################################### +# ##################################### Image Fixtures ############################################## +# ################################################################################################### @pytest.fixture(scope="function") def coco_bbox_dataset(): return json.load( @@ -65,9 +65,9 @@ def get_extractor(name): return get_extractor -#################################################################################################### -################################### Time-Series Fixtures ########################################### 
-#################################################################################################### +# ################################################################################################### +# ################################## Time-Series Fixtures ########################################### +# ################################################################################################### @pytest.fixture(scope="module") @@ -82,7 +82,7 @@ def synth_ts_filenames(): return _get_data_files_path(Path(data_dir) / "test/time_series/input_val/dataset") -@pytest.fixture(scope="module") +@pytest.fixture() def ts_feature_extractor(): def get_extractor(name): return TimeSeriesFeatureExtractor(name) diff --git a/tests/test_ts_import.py b/tests/test_ts_import.py index db58a28..e589f23 100644 --- a/tests/test_ts_import.py +++ b/tests/test_ts_import.py @@ -1,10 +1,12 @@ import warnings +from copy import deepcopy import numpy as np import pytest from pymdma.constants import OutputsTypes from pymdma.time_series.measures.input_val.data import quality as input_metrics +from pymdma.time_series.measures.synthesis_val.data import reference as synth_data_metrics from pymdma.time_series.measures.synthesis_val.feature import _shared as synth_shared_metrics from pymdma.time_series.measures.synthesis_val.feature import distance as synth_distance_metrics @@ -107,6 +109,158 @@ def test_uniqueness_values(sample_distribution): APROXIMATION_TOLERANCE = 1e-2 # numeric tolerance for approximations +@pytest.mark.parametrize( + "metric_name, expected", + [ + (synth_data_metrics.DTW, 3.84066727425), + (synth_data_metrics.CrossCorrelation, 4.040718166076052), + ], +) +def test_reproducibility_dtw_crosscorr(metric_name, expected, show_dist=False): + """Test if dtw and cross correlation measures are reproducible.""" + + # From sample_distribution((10, 2), sigma=0.5, mu = 0): + x_ref = np.array( + [ + [ + [0.88202617, 0.2000786], + [0.48936899, 1.1204466], + [0.933779, -0.48863894], + 
+def dtw_cross_corr_symetry(sample_distribution, metric_name):
+    """Test if dtw and cross correlation are symmetric."""
Skipping comparison.", + stacklevel=2, + ) + + +def _generate_triangular_signals(shape): + """Generates a set of triangular signals with random variations in + amplitude, phase shift, and slope.""" + n_samples, length, n_channels = shape + signals = np.zeros(shape) + + for i in range(n_samples): + for j in range(n_channels): + t = np.linspace(0, 1, length) + # Random amplitude scaling factor (varies between 0.5 and 1.5) + amplitude = np.random.uniform(0.5, 1.5) + + # Random phase shift (varies between -0.2 and 0.2 in normalized time units) + phase_shift = np.random.uniform(-0.2, 0.2) + t_shifted = np.clip(t + phase_shift, 0, 1) # Ensure phase remains within bounds + + # Random slope adjustment (varies between 0.8 and 1.2) + slope_variation = np.random.uniform(0.8, 1.2) + + # Generate the triangular waveform with variations + signals[i, :, j] = amplitude * np.abs((t_shifted - 0.5) * slope_variation) + + return signals + + +def _generate_square_signals(shape): + """Generates a set of square wave signals with random variations in + frequency, amplitude, and phase.""" + + n_samples, length, n_channels = shape + signals = np.zeros(shape) + for i in range(n_samples): + for j in range(n_channels): + t = np.linspace(0, 1, length) + frequency = np.random.uniform(1, 10) # Random frequency between 1Hz and 10Hz + amplitude = np.random.uniform(0.5, 1.5) # Random amplitude between 0.5 and 1.5 + phase = np.random.uniform(0, 2 * np.pi) # Random phase shift + signals[i, :, j] = amplitude * np.sign(np.sin(2 * np.pi * frequency * t + phase)) + return signals + + +@pytest.mark.parametrize( + "metric_name", + [ + (synth_data_metrics.DTW), + (synth_data_metrics.CrossCorrelation), + ], +) +def test_dtw_crosscorr_order_sigs(metric_name): + """Test if the similarity between signals of the same shape is greater than + the similarity between different shapes (square vs triangle)""" + + shape = (5, 500, 12) + triangular_signals = _generate_triangular_signals(shape) + square_signals = 
_generate_square_signals(shape) + + metric = metric_name() + result_tt = metric.compute(triangular_signals, triangular_signals) + result_ts = metric.compute(triangular_signals, square_signals) + + if result_ts.dataset_level.dtype == OutputsTypes.NUMERIC: + if metric_name == synth_data_metrics.DTW: + value_min = result_tt.dataset_level.value + value_max = result_ts.dataset_level.value + else: + value_min = result_ts.dataset_level.value + value_max = result_tt.dataset_level.value + + assert value_min < value_max, f"{metric_name}: unexpected order of values {value_min} < {value_max}." + else: + warnings.warn(f"Unknown output type: {result_ts['dataset_level']['type']}. Skipping comparison.", stacklevel=2) + + # AUXILIARY FUNCTION def validate_stats(stats_values, stats, expected_stats): if stats in stats_values: @@ -127,9 +281,9 @@ def test_extractor_models(ts_feature_extractor, synth_ts_filenames, extractor_na features = extractor.extract_features_from_files( synth_ts_filenames, - fs=50, + fs=500, dims=["ch1", "ch2", "ch3", "ch4", "ch5", "ch6", "ch7", "ch8", "ch9", "ch10", "ch11", "ch12"], - batch_size=6, + batch_size=20, ) assert features.shape[0] == len(synth_ts_filenames), "Feature length does not match input length" @@ -137,6 +291,7 @@ def test_extractor_models(ts_feature_extractor, synth_ts_filenames, extractor_na prec = synth_shared_metrics.ImprovedPrecision() result = prec.compute(features, features) + assert result.dataset_level is not None and result.instance_level is not None, "Eval level is None" dataset_level, instance_level = result.value assert dataset_level > 0.90, "Dataset level is below threshold" @@ -539,7 +694,7 @@ def test_distribution_shift(metric_name, sample_distribution, expected_upper, si (synth_distance_metrics.CosineSimilarity, 0.8370494332671239), (synth_shared_metrics.PrecisionRecallDistribution, (0.6881853042325229, 0.6920392785323591)), (synth_shared_metrics.FrechetDistance, 0.5000000060902672), - # 
(synth_shared_metrics.MultiScaleIntrinsicDistance, 24.476226229017197), + # (synth_shared_metrics.MultiScaleIntrinsicDistance, 152.29991989954718), (synth_shared_metrics.Authenticity, 0.5), (synth_shared_metrics.ImprovedPrecision, 1.0), (synth_shared_metrics.ImprovedRecall, 0.8), @@ -609,8 +764,9 @@ def test_reproducibility(metric_name, expected, show_dist=False): "metric_name, kernel", [ (synth_distance_metrics.MMD, "linear"), - (synth_distance_metrics.CosineSimilarity, "multi_gaussian"), - (synth_distance_metrics.CosineSimilarity, "sigmoid"), + (synth_distance_metrics.MMD, "multi_gaussian"), + # (synth_distance_metrics.CosineSimilarity, "multi_gaussian"), + # (synth_distance_metrics.CosineSimilarity, "sigmoid"), ], ) def test_mmd_kerneis(metric_name, kernel, sample_distribution): @@ -626,6 +782,73 @@ def test_mmd_kerneis(metric_name, kernel, sample_distribution): assert isinstance(value, float) +@pytest.mark.parametrize( + "metric_name, kernel, expected", + [ + (synth_distance_metrics.MMD, "multi_gaussian", 1.0416591425747472), + (synth_distance_metrics.MMD, "additive_chi2", 0.4689359283845498), + (synth_distance_metrics.MMD, "chi2", 0.2499004582501716), + (synth_distance_metrics.MMD, "linear", 0.3188643337711042), + (synth_distance_metrics.MMD, "poly", 1.462841954853067), # ? + (synth_distance_metrics.MMD, "rbf", 0.20019771014560495), # ? 
+def test_mmd_kerneis_reproducibility(metric_name, kernel, expected):
+    """Test if mmd with several kernels is reproducible."""
+
+    # From sample_distribution((10, 2), sigma=0.5, mu = 0): (all positive)
Skipping comparison.", stacklevel=2) + + # ###### Test General Metrics Import #######