From 5a3ab0b9686da0617f025f2f88da561f3ff66f93 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 15 Dec 2023 15:00:26 -0500 Subject: [PATCH 1/6] ENH: Use staticmethods, cache to avoid excess lookups --- bids-validator/bids_validator/bids_validator.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/bids-validator/bids_validator/bids_validator.py b/bids-validator/bids_validator/bids_validator.py index 40c85b662..fff86755f 100644 --- a/bids-validator/bids_validator/bids_validator.py +++ b/bids-validator/bids_validator/bids_validator.py @@ -2,6 +2,7 @@ import re import os import json +from functools import lru_cache class BIDSValidator(): @@ -136,7 +137,9 @@ def is_file(self, path): return (any(conditions)) - def get_regular_expressions(self, file_name): + @staticmethod + @lru_cache + def get_regular_expressions(file_name): """Read regular expressions from a file.""" regexps = [] @@ -158,15 +161,10 @@ def get_regular_expressions(self, file_name): return regexps - def conditional_match(self, expression, path): + @staticmethod + def conditional_match(expression, path): """Find conditional match.""" match = re.compile(expression).findall(path) match = match[0] if len(match) >= 1 else False # adapted from JS code and JS does not support conditional groups - if (match): - if ((match[1] == match[2][1:]) | (not match[1])): - return True - else: - return False - else: - return False + return bool(match) and (match[1] == match[2][1:] or not match[1]) From ae032ac26dab73f0a946b53ce706224563cdf798 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 15 Dec 2023 15:01:11 -0500 Subject: [PATCH 2/6] TEST: Add fixture to avoid re-reading files during tests --- bids-validator/bids_validator/test_bids_validator.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/bids-validator/bids_validator/test_bids_validator.py b/bids-validator/bids_validator/test_bids_validator.py index 6465ba468..12f8be395 100644 --- a/bids-validator/bids_validator/test_bids_validator.py +++ b/bids-validator/bids_validator/test_bids_validator.py @@ -48,7 +48,13 @@ def _gather_test_files(dspath, exclude_keywords): @pytest.mark.parametrize('fname', files) -def test_is_bids(fname): +def test_is_bids(validator, fname): """Test that is_bids returns true for each file in a valid BIDS dataset.""" - validator = BIDSValidator() assert validator.is_bids(fname) + + +@pytest.fixture(scope='module') +def validator(): + """Return a BIDSValidator instance.""" + validator = BIDSValidator() + return validator From 9b6f39b0c3fcb4846713ab436075fdbe878cb27d Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 15 Dec 2023 15:07:55 -0500 Subject: [PATCH 3/6] RF: Rewrite checks to short-circuit --- .../bids_validator/bids_validator.py | 49 ++++++------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/bids-validator/bids_validator/bids_validator.py b/bids-validator/bids_validator/bids_validator.py index fff86755f..b2f5e7998 100644 --- a/bids-validator/bids_validator/bids_validator.py +++ b/bids-validator/bids_validator/bids_validator.py @@ -64,26 +64,23 @@ def is_bids(self, path): True """ - conditions = [] - - conditions.append(self.is_top_level(path)) - conditions.append(self.is_associated_data(path)) - conditions.append(self.is_session_level(path)) - conditions.append(self.is_subject_level(path)) - conditions.append(self.is_phenotypic(path)) - conditions.append(self.is_file(path)) - - return (any(conditions)) + return any( + check(path) for check in ( + self.is_top_level, + self.is_associated_data, + self.is_session_level, + self.is_subject_level, + self.is_phenotypic, + self.is_file + ) + ) def is_top_level(self, path): """Check if the file has appropriate name for a top-level file.""" regexps = self.get_regular_expressions(self.dir_rules + 'top_level_rules.json') - conditions = [False if re.compile(x).search(path) is None else True for - x in regexps] - - return (any(conditions)) + return any(re.search(regexp, path) for regexp in regexps) def is_associated_data(self, path): """Check if file is appropriate associated data.""" @@ -93,49 +90,35 @@ def is_associated_data(self, path): regexps = self.get_regular_expressions(self.dir_rules + 'associated_data_rules.json') - conditions = [(re.compile(x).search(path) is not None) for - x in regexps] - - return any(conditions) + return any(re.search(regexp, path) for regexp in regexps) def is_session_level(self, path): """Check if the file has appropriate name for a session level.""" regexps = self.get_regular_expressions(self.dir_rules + 'session_level_rules.json') - conditions = [self.conditional_match(x, path) for x in regexps] - - return (any(conditions)) + return any(self.conditional_match(regexp, path) for regexp in regexps) def is_subject_level(self, path): """Check if the file has appropriate name for a subject level.""" regexps = self.get_regular_expressions(self.dir_rules + 'subject_level_rules.json') - conditions = [(re.compile(x).search(path) is not None) for - x in regexps] - - return (any(conditions)) + return any(re.search(regexp, path) for regexp in regexps) def is_phenotypic(self, path): """Check if file is phenotypic data.""" regexps = self.get_regular_expressions(self.dir_rules + 'phenotypic_rules.json') - conditions = [(re.compile(x).search(path) is not None) for - x in regexps] - - return (any(conditions)) + return any(re.search(regexp, path) for regexp in regexps) def is_file(self, path): """Check if file is phenotypic data.""" regexps = self.get_regular_expressions(self.dir_rules + 'file_level_rules.json') - conditions = [(re.compile(x).search(path) is not None) for - x in regexps] - - return (any(conditions)) + return any(re.search(regexp, path) for regexp in regexps) @staticmethod @lru_cache From 6dd3794b881c6393771301f66de6519894ceec44 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 15 Dec 2023 16:15:35 -0500 Subject: [PATCH 4/6] TEST: Expand tests to prepare for refactor --- .../bids_validator/test_bids_validator.py | 105 +++++++++++++++++- 1 file changed, 99 insertions(+), 6 deletions(-) diff --git a/bids-validator/bids_validator/test_bids_validator.py b/bids-validator/bids_validator/test_bids_validator.py index 12f8be395..edfe12199 100644 --- a/bids-validator/bids_validator/test_bids_validator.py +++ b/bids-validator/bids_validator/test_bids_validator.py @@ -47,14 +47,107 @@ def _gather_test_files(dspath, exclude_keywords): files = _gather_test_files(dspath, EXCLUDE_KEYWORDS) -@pytest.mark.parametrize('fname', files) -def test_is_bids(validator, fname): - """Test that is_bids returns true for each file in a valid BIDS dataset.""" - assert validator.is_bids(fname) - - @pytest.fixture(scope='module') def validator(): """Return a BIDSValidator instance.""" validator = BIDSValidator() return validator + + +@pytest.mark.parametrize('fname', files) +def test_datasets(validator, fname): + """Test that is_bids returns true for each file in a valid BIDS dataset.""" + assert validator.is_bids(fname) + + +@pytest.mark.parametrize('fname, matches', [ + ('/T1w.json', True), + ('/dataset_description.json', True), + ('/README', True), + ('/CHANGES', True), + ('/participants.tsv', True), + ('/sub-01/anat/sub-01_T1w.nii.gz', False), +]) +def test_top_level(validator, fname, matches): + """Test that is_top_level returns true for top-level files.""" + assert validator.is_top_level(fname) is matches + + +@pytest.mark.parametrize('fname, matches', [ + ('/sourcedata/unstructured_data.nii.gz', True), + ('/sourcedata/dicom_dir/xyz.dcm', True), + ('/code/my_analysis/analysis.py', True), + ('/derivatives/preproc/sub-01/anat/sub-01_desc-preproc_T1w.nii.gz', True), + ('/stimuli/pic.jpg', True), + ('/sub-01/anat/sub-01_T1w.nii.gz', False), +]) +def test_associated_data(validator, fname, matches): + """Test that is_associated_data returns true for associated data.""" + assert validator.is_associated_data(fname) is matches + + +@pytest.mark.parametrize('fname, matches', [ + ('/sub-01/ses-1/sub-01_ses-1_scans.tsv', True), + ('/sub-01/ses-1/sub-01_ses-1_scans.json', True), + ('/sub-01/sub-01_scans.tsv', True), + ('/sub-01/sub-01_scans.json', True), + ('/sub-01/ses-1/sub-01_ses-1_task-rest_bold.json', True), + ('/sub-01/sub-01_task-rest_bold.json', True), + ('/sub-01/ses-1/sub-01_ses-1_asl.json', True), + ('/sub-01/sub-01_asl.json', True), + ('/sub-01/ses-1/sub-01_ses-1_pet.json', True), + ('/sub-01/sub-01_pet.json', True), + ('/sub-01/ses-1/sub-01_ses-1_proc-test_channels.tsv', True), + ('/sub-01/ses-1/sub-01_ses-1_channels.json', True), + ('/sub-01/sub-01_proc-test_channels.tsv', True), + ('/sub-01/sub-01_channels.json', True), + ('/sub-01/ses-1/sub-01_ses-1_space-CapTrak_electrodes.tsv', True), + ('/sub-01/ses-1/sub-01_ses-1_coordsystem.json', True), + ('/sub-01/sub-01_space-CapTrak_electrodes.tsv', True), + ('/sub-01/sub-01_coordsystem.json', True), + ('/sub-01/ses-1/sub-01_ses-1_motion.json', True), + ('/sub-01/sub-01_motion.json', True), + ('/sub-01/ses-1/sub-01_ses-1_TEM.json', True), + ('/sub-01/sub-01_TEM.json', True), + ('/sub-01/ses-1/sub-01_ses-1_nirs.json', True), + ('/sub-01/sub-01_nirs.json', True), + # Mismatch sessions + ('/sub-01/sub-01_ses-1_scans.tsv', False), + ('/sub-01/sub-01_ses-1_scans.json', False), + ('/sub-01/ses-1/sub-01_ses-2_scans.tsv', False), + # File-level + ('/sub-01/ses-1/func/sub-01_ses-1_task-rest_bold.nii.gz', False), + ('/sub-01/anat/sub-01_T1w.nii.gz', False), +]) +def test_session_level(validator, fname, matches): + """Test that is_session_level returns true for session level files.""" + assert validator.is_session_level(fname) is matches + + +@pytest.mark.parametrize('fname, matches', [ + ('/sub-01/sub-01_sessions.tsv', True), + ('/sub-01/sub-01_sessions.json', True), + ('/sub-01/anat/sub-01_T1w.nii.gz', False), +]) +def test_subject_level(validator, fname, matches): + """Test that is_subject_level returns true for subject level files.""" + assert validator.is_subject_level(fname) is matches + + +@pytest.mark.parametrize('fname, matches', [ + ('/phenotype/measure.tsv', True), + ('/phenotype/measure.json', True), + ('/sub-01/anat/sub-01_T1w.nii.gz', False), +]) +def test_phenotpic(validator, fname, matches): + """Test that is_phenotypic returns true for phenotypic files.""" + assert validator.is_phenotypic(fname) is matches + + +@pytest.mark.parametrize('fname, matches', [ + ('/sub-01/ses-1/func/sub-01_ses-1_task-rest_bold.nii.gz', True), + ('/sub-01/anat/sub-01_T1w.nii.gz', True), +]) +def test_file_level(validator, fname, matches): + """Test that is_file returns true for file level files.""" + assert validator.is_file(fname) is matches From 7144e0194ec92f8bcf85674bd8c2109820a27003 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 15 Dec 2023 16:16:03 -0500 Subject: [PATCH 5/6] FIX: Regex missing underscore --- bids-validator/bids_validator/rules/session_level_rules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bids-validator/bids_validator/rules/session_level_rules.json b/bids-validator/bids_validator/rules/session_level_rules.json index 683bb81be..2869309d6 100644 --- a/bids-validator/bids_validator/rules/session_level_rules.json +++ b/bids-validator/bids_validator/rules/session_level_rules.json @@ -7,7 +7,7 @@ }, "func_ses": { - "regexp": "^[\\/\\\\](sub-[a-zA-Z0-9]+)[\\/\\\\](?:(ses-[a-zA-Z0-9]+)[\\/\\\\])?\\1(_\\2)?task-[a-zA-Z0-9]+(?:_acq-[a-zA-Z0-9]+)?(?:_rec-[a-zA-Z0-9]+)?(?:_run-[0-9]+)?(?:_echo-[0-9]+)?(@@@_func_ses_ext_@@@)$", + "regexp": "^[\\/\\\\](sub-[a-zA-Z0-9]+)[\\/\\\\](?:(ses-[a-zA-Z0-9]+)[\\/\\\\])?\\1(_\\2)?_task-[a-zA-Z0-9]+(?:_acq-[a-zA-Z0-9]+)?(?:_rec-[a-zA-Z0-9]+)?(?:_run-[0-9]+)?(?:_echo-[0-9]+)?(@@@_func_ses_ext_@@@)$", "tokens": { "@@@_func_ses_ext_@@@": [ "_bold\\.json", From 7392c84b59ac0c670d8189c4f1ca4f307d8db7e9 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 15 Dec 2023 16:21:27 -0500 Subject: [PATCH 6/6] CI: Test on Python 3.12 --- .github/workflows/python_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index c4b8dc716..eb14ca67e 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: platform: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] runs-on: ${{ matrix.platform }}