From 41ec6e04c5f4f16f73a7d432d4e2ce5f709a1757 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Wed, 14 Aug 2024 20:07:13 -0400 Subject: [PATCH 1/7] fix(schema): Allow associated file columns to not exist --- src/schema/meta/context.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/schema/meta/context.yaml b/src/schema/meta/context.yaml index ef35475238..816fd36f4c 100644 --- a/src/schema/meta/context.yaml +++ b/src/schema/meta/context.yaml @@ -155,7 +155,7 @@ properties: events: description: 'Events file' type: object - required: [path, onset] + required: [path] additionalProperties: false properties: path: @@ -169,7 +169,7 @@ properties: aslcontext: description: 'ASL context file' type: object - required: [path, n_rows, volume_type] + required: [path, n_rows] additionalProperties: false properties: path: @@ -248,7 +248,7 @@ properties: channels: description: 'Channels file' type: object - required: [path, type] + required: [path] additionalProperties: false properties: path: From eab48c863190f79491c5cdcbb21717183c8ca019 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Thu, 15 Aug 2024 15:33:26 -0400 Subject: [PATCH 2/7] fix(schema): Set EchoTime requirement in separate table --- src/schema/rules/sidecars/mri.yaml | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/schema/rules/sidecars/mri.yaml b/src/schema/rules/sidecars/mri.yaml index b4ec44f535..158de082dd 100644 --- a/src/schema/rules/sidecars/mri.yaml +++ b/src/schema/rules/sidecars/mri.yaml @@ -207,21 +207,16 @@ MRITimingParameters: level_addendum: | required if corresponding fieldmap data is present, or the data comes from a multi-echo sequence or Arterial Spin Labeling. - issue: - code: ECHO_TIME_NOT_DEFINED - message: | - You must define 'EchoTime' for this file. 'EchoTime' is the echo time (TE) - for the acquisition, specified in seconds. 
Corresponds to DICOM Tag - 0018, 0081 Echo Time (please note that the DICOM term is in milliseconds - not seconds). The data type number may apply to files from any MRI modality - concerned with a single value for this field, or to the files in a file - collection where the value of this field is iterated using the echo entity. - The data type array provides a value for each volume in a 4D dataset and - should only be used when the volume timing is critical for interpretation - of the data, such as in ASL or variable echo time fMRI sequences. InversionTime: recommended DwellTime: recommended +EchoTimeRequiredMRI: + selectors: + - modality == "mri" + - entities.echo || datatype == "perf" + fields: + EchoTime: required + SliceTimingMRI: selectors: - modality == "mri" From 02b24b4c11f6a7bcd0fdcf1a3c6041478cbd018e Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Mon, 19 Aug 2024 14:57:42 -0400 Subject: [PATCH 3/7] fix(schema): Narrow selectors that might otherwise hit associated files --- src/schema/rules/sidecars/beh.yaml | 2 ++ src/schema/rules/sidecars/entity_rules.yaml | 6 +++++ src/schema/rules/sidecars/func.yaml | 2 ++ src/schema/rules/sidecars/mri.yaml | 27 +++++++++++++-------- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/schema/rules/sidecars/beh.yaml b/src/schema/rules/sidecars/beh.yaml index 1221bf84d7..f2d8410914 100644 --- a/src/schema/rules/sidecars/beh.yaml +++ b/src/schema/rules/sidecars/beh.yaml @@ -8,6 +8,7 @@ # Metadata for either beh or events files BEHTaskInformation: selectors: + - datatype == "beh" - intersects([suffix], ["beh", "events"]) fields: TaskName: recommended @@ -18,6 +19,7 @@ BEHTaskInformation: BEHInstitutionInformation: selectors: + - datatype == "beh" - intersects([suffix], ["beh", "events"]) fields: InstitutionName: recommended diff --git a/src/schema/rules/sidecars/entity_rules.yaml b/src/schema/rules/sidecars/entity_rules.yaml index be8f5603af..54b3c256a0 100644 --- 
a/src/schema/rules/sidecars/entity_rules.yaml +++ b/src/schema/rules/sidecars/entity_rules.yaml @@ -10,12 +10,14 @@ EntitiesTaskMetadata: selectors: - '"task" in entities' + - suffix != 'events' fields: TaskName: recommended EntitiesCeMetadata: selectors: - '"ce" in entities' + - match(extension, "^\.nii(\.gz)?$") fields: ContrastBolusIngredient: optional @@ -30,24 +32,28 @@ EntitiesStainMetadata: EntitiesEchoMetadata: selectors: - '"echo" in entities' + - match(extension, "^\.nii(\.gz)?$") fields: EchoTime: required EntitiesFlipMetadata: selectors: - '"flip" in entities' + - match(extension, "^\.nii(\.gz)?$") fields: FlipAngle: required EntitiesInvMetadata: selectors: - '"inv" in entities' + - match(extension, "^\.nii(\.gz)?$") fields: InversionTime: required EntitiesMTMetadata: selectors: - '"mt" in entities' + - match(extension, "^\.nii(\.gz)?$") fields: MTState: required diff --git a/src/schema/rules/sidecars/func.yaml b/src/schema/rules/sidecars/func.yaml index 7192886407..12a2b01e35 100644 --- a/src/schema/rules/sidecars/func.yaml +++ b/src/schema/rules/sidecars/func.yaml @@ -47,6 +47,7 @@ MRIFuncTimingParameters: selectors: - datatype == "func" - suffix == "bold" + - match(extension, "^\.nii(\.gz)?$") fields: NumberOfVolumesDiscardedByScanner: recommended NumberOfVolumesDiscardedByUser: recommended @@ -64,6 +65,7 @@ MRIFuncTaskInformation: selectors: - datatype == "func" - suffix == "bold" + - match(extension, "^\.nii(\.gz)?$") fields: Instructions: level: recommended diff --git a/src/schema/rules/sidecars/mri.yaml b/src/schema/rules/sidecars/mri.yaml index 158de082dd..ccfb4e0737 100644 --- a/src/schema/rules/sidecars/mri.yaml +++ b/src/schema/rules/sidecars/mri.yaml @@ -9,6 +9,7 @@ MRIHardware: selectors: - modality == "mri" + - match(extension, "^\.nii(\.gz)?$") fields: Manufacturer: level: recommended @@ -57,6 +58,7 @@ MRIChunkPosition: MRISample: selectors: - modality == "mri" + - match(extension, "^\.nii(\.gz)?$") fields: BodyPart: level: optional @@ 
-69,12 +71,14 @@ MRIScannerHardwareASL: - datatype == "perf" - suffix == "asl" - intersects([suffix], ["asl", "m0scan"]) + - match(extension, "^\.nii(\.gz)?$") fields: MagneticFieldStrength: required MRISequenceSpecifics: selectors: - modality == "mri" + - match(extension, "^\.nii(\.gz)?$") fields: PulseSequenceType: recommended ScanningSequence: recommended @@ -105,6 +109,7 @@ PETMRISequenceSpecifics: selectors: - modality == "mri" - intersects(dataset.modalities, ["pet"]) + - match(extension, "^\.nii(\.gz)?$") fields: NonlinearGradientCorrection: required @@ -112,6 +117,7 @@ ASLMRISequenceSpecifics: selectors: - datatype == "perf" - suffix == "asl" + - match(extension, "^\.nii(\.gz)?$") fields: MRAcquisitionType: required @@ -147,6 +153,7 @@ SpoilingGradient: MRISpatialEncoding: selectors: - modality == "mri" + - match(extension, "^\.nii(\.gz)?$") fields: NumberShots: recommended ParallelReductionFactorInPlane: recommended @@ -164,6 +171,7 @@ PhaseEncodingDirectionRec: selectors: - modality == "mri" - suffix != "epi" + - match(extension, "^\.nii(\.gz)?$") fields: PhaseEncodingDirection: level: recommended @@ -201,6 +209,7 @@ PhaseEncodingDirectionReq: MRITimingParameters: selectors: - modality == "mri" + - match(extension, "^\.nii(\.gz)?$") fields: EchoTime: level: recommended @@ -210,10 +219,11 @@ MRITimingParameters: InversionTime: recommended DwellTime: recommended -EchoTimeRequiredMRI: +EchoTimeRequiredASL: selectors: - modality == "mri" - - entities.echo || datatype == "perf" + - datatype == "perf" + - match(extension, "^\.nii(\.gz)?$") fields: EchoTime: required @@ -235,7 +245,6 @@ SliceTimingASL: - intersects([suffix], ["asl", "m0scan"]) - sidecar.MRAcquisitionType == "2D" fields: - EchoTime: required SliceTiming: level: required issue: @@ -255,16 +264,10 @@ SliceTimingASL: final entry in the `SliceTiming` list is the time of acquisition of slice 0. Without this parameter slice time correction will not be possible. 
-# This is technically for sparse sequences only, but I don't know how to encode that. -# SliceTimingSparse: -# selectors: -# - modality == "mri" -# fields: -# SliceTiming: required - MRIRFandContrast: selectors: - modality == "mri" + - match(extension, "^\.nii(\.gz)?$") fields: NegativeContrast: optional @@ -302,6 +305,7 @@ MRIFlipAngleLookLockerTrue: MRISliceAcceleration: selectors: - modality == "mri" + - match(extension, "^\.nii(\.gz)?$") fields: MultibandAccelerationFactor: recommended @@ -309,6 +313,7 @@ MRIAnatomicalLandmarks: selectors: - datatype == "anat" - intersects(dataset.datatypes, ["meg"]) + - match(extension, "^\.nii(\.gz)?$") fields: AnatomicalLandmarkCoordinates__mri: recommended @@ -323,12 +328,14 @@ MRIEchoPlanarImagingAndB0FieldSource: selectors: - intersects(datatype, ['dwi', 'func', 'perf']) - intersects(dataset.datatypes, ['fmap']) + - match(extension, "^\.nii(\.gz)?$") fields: B0FieldSource: recommended MRIInstitutionInformation: selectors: - modality == "mri" + - match(extension, "^\.nii(\.gz)?$") fields: InstitutionName: level: recommended From 555da40d4d4fb80e15187f3305643ca046d2ca12 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Mon, 19 Aug 2024 16:12:17 -0400 Subject: [PATCH 4/7] fix(schema): gzip filename fields may not exist --- src/schema/meta/context.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/meta/context.yaml b/src/schema/meta/context.yaml index 816fd36f4c..f80a23d363 100644 --- a/src/schema/meta/context.yaml +++ b/src/schema/meta/context.yaml @@ -292,7 +292,7 @@ properties: gzip: description: 'Parsed contents of gzip header' type: object - required: [timestamp, filename] + required: [timestamp] additionalProperties: false properties: timestamp: From 314622cc5a2d5a3e0bda52f4cc7847d5617c529d Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Mon, 19 Aug 2024 16:33:29 -0400 Subject: [PATCH 5/7] feat(schema): Split gzip warnings --- src/schema/rules/checks/privacy.yaml | 32 
++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/schema/rules/checks/privacy.yaml b/src/schema/rules/checks/privacy.yaml index c9e2ecf0cb..f21f25b167 100644 --- a/src/schema/rules/checks/privacy.yaml +++ b/src/schema/rules/checks/privacy.yaml @@ -1,17 +1,41 @@ --- -GzipHeaderFields: +GzipHeaderMtime: issue: - code: GZIP_HEADER_DATA + code: GZIP_HEADER_MTIME message: | - The gzip header contains a non-zero timestamp or a non-empty filename and/or comment field. - These may leak sensitive information or indicate a non-reproducible conversion process. + The gzip header contains a non-zero timestamp. + This may leak sensitive information or indicate a non-reproducible conversion process. level: warning selectors: - match(extension, ".gz$") - gzip != null checks: - gzip.timestamp == 0 + +GzipHeaderFilename: + issue: + code: GZIP_HEADER_FILENAME + message: | + The gzip header contains a non-empty filename. + This may leak sensitive information or indicate a non-reproducible conversion process. + level: warning + selectors: + - match(extension, ".gz$") + - gzip.filename + checks: - gzip.filename == "" + +GzipHeaderComment: + issue: + code: GZIP_HEADER_COMMENT + message: | + The gzip header contains a non-empty comment field. + This may leak sensitive information or indicate a non-reproducible conversion process. 
+ level: warning + selectors: + - match(extension, ".gz$") + - gzip.comment + checks: - gzip.comment == "" CheckAge89: From 1f18f9a447e7cd7e1067e803ae4a96e7b95b0b59 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Tue, 20 Aug 2024 09:04:16 -0400 Subject: [PATCH 6/7] fix(metaschema): Explicitly type patterns as strings --- src/metaschema.json | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/metaschema.json b/src/metaschema.json index 7df2856177..c00481b5e8 100644 --- a/src/metaschema.json +++ b/src/metaschema.json @@ -68,6 +68,7 @@ "versions": { "type": "array", "items": { + "type": "string", "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$" } } @@ -470,7 +471,7 @@ "properties": { "datatypes": { "type": "array", - "items": { "pattern": "^[a-z]+$" } + "items": { "type": "string", "pattern": "^[a-z]+$" } } }, "required": ["datatypes"], @@ -661,7 +662,7 @@ "level": { "enum": ["optional", "recommended", "required"] }, "datatypes": { "type": "array", - "items": { "pattern": "^[a-z]+$" } + "items": { "type": "string", "pattern": "^[a-z]+$" } }, "stem": { "type": "string" }, "extensions": { "type": "array", "items": { "type": "string" } } @@ -675,11 +676,11 @@ "level": { "enum": ["optional", "recommended", "required"] }, "datatypes": { "type": "array", - "items": { "pattern": "^[a-z]+$" } + "items": { "type": "string", "pattern": "^[a-z]+$" } }, "suffixes": { "type": "array", - "items": { "pattern": "^[a-zA-Z0-9]+$" } + "items": { "type": "string", "pattern": "^[a-zA-Z0-9]+$" } }, "extensions": { "type": "array", "items": { "type": "string" } }, "entities": { From c6bed8c94f0debf0343dabba9c591ab1908ed089 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Tue, 20 Aug 2024 11:17:39 -0400 Subject: [PATCH 7/7] fix(schema): Add tolerance to REPETITION_TIME_MISMATCH rule --- src/schema/rules/checks/func.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/schema/rules/checks/func.yaml b/src/schema/rules/checks/func.yaml index 
a908bfa345..aff192b5fd 100644 --- a/src/schema/rules/checks/func.yaml +++ b/src/schema/rules/checks/func.yaml @@ -40,7 +40,9 @@ RepetitionTimeMismatch: - type(sidecar.RepetitionTime) != "null" - type(nifti_header) != "null" checks: - - sidecar.RepetitionTime == nifti_header.pixdim[4] + # Allow a 1 ms tolerance; both checks must pass (implicit AND) + - sidecar.RepetitionTime - nifti_header.pixdim[4] < 0.001 + - sidecar.RepetitionTime - nifti_header.pixdim[4] > -0.001 # 54 BoldNot4d: