From 7d870a7750ee949efad91a6893a6c172c80ddcae Mon Sep 17 00:00:00 2001 From: hechth Date: Mon, 3 Jun 2024 14:25:46 +0200 Subject: [PATCH 1/2] initial commit --- tools/matchms/matchms_remove_spectra.xml | 48 +++++++------------ .../remove_spectra/require_filter.msp | 1 + .../require_inchi_removed_spectra.msp | 1 + 3 files changed, 19 insertions(+), 31 deletions(-) diff --git a/tools/matchms/matchms_remove_spectra.xml b/tools/matchms/matchms_remove_spectra.xml index 32782885..c2f7b93a 100644 --- a/tools/matchms/matchms_remove_spectra.xml +++ b/tools/matchms/matchms_remove_spectra.xml @@ -1,4 +1,4 @@ - + Filters spectra based on metadata presence @@ -28,9 +28,8 @@ from matchms.filtering import require_compound_name, require_formula, require_pr require_retention_time, require_retention_index, require_valid_annotation from matchms.importing import load_from_mgf, load_from_msp -#set metadata_fields = str("', '").join([str($f) for $f in $metadata_fields]) -required_metadata = "$metadata_fields" - +#set metadata_fields = [str($f) for $f in $metadata_fields] +required_metadata = [x for x in $metadata_fields] if "$spectra.ext" == "msp": spectra = list(load_from_msp("${spectra}")) elif "$spectra.ext" == 'mgf': @@ -40,57 +39,44 @@ else: filtered_spectra = [] removed_spectra = [] -keep = False for spectrum in spectra: + keep = True + if 'smiles' in required_metadata: - keep = is_valid_smiles(spectrum.get('smiles')) + keep = keep and is_valid_smiles(spectrum.get('smiles', '').strip()) if 'inchi' in required_metadata: - keep = is_valid_inchi(spectrum.get('inchi')) + keep = keep and is_valid_inchi(spectrum.get('inchi', '').strip()) if 'inchikey' in required_metadata: - keep = is_valid_inchikey(spectrum.get('inchikey')) + inchikey = spectrum.get('inchikey', '') + keep = keep and is_valid_inchikey(inchikey.strip()) if 'precursor_mz' in required_metadata: result = require_precursor_mz(spectrum) - if result is not None: - keep = True - else: - keep = False + keep = keep and result is
not None if 'valid_annotation' in required_metadata: result = require_valid_annotation(spectrum) - if result is not None: - keep = True - else: - keep = False - + keep = keep and result is not None + if 'formula' in required_metadata: result = require_formula(spectrum) - if result is not None: - keep = True - else: - keep = False + keep = keep and result is not None if 'compound_name' in required_metadata: result = require_compound_name(spectrum) - if result is not None: - keep = True - else: - keep = False - + keep = keep and result is not None + if 'retention_time' in required_metadata: result = require_retention_time(spectrum) - if result is not None: - keep = True - else: - keep = False + keep = keep and result is not None if 'retention_index' in required_metadata: result = require_retention_index(spectrum) if result is not None: - keep = True + keep = keep and result.get('retention_index') > 0 else: keep = False diff --git a/tools/matchms/test-data/remove_spectra/require_filter.msp b/tools/matchms/test-data/remove_spectra/require_filter.msp index 46b4e9e0..73f327eb 100644 --- a/tools/matchms/test-data/remove_spectra/require_filter.msp +++ b/tools/matchms/test-data/remove_spectra/require_filter.msp @@ -4,6 +4,7 @@ COMMENT: SpectrumID: 1519953; Source: C4-1998-38-3; Class: Benzenoids; CASRN not PARENT_MASS: 347.930801 PUBCHEMID: 10970124 NOMINAL_MASS: 348 +RETENTION_INDEX: 0 SMILES: nan NUM PEAKS: 3 292.0 999.0 diff --git a/tools/matchms/test-data/remove_spectra/require_inchi_removed_spectra.msp b/tools/matchms/test-data/remove_spectra/require_inchi_removed_spectra.msp index 5163cd76..ec4e5d1c 100644 --- a/tools/matchms/test-data/remove_spectra/require_inchi_removed_spectra.msp +++ b/tools/matchms/test-data/remove_spectra/require_inchi_removed_spectra.msp @@ -4,6 +4,7 @@ COMMENT: SpectrumID: 1519953; Source: C4-1998-38-3; Class: Benzenoids; CASRN not PARENT_MASS: 347.930801 PUBCHEMID: 10970124 NOMINAL_MASS: 348 +RETENTION_INDEX: 0.0 SMILES: nan NUM PEAKS: 3 
292.0 999.0 From 5462c9424b2486722674eadce5243eb0bb8c8a75 Mon Sep 17 00:00:00 2001 From: hechth Date: Mon, 3 Jun 2024 14:29:38 +0200 Subject: [PATCH 2/2] explicit float parsing --- tools/matchms/matchms_remove_spectra.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/matchms/matchms_remove_spectra.xml b/tools/matchms/matchms_remove_spectra.xml index c2f7b93a..4ef55f46 100644 --- a/tools/matchms/matchms_remove_spectra.xml +++ b/tools/matchms/matchms_remove_spectra.xml @@ -76,7 +76,7 @@ for spectrum in spectra: if 'retention_index' in required_metadata: result = require_retention_index(spectrum) if result is not None: - keep = keep and result.get('retention_index') > 0 + keep = keep and float(result.get('retention_index', 0)) > 0 else: keep = False