-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8db07ed
commit bc5b20a
Showing
7 changed files
with
6,975 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
<macros> | ||
<!-- Version string substituted into each tool's version attribute and into the ms2deepscore package requirement. --> | ||
<token name="@TOOL_VERSION@">2.0.0</token> | ||
|
||
<!-- Author (person) and organization metadata; tools that import this file pull it in with an expand of the "creator" macro. --> | ||
<xml name="creator"> | ||
<creator> | ||
<person | ||
givenName="Zargham" | ||
familyName="Ahmad" | ||
url="https://github.com/zargham-ahmad" | ||
identifier="0000-0002-6096-224X" /> | ||
<organization | ||
url="https://www.recetox.muni.cz/" | ||
email="[email protected]" | ||
name="RECETOX MUNI" /> | ||
</creator> | ||
</xml> | ||
|
||
<!-- Cross-reference block. NOTE(review): the macro is named "edam" but it only emits a bio.tools xref (ms2deepscore), no EDAM terms. --> | ||
<xml name="edam"> | ||
<xrefs> | ||
<xref type="bio.tools">ms2deepscore</xref> | ||
</xrefs> | ||
</xml> | ||
|
||
<!-- Inputs for similarity scoring: the use_scores select chooses between an existing Scores JSON and a pair of | ||
query/reference MSP files; the ONNX model and its JSON parameter file are always required. --> | ||
<xml name="input_param"> | ||
<conditional name="scores"> | ||
<param name="use_scores" label="Use Scores Object" type="select"> | ||
<option value="False" selected="true">FALSE</option> | ||
<option value="True">TRUE</option> | ||
</param> | ||
<when value="True"> | ||
<param label="Scores object" name="scores_in" type="data" format="json" | ||
help="Scores objects calculated previously using one of the matchms similarity tools." /> | ||
</when> | ||
<when value="False"> | ||
<param label="Queries spectra" name="queries" type="data" format="msp" | ||
help="Query mass spectra to match against references."/> | ||
<param label="Reference spectra" name="references" type="data" format="msp" | ||
help="Reference mass spectra to match against as library."/> | ||
</when> | ||
</conditional> | ||
<param name="model" type="data" format="onnx" label="Pre-trained Model"> | ||
<help>Select the pre-trained MS2DeepScore model file (onnx format)</help> | ||
</param> | ||
<param name="model_param" type="data" format="json" label="Model Parameter"> | ||
<help>Select the pre-trained MS2DeepScore model Parameter</help> | ||
</param> | ||
</xml> | ||
|
||
<!-- Inputs of the training tool: the spectra to train on, the JSON model settings and the validation/test split value. --> | ||
<xml name="training_param"> | ||
<param label="Spectra File" name="spectra" type="data" format="msp,mgf" | ||
help="Spectra file that should be used for training. (it will be split in train, val and test)"/> | ||
<param name="model_param" type="data" format="json" label="Model Settings" help="json file with the MS2Deepscore model settings."/> | ||
<!-- min is 1, not 0: a split value of 0 can never describe a usable validation/test set. | ||
NOTE(review): ms2deepscore appears to use this value as a divisor (1/N of the InChIKeys each for | ||
validation and test), which would make 0 a division by zero; confirm against the upstream documentation. --> | ||
<param name="validation_split_fraction" type="integer" min="1" max="100" value="20" label="Validation split fraction" | ||
help="The fraction of the inchikeys that will be used for validation and test. Must be at least 1."/> | ||
</xml> | ||
|
||
<!-- Form sections for the configuration generator tool. Each parameter is read by the generator's Python wrapper | ||
and passed on as a model setting, so free-text parameters are validated here before they reach that script. --> | ||
<xml name="config_generator"> | ||
<section name="model_structure" title="Model Structure" expanded="true"> | ||
<param name="base_dims" type="text" label="Base Dimensions" value="2000,2000,2000" help="The in between layers to be used."> | ||
<!-- The generator wrapper splits this value on commas and calls int() on every item. --> | ||
<validator type="regex" message="Enter a comma-separated list of positive integers, e.g. 2000,2000,2000">^\s*[0-9]+\s*(,\s*[0-9]+\s*)*$</validator> | ||
</param> | ||
<param name="embedding_dim" type="integer" label="Embedding Dimension" value="400" help="The dimension of the final embedding." /> | ||
<param name="ionisation_mode" type="select" label="Ionisation Mode"> | ||
<option value="positive" selected="true">Positive</option> | ||
<option value="negative">Negative</option> | ||
<option value="both">Both</option> | ||
</param> | ||
</section> | ||
|
||
<section name="training_settings" title="Training Settings" expanded="true"> | ||
<param name="dropout_rate" type="float" label="Dropout Rate" value="0.0" /> | ||
<param name="learning_rate" type="float" label="Learning Rate" value="0.00025" /> | ||
<param name="epochs" type="integer" label="Epochs" value="250" /> | ||
<param name="patience" type="integer" label="Patience" value="20" help="How long the model should keep training if validation does not improve" /> | ||
<param name="loss_function" type="select" label="Loss Function"> | ||
<option value="mse" selected="true">Mean Squared Error (mse)</option> | ||
<option value="mae">Mean Absolute Error (mae)</option> | ||
<option value="rmse">Root Mean Squared Error (rmse)</option> | ||
<option value="risk_mae">Risk Aware MAE (risk_aware_mae)</option> | ||
<option value="risk_mse">Risk Aware MSE (risk_aware_mse)</option> | ||
</param> | ||
<param name="weighting_factor" type="integer" label="Weighting Factor" value="0" /> | ||
</section> | ||
|
||
<section name="tensorization_settings" title="Tensorization Settings" expanded="true"> | ||
<param name="min_mz" type="integer" label="Min m/z" value="10" /> | ||
<param name="max_mz" type="integer" label="Max m/z" value="1000" /> | ||
<param name="mz_bin_width" type="float" label="m/z Bin Width" value="0.1" /> | ||
<param name="intensity_scaling" type="float" label="Intensity Scaling" value="0.5" /> | ||
</section> | ||
|
||
<section name="data_generator_settings" title="Data generator settings" expanded="true"> | ||
<param name="batch_size" type="integer" value="32" label="Batch Size" help="Number of pairs per batch" /> | ||
</section> | ||
|
||
<section name="compound_pairs_selection_settings" title="Compound pairs selection settings" expanded="true"> | ||
<param name="average_pairs_per_bin" type="integer" value="20" label="Average pairs per bin" help="The aimed average number of pairs of spectra per spectrum in each bin." /> | ||
<param name="random_seed" type="text" label="Random seed" value="None" help="Specify random seed for reproducible random number generation. Use None or a non-negative integer."> | ||
<!-- The value ends up inside the generated Python script, so restrict it to the two supported forms. --> | ||
<validator type="regex" message="Random seed must be None or a non-negative integer">^(None|[0-9]+)$</validator> | ||
</param> | ||
</section> | ||
|
||
<section name="tanimoto_score_settings" title="Tanimoto Score Settings" expanded="true"> | ||
<param name="fingerprint_type" type="text" value="daylight" label="Fingerprint Type" help="The fingerprint type that should be used for tanimoto score calculations." /> | ||
<param name="fingerprint_nbits" type="integer" label="Fingerprint Number of Bits" value="2048" help="The number of bits to use for the fingerprint." /> | ||
</section> | ||
</xml> | ||
|
||
<!-- Publications cited by every tool that expands this macro. A citation of type "doi" takes the bare DOI, | ||
not a resolver URL. --> | ||
<xml name="citations"> | ||
<citations> | ||
<citation type="doi">10.1186/s13321-021-00558-4</citation> | ||
<citation type="doi">10.1101/2024.03.25.586580</citation> | ||
</citations> | ||
</xml> | ||
|
||
<!-- Python/Cheetah snippet defining `scores`: loaded from the Scores JSON dataset when use_scores is "True", | ||
otherwise built as a non-symmetric Scores object from the reference and query MSP files. --> | ||
<token name="@init_scores@"> | ||
from matchms.importing import load_from_msp, scores_from_json | ||
from matchms import Scores | ||
#if $scores.use_scores == "True" | ||
scores = scores_from_json("${scores_in}") | ||
#else | ||
scores = Scores(references=list(load_from_msp("$references")), queries=list(load_from_msp("$queries")), is_symmetric=False) | ||
#end if | ||
</token> | ||
|
||
<!-- Python snippet that sets the matchms logger level to WARNING. --> | ||
<token name="@init_logger@"> | ||
from matchms import set_matchms_logger_level | ||
set_matchms_logger_level("WARNING") | ||
</token> | ||
|
||
<!-- Python snippet that reads the JSON dataset selected as model_param into `model_params` and, when the keys | ||
are present, converts base_dims to a tuple, same_prob_bins to a NumPy array and every additional_metadata | ||
entry to a 2-tuple. --> | ||
<token name="@json_load@"> | ||
import numpy as np | ||
import json | ||
|
||
with open("$model_param", 'r') as json_file: | ||
model_params = json.load(json_file) | ||
|
||
# Conditionally convert specific keys if they are present | ||
if 'base_dims' in model_params: | ||
model_params['base_dims'] = tuple(model_params['base_dims']) | ||
|
||
if 'same_prob_bins' in model_params: | ||
model_params['same_prob_bins'] = np.array(model_params['same_prob_bins']) | ||
|
||
if 'additional_metadata' in model_params: | ||
model_params['additional_metadata'] = [ | ||
(entry[0], entry[1]) for entry in model_params['additional_metadata'] | ||
] | ||
</token> | ||
</macros> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
<!-- Galaxy tool that collects the MS2DeepScore settings from the form and writes them to a JSON dataset. --> | ||
<tool id="ms2deepscore_config_generator" name="MS2DeepScore Configuration Generator" version="@TOOL_VERSION@+galaxy0"> | ||
<description>Generates model parameters for MS2DeepScore in JSON format</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="creator"/> | ||
<expand macro="edam" /> | ||
|
||
<requirements> | ||
<requirement type="package" version="@TOOL_VERSION@">ms2deepscore</requirement> | ||
</requirements> | ||
|
||
<!-- The generated script path is quoted so that the shell always sees it as one argument. --> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
python3 '${python_wrapper}' | ||
]]></command> | ||
<configfiles> | ||
<configfile name="python_wrapper"> | ||
import numpy as np | ||
from typing import Optional | ||
from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore | ||
|
||
# The seed is free text from the form; parse it as data so that it is never executed as Python code. | ||
# Both an empty value and the literal None mean that no seed is set. | ||
seed_text = "$compound_pairs_selection_settings.random_seed".strip() | ||
random_seed: Optional[int] = None if seed_text in ("", "None") else int(seed_text) | ||
|
||
params = { | ||
"base_dims": tuple(int(dim.strip()) for dim in "$model_structure.base_dims".split(",")), | ||
"embedding_dim": $model_structure.embedding_dim, | ||
"ionisation_mode": "$model_structure.ionisation_mode", | ||
"dropout_rate": $training_settings.dropout_rate, | ||
"learning_rate": $training_settings.learning_rate, | ||
"epochs": $training_settings.epochs, | ||
"patience": $training_settings.patience, | ||
"loss_function": "$training_settings.loss_function", | ||
"weighting_factor": $training_settings.weighting_factor, | ||
"min_mz": $tensorization_settings.min_mz, | ||
"max_mz": $tensorization_settings.max_mz, | ||
"mz_bin_width": $tensorization_settings.mz_bin_width, | ||
"intensity_scaling": $tensorization_settings.intensity_scaling, | ||
"batch_size": $data_generator_settings.batch_size, | ||
"average_pairs_per_bin": $compound_pairs_selection_settings.average_pairs_per_bin, | ||
"same_prob_bins": np.array([(0, 0.2), (0.2, 1.0)]), | ||
"random_seed": random_seed, | ||
"fingerprint_type": "$tanimoto_score_settings.fingerprint_type", | ||
"fingerprint_nbits": $tanimoto_score_settings.fingerprint_nbits | ||
} | ||
|
||
settings = SettingsMS2Deepscore(**params) | ||
settings.save_to_file("$output_file") | ||
</configfile> | ||
</configfiles> | ||
|
||
<inputs> | ||
<expand macro="config_generator" /> | ||
</inputs> | ||
|
||
<outputs> | ||
<data name="output_file" format="json" label="Model Parameter JSON" /> | ||
</outputs> | ||
|
||
<tests> | ||
<test expect_num_outputs="1"> | ||
<param name="base_dims" value="200,200" /> | ||
<param name="embedding_dim" value="100" /> | ||
<param name="ionisation_mode" value="negative" /> | ||
<param name="epochs" value="2" /> | ||
<param name="batch_size" value="2" /> | ||
<param name="average_pairs_per_bin" value="2" /> | ||
<param name="random_seed" value="42"/> | ||
<output name="output_file" value="Model_Parameter_JSON.json" ftype="json" compare="diff" lines_diff="2"/> | ||
</test> | ||
</tests> | ||
|
||
<help> | ||
This tool generates model parameters for MS2DeepScore in JSON format based on the provided settings. | ||
</help> | ||
<expand macro="citations"/> | ||
</tool> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
<!-- Galaxy tool that scores query spectra against reference spectra with a MS2DeepScore model whose weights are | ||
read from an ONNX file, and writes the resulting Scores object as JSON. --> | ||
<tool id="ms2deepscore_similarity" name="MS2DeepScore Similarity" version="@TOOL_VERSION@+galaxy0"> | ||
<description>Compute similarity scores using a pre-trained MS2DeepScore model</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="creator"/> | ||
<expand macro="edam" /> | ||
|
||
<requirements> | ||
<requirement type="package" version="@TOOL_VERSION@">ms2deepscore</requirement> | ||
<requirement type="package" version="1.16.2">onnx</requirement> | ||
</requirements> | ||
|
||
<!-- The generated script path is quoted so that the shell always sees it as one argument. --> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
python3 '${python_wrapper}' | ||
]]></command> | ||
<configfiles> | ||
<configfile name="python_wrapper"> | ||
@init_logger@ | ||
|
||
import onnx | ||
import torch | ||
from ms2deepscore import MS2DeepScore | ||
from ms2deepscore.models.SiameseSpectralModel import SiameseSpectralModel | ||
from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore | ||
|
||
# Read the ONNX file selected as the model input | ||
onnx_model = onnx.load("$model") | ||
|
||
# Extract the initializers (weights and biases) | ||
initializers = {init.name: onnx.numpy_helper.to_array(init) for init in onnx_model.graph.initializer} | ||
|
||
# Convert NumPy arrays to PyTorch tensors | ||
state_dict = {name: torch.tensor(np_array) for name, np_array in initializers.items()} | ||
|
||
@json_load@ | ||
|
||
# Rebuild the network from the JSON settings, then load the weights taken from the ONNX file | ||
model = SiameseSpectralModel(settings=SettingsMS2Deepscore(**model_params)) | ||
model.load_state_dict(state_dict) | ||
model.eval() | ||
|
||
similarity = MS2DeepScore(model) | ||
name="MS2DeepScore_similarity_scores" | ||
|
||
@init_scores@ | ||
|
||
# Score only the row/column index pairs already stored in the Scores object | ||
layer = similarity.sparse_array( | ||
references=scores.references, | ||
queries=scores.queries, | ||
idx_row = scores._scores.row, | ||
idx_col = scores._scores.col, | ||
is_symmetric=False) | ||
|
||
scores._scores.add_sparse_data(scores._scores.row, scores._scores.col, layer, name) | ||
|
||
# Range filter on the new score layer with a lower bound of 0 | ||
scores.filter_by_range(name=name, low=0) | ||
scores.to_json("$similarity_scores") | ||
</configfile> | ||
</configfiles> | ||
|
||
<inputs> | ||
<expand macro="input_param" /> | ||
</inputs> | ||
|
||
<outputs> | ||
<data label="ms2deepscore scores of ${on_string}" name="similarity_scores" format="json"/> | ||
</outputs> | ||
|
||
<help> | ||
ms2deepscore provides a Siamese neural network that is trained to predict molecular structural | ||
similarities (Tanimoto scores) from pairs of mass spectrometry spectra. | ||
</help> | ||
|
||
<expand macro="citations"/> | ||
</tool> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
<!-- Galaxy tool that trains a MS2DeepScore model on a spectra file and exports the trained model to ONNX. --> | ||
<tool id="ms2deepscore_training" name="MS2DeepScore training" version="@TOOL_VERSION@+galaxy0"> | ||
<description>Train a MS2DeepScore model on annotated mass spectra</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="creator"/> | ||
<expand macro="edam" /> | ||
|
||
<requirements> | ||
<requirement type="package" version="@TOOL_VERSION@">ms2deepscore</requirement> | ||
<requirement type="package" version="1.16.2">onnx</requirement> | ||
</requirements> | ||
|
||
<!-- The input is copied to a file name that carries its datatype extension. Paths are quoted and the steps are | ||
chained so that the script does not run after a failed mkdir or cp. --> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
mkdir processing && | ||
cp '$spectra' processing/input."$spectra.ext" && | ||
python3 '${python_wrapper}' | ||
]]></command> | ||
<configfiles> | ||
<configfile name="python_wrapper"> | ||
import onnx | ||
import os | ||
import torch | ||
from ms2deepscore.models import load_model | ||
from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore | ||
from ms2deepscore.wrapper_functions.training_wrapper_functions import train_ms2deepscore_wrapper, StoreTrainingData | ||
|
||
@json_load@ | ||
|
||
settings = SettingsMS2Deepscore(**model_params) | ||
file = "processing/input.$spectra.ext" | ||
# The returned value is used below as the sub-directory of the trained-models folder | ||
directory = train_ms2deepscore_wrapper(file, settings, $validation_split_fraction) | ||
|
||
expected_file_names = StoreTrainingData(file) | ||
pt_model_path = os.path.join(expected_file_names.trained_models_folder, directory, settings.model_file_name) | ||
|
||
model = load_model(pt_model_path) | ||
model.eval() | ||
|
||
batch_size = 1 | ||
number_of_bins = settings.number_of_bins() | ||
additional_inputs = len(settings.additional_metadata) | ||
|
||
# Create dummy inputs | ||
spectra_tensors_1 = torch.randn(batch_size, number_of_bins) | ||
spectra_tensors_2 = torch.randn(batch_size, number_of_bins) | ||
metadata_1 = torch.randn(batch_size, additional_inputs) | ||
metadata_2 = torch.randn(batch_size, additional_inputs) | ||
|
||
# Export the model to ONNX | ||
torch.onnx.export( | ||
model, | ||
(spectra_tensors_1, spectra_tensors_2, metadata_1, metadata_2), | ||
"$onnx_trained_model", | ||
verbose=True | ||
) | ||
|
||
</configfile> | ||
</configfiles> | ||
|
||
<inputs> | ||
<expand macro="training_param" /> | ||
</inputs> | ||
|
||
<outputs> | ||
<data label="Trained model" name="onnx_trained_model" format="onnx"/> | ||
</outputs> | ||
|
||
<tests> | ||
<test expect_num_outputs="1"> | ||
<param name="spectra" value="clean_spectra.mgf" ftype="mgf"/> | ||
<param name="model_param" value="Model_Parameter_JSON.json" ftype="json" /> | ||
<param name="validation_split_fraction" value="5"/> | ||
<output name="onnx_trained_model" value="Trained_model.onnx" ftype="onnx" compare="sim_size"/> | ||
</test> | ||
</tests> | ||
|
||
<help> | ||
This tool trains a new MS2DeepScore model on the provided spectra, using the given model settings, and | ||
exports the trained model in ONNX format. | ||
|
||
ms2deepscore provides a Siamese neural network that is trained to predict molecular structural | ||
similarities (Tanimoto scores) from pairs of mass spectrometry spectra. | ||
</help> | ||
|
||
<expand macro="citations"/> | ||
</tool> |
Oops, something went wrong.