Skip to content

Commit

Permalink
Merge pull request RECETOX#486 from RECETOX/wverastegui/issue482
Browse files Browse the repository at this point in the history
Read RI from comments and add them to RI column field
  • Loading branch information
hechth authored Feb 22, 2024
2 parents 45ad41f + 26ac471 commit de7fd09
Show file tree
Hide file tree
Showing 9 changed files with 179 additions and 7,413 deletions.
9 changes: 9 additions & 0 deletions tools/riassigner/.shed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,12 @@ repositories:
- riassigner.xml
- macros.xml
- test-data
assign_ri_from_comment:
  description: "Assign RI from Comment Field is a tool for assigning retention indices from the comment field in csv, tsv, parquet, or msp files."
  # Literal block scalar ("|"): the text must not be wrapped in quotes,
  # otherwise the quote characters become part of the description itself.
  long_description: |
    Assign RI from Comment Field is a tool for assigning retention indices from the comment field in csv, tsv, parquet, or msp files.
    The tool will read RI from the comment field and assign them to the RI field.
  include:
    - riassigner_from_comment.xml
    - macros.xml
    - test-data
9 changes: 7 additions & 2 deletions tools/riassigner/macros.xml
Original file line number Diff line number Diff line change
@@ -1,19 +1,24 @@
<macros>
<token name="@TOOL_VERSION@">0.3.4</token>
<token name="@TOOL_VERSION@">0.4.0</token>
<!-- Macro "creator": the people and organization credited as authors of the tools importing this file. -->
<!-- NOTE(review): the organization email value looks like a redacted placeholder; confirm the real address. -->
<xml name="creator">
    <creator>
        <person givenName="Helge"
                familyName="Hecht"
                identifier="0000-0001-6744-996X"
                url="https://github.com/hechth"/>
        <person givenName="Wudmir"
                familyName="Rojas"
                identifier="0000-0001-7036-9987"
                url="https://github.com/wverastegui"/>
        <organization name="RECETOX MUNI"
                      url="https://www.recetox.muni.cz/"
                      email="[email protected]"/>
    </creator>
</xml>

<token name="@HELP@">
<![CDATA[
RIAssigner can be used to read data from .msp, .csv and .tsv files using matchms and pandas and to compute the
Expand Down
73 changes: 73 additions & 0 deletions tools/riassigner/riassigner_from_comment.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<tool id="riassigner_from_comment" name="RIAssigner init from comment" version="@TOOL_VERSION@+galaxy4" profile="21.09">
    <!-- Galaxy wrapper that loads a dataset with RIAssigner, calls init_ri_from_comment()
         for the selected GC column type and writes the result back in the input's format. -->
    <description>Assign different retention indices from unstructured comment metadata</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="creator"/>

    <xrefs>
        <xref type="bio.tools">riassigner</xref>
    </xrefs>

    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">riassigner</requirement>
    </requirements>

    <!-- Runs the generated Python script, then moves its result file onto the Galaxy output.
         All paths are single-quoted so the shell passes them through as single words. -->
    <command detect_errors="exit_code"><![CDATA[
        python '${script}' &&
        mv 'results.${input.ext}' '${output}'
    ]]>
    </command>

    <!-- The script body stays at column 0: it is emitted verbatim as Python source.
         Tabular extensions are read with PandasData, everything else with MatchMSData. -->
    <configfiles>
        <configfile name="script">
from RIAssigner.data import PandasData, MatchMSData
#if $input.ext in ["csv", "tsv", "parquet"]
query = PandasData('${input}', '${input.ext}', rt_unit="seconds")
#else
query = MatchMSData('${input}', '${input.ext}', rt_unit="seconds")
#end if
query.init_ri_from_comment('${format}')
query.write('results.${input.ext}')
        </configfile>
    </configfiles>

    <inputs>
        <!-- NOTE(review): mgf is accepted here, but the help text and the tests only cover
             csv, tsv, parquet and msp; confirm that RIAssigner can read and write mgf. -->
        <param name="input" type="data" format="csv,tsv,parquet,msp,mgf" label="Input File">
            <help>
                File with 'comment' metadata in which to assign the selected retention index type from 'comment' to 'retention_index'.
            </help>
        </param>
        <param name="format" type="select" label="GC column type" help="Type of gas chromatographic column for which to set the retention index value">
            <option value="SemiStdNP">Semi Standard Non-Polar</option>
            <option value="StdNP">Standard Non-Polar</option>
            <option value="StdPolar">Standard Polar</option>
        </param>
    </inputs>

    <outputs>
        <data format_source="input" name="output" label="${on_string} with ${format} RI"/>
    </outputs>

    <!-- The column type is set explicitly so the expected files (SemiStdNP values)
         do not depend on the order of the select options above. -->
    <tests>
        <test>
            <param name="input" value="nist_to_ri_2mols_input.csv" ftype="csv"/>
            <param name="format" value="SemiStdNP"/>
            <output name="output" file="nist_to_ri_2mols_output.csv" ftype="csv"/>
        </test>
        <test>
            <param name="input" value="nist_ei_ms_2mols_input.msp" ftype="msp"/>
            <param name="format" value="SemiStdNP"/>
            <output name="output" file="nist_ei_ms_2mols_output.msp" ftype="msp"/>
        </test>
    </tests>

    <help><![CDATA[
This tool can be used to assign retention indices (RI) from the comment field in csv, tsv, parquet, or msp files.
The tool will read RI from the comment field and assign them to the RI field. The type of gas chromatographic column
for which to set the retention index value can be selected.
]]></help>

    <citations>
        <citation type="doi">10.21105/joss.04337</citation>
    </citations>
</tool>

3,705 changes: 0 additions & 3,705 deletions tools/riassigner/test-data/aplcms_aligned_peaks.csv

Large diffs are not rendered by default.

40 changes: 40 additions & 0 deletions tools/riassigner/test-data/nist_ei_ms_2mols_input.msp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
NAME: Water
FORMULA: H2O
MW: 18
CASNO: 7732185
ID: 9
COMMENT: SemiStdNP=317 StdNP=323/4/2 StdPolar=1053/14/2 ; NIST MS# 7, Seq# M67
NUM PEAKS: 5
STDINCHI: InChI=1S/H2O/h1H2
SMILES: O
16.0 8.99
17.0 211.81
18.0 999.0
19.0 5.0
20.0 3.0

NAME: Methyl Alcohol
FORMULA: CH4O
MW: 32
CASNO: 67561
ID: 32
COMMENT: SemiStdNP=354/16/10 StdNP=379/7/34 StdPolar=903/8/35 ; NIST MS# 229809, Seq# M1806
NUM PEAKS: 16
STDINCHI: InChI=1S/CH4O/c1-2/h2H,1H3
SMILES: CO
2.0 3.0
12.0 2.0
13.0 5.99
14.0 15.99
15.0 122.89
16.0 1.0
17.0 3.0
18.0 6.99
19.0 1.0
28.0 44.96
29.0 444.6
30.0 63.94
31.0 999.0
32.0 742.33
33.0 10.99
34.0 1.0
43 changes: 43 additions & 0 deletions tools/riassigner/test-data/nist_ei_ms_2mols_output.msp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
FORMULA: H2O
CASNO: 7732185
ID: 9
COMMENT: SemiStdNP=317 StdNP=323/4/2 StdPolar=1053/14/2 ; NIST MS# 7, Seq# M67
SMILES: O
COMPOUND_NAME: Water
NOMINAL_MASS: 18
INCHI: InChI=1S/H2O/h1H2
RETENTION_INDEX: 317
NUM PEAKS: 5
16.0 8.99
17.0 211.81
18.0 999.0
19.0 5.0
20.0 3.0

FORMULA: CH4O
CASNO: 67561
ID: 32
COMMENT: SemiStdNP=354/16/10 StdNP=379/7/34 StdPolar=903/8/35 ; NIST MS# 229809, Seq# M1806
SMILES: CO
COMPOUND_NAME: Methyl Alcohol
NOMINAL_MASS: 32
INCHI: InChI=1S/CH4O/c1-2/h2H,1H3
RETENTION_INDEX: 354
NUM PEAKS: 16
2.0 3.0
12.0 2.0
13.0 5.99
14.0 15.99
15.0 122.89
16.0 1.0
17.0 3.0
18.0 6.99
19.0 1.0
28.0 44.96
29.0 444.6
30.0 63.94
31.0 999.0
32.0 742.33
33.0 10.99
34.0 1.0

3 changes: 3 additions & 0 deletions tools/riassigner/test-data/nist_to_ri_2mols_input.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
casno,num_peaks,compound_name,retention_index,formula,id,comment,inchi,smiles,nominal_mass,
7732185,5,Water,,H2O,9,"SemiStdNP=317 StdNP=323/4/2 StdPolar=1053/14/2 ; NIST MS# 7, Seq# M67",InChI=1S/H2O/h1H2,O,18
67561,16,Methyl Alcohol,,CH4O,32,"SemiStdNP=354/16/10 StdNP=379/7/34 StdPolar=903/8/35 ; NIST MS# 229809, Seq# M1806","InChI=1S/CH4O/c1-2/h2H,1H3",CO,32
3 changes: 3 additions & 0 deletions tools/riassigner/test-data/nist_to_ri_2mols_output.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
casno,num_peaks,compound_name,retention_index,formula,id,comment,inchi,smiles,nominal_mass,unnamed: 10
7732185,5,Water,317.0,H2O,9,"SemiStdNP=317 StdNP=323/4/2 StdPolar=1053/14/2 ; NIST MS# 7, Seq# M67",InChI=1S/H2O/h1H2,O,18,
67561,16,Methyl Alcohol,354.0,CH4O,32,"SemiStdNP=354/16/10 StdNP=379/7/34 StdPolar=903/8/35 ; NIST MS# 229809, Seq# M1806","InChI=1S/CH4O/c1-2/h2H,1H3",CO,32,
Loading

0 comments on commit de7fd09

Please sign in to comment.