From 6ff1356e4d0916948188d0b9f7dbab336a1333fd Mon Sep 17 00:00:00 2001 From: paulzierep Date: Wed, 6 Nov 2024 12:24:46 +0100 Subject: [PATCH] init first unittest for workflowhub --- .github/run_unittests.yaml | 26 + sources/bin/__init__.py | 0 sources/bin/tests/readme.md | 6 + sources/bin/tests/test-data/test_tools.json | 693 +++++++++++++++ .../tests/test-data/workflowhub_api_mock.json | 806 ++++++++++++++++++ .../tests/test_extract_galaxy_workflows.py | 85 ++ sources/bin/tests/test_get_public_server.py | 35 - sources/bin/tests/test_tool_stats.py | 58 -- 8 files changed, 1616 insertions(+), 93 deletions(-) create mode 100644 .github/run_unittests.yaml create mode 100644 sources/bin/__init__.py create mode 100644 sources/bin/tests/readme.md create mode 100644 sources/bin/tests/test-data/test_tools.json create mode 100644 sources/bin/tests/test-data/workflowhub_api_mock.json create mode 100644 sources/bin/tests/test_extract_galaxy_workflows.py delete mode 100644 sources/bin/tests/test_get_public_server.py delete mode 100644 sources/bin/tests/test_tool_stats.py diff --git a/.github/run_unittests.yaml b/.github/run_unittests.yaml new file mode 100644 index 00000000..132d5adb --- /dev/null +++ b/.github/run_unittests.yaml @@ -0,0 +1,26 @@ +name: Run tests + +on: + pull_request: + branches: + - main + paths: + - 'bin/**' # This will trigger the workflow only if files in the 'bin' folder are modified. 
+ +jobs: + unittest: + runs-on: ubuntu-20.04 + strategy: + matrix: + python-version: ['3.11'] + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install requirements + run: python -m pip install -r requirements.txt + - name: Run unittests + run: | + PYTHONPATH=bin python -m unittest discover -s bin/tests diff --git a/sources/bin/__init__.py b/sources/bin/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sources/bin/tests/readme.md b/sources/bin/tests/readme.md new file mode 100644 index 00000000..ac40fb36 --- /dev/null +++ b/sources/bin/tests/readme.md @@ -0,0 +1,6 @@ +# Run the tests locally + +``` +cd codex/sources +PYTHONPATH=bin python -m unittest discover -s bin/tests +``` \ No newline at end of file diff --git a/sources/bin/tests/test-data/test_tools.json b/sources/bin/tests/test-data/test_tools.json new file mode 100644 index 00000000..fc2ec3bc --- /dev/null +++ b/sources/bin/tests/test-data/test_tools.json @@ -0,0 +1,693 @@ +[ + { + "Galaxy wrapper id":"abricate", + "Galaxy tool ids":[ + "abricate", + "abricate_list", + "abricate_summary" + ], + "Description":"Mass screening of contigs for antiobiotic resistance genes", + "bio.tool id":"ABRicate", + "bio.tool ids":[ + "ABRicate" + ], + "biii":null, + "bio.tool name":"ABRicate", + "bio.tool description":"Mass screening of contigs for antimicrobial resistance or virulence genes.", + "EDAM operation":[ + "Antimicrobial resistance prediction" + ], + "EDAM topic":[ + "Genomics", + "Microbiology" + ], + "Status":"Up-to-date", + "Source":"https://github.com/tseemann/abricate", + "ToolShed categories":[ + "Sequence Analysis" + ], + "ToolShed id":"abricate", + "Date of first commit of the suite":"2017-07-10", + "Galaxy wrapper owner":"iuc", + "Galaxy wrapper source":"https://github.com/galaxyproject/tools-iuc/tree/master/tools/abricate/", + "Galaxy wrapper parsed 
folder":"https://github.com/galaxyproject/tools-iuc/tree/main/tools/abricate", + "Galaxy wrapper version":"1.0.1", + "Conda id":"abricate", + "Conda version":"1.0.1", + "EDAM operation (no superclasses)":[ + "Antimicrobial resistance prediction" + ], + "EDAM topic (no superclasses)":[ + "Genomics", + "Microbiology" + ], + "Available on UseGalaxy.org (Main)":3, + "Available on UseGalaxy.org.au":3, + "Available on UseGalaxy.eu":3, + "Available on UseGalaxy.fr":3, + "Tools available on UseGalaxy.org (Main)":3, + "Tools available on UseGalaxy.org.au":3, + "Tools available on UseGalaxy.eu":3, + "Tools available on UseGalaxy.fr":3, + "Tools available on APOSTL":0, + "Tools available on ARGs-OAP":0, + "Tools available on CIRM-CFBP":0, + "Tools available on ChemFlow":0, + "Tools available on Coloc-stats":0, + "Tools available on CropGalaxy":0, + "Tools available on Dintor":0, + "Tools available on GASLINI":0, + "Tools available on Galaxy@AuBi":3, + "Tools available on Galaxy@Pasteur":0, + "Tools available on GalaxyTrakr":3, + "Tools available on Genomic Hyperbrowser":0, + "Tools available on GigaGalaxy":0, + "Tools available on HyPhy HIV NGS Tools":0, + "Tools available on IPK Galaxy Blast Suite":0, + "Tools available on ImmPort Galaxy":0, + "Tools available on InteractoMIX":0, + "Tools available on MISSISSIPPI":0, + "Tools available on Mandoiu Lab":0, + "Tools available on MiModD NacreousMap":0, + "Tools available on Oqtans":0, + "Tools available on Palfinder":0, + "Tools available on PepSimili":0, + "Tools available on PhagePromotor":0, + "Tools available on UseGalaxy.be":2, + "Tools available on UseGalaxy.cz":3, + "Tools available on UseGalaxy.no":3, + "Tools available on Viral Variant Visualizer (VVV)":0, + "No. of tool users (5 years) (usegalaxy.eu)":619817, + "No. of tool users (all time) (usegalaxy.eu)":622353, + "Tool usage (5 years) (usegalaxy.eu)":4097, + "Tool usage (all time) (usegalaxy.eu)":4130, + "No. of tool users (5 years) (usegalaxy.org)":320454, + "No. 
of tool users (all time) (usegalaxy.org)":320454, + "Tool usage (5 years) (usegalaxy.org)":2915, + "Tool usage (all time) (usegalaxy.org)":2915, + "No. of tool users (5 years) (usegalaxy.org.au)":496156, + "No. of tool users (all time) (usegalaxy.org.au)":503743, + "Tool usage (5 years) (usegalaxy.org.au)":2012, + "Tool usage (all time) (usegalaxy.org.au)":2227, + "No. of tool users (5 years) - all main servers":1436427, + "No. of tool users (all time) - all main servers":1446550, + "Tool usage (5 years) - all main servers":9024, + "Tool usage (all time) - all main servers":9272 + }, + { + "Galaxy wrapper id":"2d_auto_threshold", + "Galaxy tool ids":[ + "ip_threshold" + ], + "Description":"Automatic thresholding", + "bio.tool id":"scikit-image", + "bio.tool ids":[ + "scikit-image" + ], + "biii":"scikit-image", + "bio.tool name":"scikit-image", + "bio.tool description":"Scikit-image contains image processing algorithms for SciPy, including IO, morphology, filtering, warping, color manipulation, object detection, etc.", + "EDAM operation":[ + "Image analysis", + "Image annotation", + "Visualisation", + "Data handling" + ], + "EDAM topic":[ + "Imaging", + "Software engineering", + "Literature and language" + ], + "Status":"To update", + "Source":"https://github.com/bmcv", + "ToolShed categories":[ + "Imaging" + ], + "ToolShed id":"2d_auto_threshold", + "Date of first commit of the suite":"2024-03-01", + "Galaxy wrapper owner":"imgteam", + "Galaxy wrapper source":"https://github.com/BMCV/galaxy-image-analysis/tree/master/tools/2d_auto_threshold/", + "Galaxy wrapper parsed folder":"https://github.com/paulzierep/Galaxy-Tool-Metadata-Extractor-Test-Wrapper/tree/main/tools/2d_auto_threshold", + "Galaxy wrapper version":"0.0.6-2", + "Conda id":"scikit-image", + "Conda version":null, + "EDAM operation (no superclasses)":[ + "Image analysis", + "Image annotation", + "Visualisation", + "Data handling" + ], + "EDAM topic (no superclasses)":[ + "Imaging", + "Software 
engineering", + "Literature and language" + ], + "Available on UseGalaxy.org (Main)":0, + "Available on UseGalaxy.org.au":1, + "Available on UseGalaxy.eu":1, + "Available on UseGalaxy.fr":0, + "Tools available on UseGalaxy.org (Main)":0, + "Tools available on UseGalaxy.org.au":1, + "Tools available on UseGalaxy.eu":1, + "Tools available on UseGalaxy.fr":0, + "Tools available on APOSTL":0, + "Tools available on BF2I-MAP":0, + "Tools available on BioBix":0, + "Tools available on CIRM-CFBP":0, + "Tools available on Center for Phage Technology (CPT)":0, + "Tools available on ChemFlow":0, + "Tools available on Coloc-stats":0, + "Tools available on CoralSNP":0, + "Tools available on CropGalaxy":0, + "Tools available on Dintor":0, + "Tools available on FreeBioinfo":0, + "Tools available on GASLINI":0, + "Tools available on Galaxy@AuBi":0, + "Tools available on Galaxy@Pasteur":0, + "Tools available on GalaxyTrakr":0, + "Tools available on Genomic Hyperbrowser":0, + "Tools available on GigaGalaxy":0, + "Tools available on HyPhy HIV NGS Tools":0, + "Tools available on IPK Galaxy Blast Suite":0, + "Tools available on ImmPort Galaxy":0, + "Tools available on InteractoMIX":0, + "Tools available on MISSISSIPPI":0, + "Tools available on Mandoiu Lab":0, + "Tools available on MiModD NacreousMap":0, + "Tools available on Oqtans":0, + "Tools available on Palfinder":0, + "Tools available on PepSimili":0, + "Tools available on PhagePromotor":0, + "Tools available on UseGalaxy.be":1, + "Tools available on UseGalaxy.cz":1, + "Tools available on UseGalaxy.no":1, + "Tools available on Viral Variant Visualizer (VVV)":0, + "No. of tool users (5 years) (usegalaxy.eu)":1434, + "No. of tool users (all time) (usegalaxy.eu)":6746, + "Tool usage (5 years) (usegalaxy.eu)":120, + "Tool usage (all time) (usegalaxy.eu)":122, + "No. of tool users (5 years) (usegalaxy.org)":0, + "No. 
of tool users (all time) (usegalaxy.org)":0, + "Tool usage (5 years) (usegalaxy.org)":0, + "Tool usage (all time) (usegalaxy.org)":0, + "No. of tool users (5 years) (usegalaxy.org.au)":305, + "No. of tool users (all time) (usegalaxy.org.au)":305, + "Tool usage (5 years) (usegalaxy.org.au)":11, + "Tool usage (all time) (usegalaxy.org.au)":11, + "No. of tool users (5 years) - all main servers":1739, + "No. of tool users (all time) - all main servers":7051, + "Tool usage (5 years) - all main servers":131, + "Tool usage (all time) - all main servers":133 + }, + { + "Galaxy wrapper id":"abritamr", + "Galaxy tool ids":[ + "abritamr" + ], + "Description":"A pipeline for running AMRfinderPlus and collating results into functional classes", + "bio.tool id":null, + "bio.tool ids":[ + + ], + "biii":null, + "bio.tool name":null, + "bio.tool description":null, + "EDAM operation":[ + + ], + "EDAM topic":[ + + ], + "Status":"To update", + "Source":"https://zenodo.org/record/7370628", + "ToolShed categories":[ + "Sequence Analysis" + ], + "ToolShed id":"abritamr", + "Date of first commit of the suite":"2024-03-01", + "Galaxy wrapper owner":"iuc", + "Galaxy wrapper source":"https://github.com/galaxyproject/tools-iuc/tree/master/tools/abritamr", + "Galaxy wrapper parsed folder":"https://github.com/paulzierep/Galaxy-Tool-Metadata-Extractor-Test-Wrapper/tree/main/tools/abritamr", + "Galaxy wrapper version":"1.0.14", + "Conda id":"abritamr", + "Conda version":"1.0.19", + "EDAM operation (no superclasses)":[ + + ], + "EDAM topic (no superclasses)":[ + + ], + "Available on UseGalaxy.org (Main)":0, + "Available on UseGalaxy.org.au":0, + "Available on UseGalaxy.eu":1, + "Available on UseGalaxy.fr":0, + "Tools available on UseGalaxy.org (Main)":0, + "Tools available on UseGalaxy.org.au":0, + "Tools available on UseGalaxy.eu":1, + "Tools available on UseGalaxy.fr":0, + "Tools available on APOSTL":0, + "Tools available on BF2I-MAP":0, + "Tools available on BioBix":0, + "Tools available on 
CIRM-CFBP":0, + "Tools available on Center for Phage Technology (CPT)":0, + "Tools available on ChemFlow":0, + "Tools available on Coloc-stats":0, + "Tools available on CoralSNP":0, + "Tools available on CropGalaxy":0, + "Tools available on Dintor":0, + "Tools available on FreeBioinfo":0, + "Tools available on GASLINI":0, + "Tools available on Galaxy@AuBi":0, + "Tools available on Galaxy@Pasteur":0, + "Tools available on GalaxyTrakr":0, + "Tools available on Genomic Hyperbrowser":0, + "Tools available on GigaGalaxy":0, + "Tools available on HyPhy HIV NGS Tools":0, + "Tools available on IPK Galaxy Blast Suite":0, + "Tools available on ImmPort Galaxy":0, + "Tools available on InteractoMIX":0, + "Tools available on MISSISSIPPI":0, + "Tools available on Mandoiu Lab":0, + "Tools available on MiModD NacreousMap":0, + "Tools available on Oqtans":0, + "Tools available on Palfinder":0, + "Tools available on PepSimili":0, + "Tools available on PhagePromotor":0, + "Tools available on UseGalaxy.be":0, + "Tools available on UseGalaxy.cz":0, + "Tools available on UseGalaxy.no":0, + "Tools available on Viral Variant Visualizer (VVV)":0, + "No. of tool users (5 years) (usegalaxy.eu)":1139, + "No. of tool users (all time) (usegalaxy.eu)":1139, + "Tool usage (5 years) (usegalaxy.eu)":109, + "Tool usage (all time) (usegalaxy.eu)":109, + "No. of tool users (5 years) (usegalaxy.org)":0, + "No. of tool users (all time) (usegalaxy.org)":0, + "Tool usage (5 years) (usegalaxy.org)":0, + "Tool usage (all time) (usegalaxy.org)":0, + "No. of tool users (5 years) (usegalaxy.org.au)":0, + "No. of tool users (all time) (usegalaxy.org.au)":0, + "Tool usage (5 years) (usegalaxy.org.au)":0, + "Tool usage (all time) (usegalaxy.org.au)":0, + "No. of tool users (5 years) - all main servers":1139, + "No. 
of tool users (all time) - all main servers":1139, + "Tool usage (5 years) - all main servers":109, + "Tool usage (all time) - all main servers":109 + }, + { + "Galaxy wrapper id":"aldex2", + "Galaxy tool ids":[ + "aldex2" + ], + "Description":"Performs analysis Of differential abundance taking sample variation into account", + "bio.tool id":"aldex2", + "bio.tool ids":[ + "aldex2" + ], + "biii":null, + "bio.tool name":"ALDEx2", + "bio.tool description":"A differential abundance analysis for the comparison of two or more conditions. It uses a Dirichlet-multinomial model to infer abundance from counts, that has been optimized for three or more experimental replicates. Infers sampling variation and calculates the expected FDR given the biological and sampling variation using the Wilcox rank test and Welches t-test, or the glm and Kruskal Wallis tests. Reports both P and fdr values calculated by the Benjamini Hochberg correction.", + "EDAM operation":[ + "Statistical inference" + ], + "EDAM topic":[ + "Gene expression", + "Statistics and probability" + ], + "Status":"To update", + "Source":"https://github.com/ggloor/ALDEx_bioc", + "ToolShed categories":[ + "Metagenomics" + ], + "ToolShed id":"aldex2", + "Date of first commit of the suite":"2024-03-01", + "Galaxy wrapper owner":"iuc", + "Galaxy wrapper source":"https://github.com/galaxyproject/tools-iuc/tree/master/tools/aldex2", + "Galaxy wrapper parsed folder":"https://github.com/paulzierep/Galaxy-Tool-Metadata-Extractor-Test-Wrapper/tree/main/tools/aldex2", + "Galaxy wrapper version":"1.26.0", + "Conda id":"bioconductor-aldex2", + "Conda version":"1.34.0", + "EDAM operation (no superclasses)":[ + "Statistical inference" + ], + "EDAM topic (no superclasses)":[ + "Gene expression", + "Statistics and probability" + ], + "Available on UseGalaxy.org (Main)":0, + "Available on UseGalaxy.org.au":0, + "Available on UseGalaxy.eu":1, + "Available on UseGalaxy.fr":0, + "Tools available on UseGalaxy.org (Main)":0, + "Tools 
available on UseGalaxy.org.au":0, + "Tools available on UseGalaxy.eu":1, + "Tools available on UseGalaxy.fr":0, + "Tools available on APOSTL":0, + "Tools available on BF2I-MAP":0, + "Tools available on BioBix":0, + "Tools available on CIRM-CFBP":0, + "Tools available on Center for Phage Technology (CPT)":0, + "Tools available on ChemFlow":0, + "Tools available on Coloc-stats":0, + "Tools available on CoralSNP":0, + "Tools available on CropGalaxy":0, + "Tools available on Dintor":0, + "Tools available on FreeBioinfo":0, + "Tools available on GASLINI":0, + "Tools available on Galaxy@AuBi":0, + "Tools available on Galaxy@Pasteur":0, + "Tools available on GalaxyTrakr":0, + "Tools available on Genomic Hyperbrowser":0, + "Tools available on GigaGalaxy":0, + "Tools available on HyPhy HIV NGS Tools":0, + "Tools available on IPK Galaxy Blast Suite":0, + "Tools available on ImmPort Galaxy":0, + "Tools available on InteractoMIX":0, + "Tools available on MISSISSIPPI":0, + "Tools available on Mandoiu Lab":0, + "Tools available on MiModD NacreousMap":0, + "Tools available on Oqtans":0, + "Tools available on Palfinder":0, + "Tools available on PepSimili":0, + "Tools available on PhagePromotor":0, + "Tools available on UseGalaxy.be":0, + "Tools available on UseGalaxy.cz":1, + "Tools available on UseGalaxy.no":0, + "Tools available on Viral Variant Visualizer (VVV)":0, + "No. of tool users (5 years) (usegalaxy.eu)":262, + "No. of tool users (all time) (usegalaxy.eu)":262, + "Tool usage (5 years) (usegalaxy.eu)":36, + "Tool usage (all time) (usegalaxy.eu)":36, + "No. of tool users (5 years) (usegalaxy.org)":0, + "No. of tool users (all time) (usegalaxy.org)":0, + "Tool usage (5 years) (usegalaxy.org)":0, + "Tool usage (all time) (usegalaxy.org)":0, + "No. of tool users (5 years) (usegalaxy.org.au)":0, + "No. of tool users (all time) (usegalaxy.org.au)":0, + "Tool usage (5 years) (usegalaxy.org.au)":0, + "Tool usage (all time) (usegalaxy.org.au)":0, + "No. 
of tool users (5 years) - all main servers":262, + "No. of tool users (all time) - all main servers":262, + "Tool usage (5 years) - all main servers":36, + "Tool usage (all time) - all main servers":36 + }, + { + "Galaxy wrapper id":"fastp", + "Galaxy tool ids":[ + "fastp" + ], + "Description":"Fast all-in-one preprocessing for FASTQ files", + "bio.tool id":"fastp", + "bio.tool ids":[ + "fastp" + ], + "biii":null, + "bio.tool name":"fastp", + "bio.tool description":"A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.", + "EDAM operation":[ + "Sequencing quality control", + "Sequence contamination filtering" + ], + "EDAM topic":[ + "Sequence analysis", + "Probes and primers" + ], + "Status":"To update", + "Source":"https://github.com/OpenGene/fastp", + "ToolShed categories":[ + "Sequence Analysis" + ], + "ToolShed id":"fastp", + "Date of first commit of the suite":"2024-03-11", + "Galaxy wrapper owner":"iuc", + "Galaxy wrapper source":"https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastp", + "Galaxy wrapper parsed folder":"https://github.com/paulzierep/Galaxy-Tool-Metadata-Extractor-Test-Wrapper/tree/main/tools/fastp", + "Galaxy wrapper version":null, + "Conda id":"fastp", + "Conda version":"0.23.4", + "EDAM operation (no superclasses)":[ + "Sequence contamination filtering" + ], + "EDAM topic (no superclasses)":[ + "Probes and primers" + ], + "Available on UseGalaxy.org (Main)":1, + "Available on UseGalaxy.org.au":1, + "Available on UseGalaxy.eu":1, + "Available on UseGalaxy.fr":0, + "Tools available on UseGalaxy.org (Main)":1, + "Tools available on UseGalaxy.org.au":1, + "Tools available on UseGalaxy.eu":1, + "Tools available on UseGalaxy.fr":0, + "Tools available on APOSTL":0, + "Tools available on BF2I-MAP":0, + "Tools available on BioBix":0, + "Tools available on CIRM-CFBP":0, + "Tools available on Center for Phage Technology (CPT)":0, + 
"Tools available on ChemFlow":0, + "Tools available on Coloc-stats":0, + "Tools available on CoralSNP":0, + "Tools available on CropGalaxy":0, + "Tools available on Dintor":0, + "Tools available on FreeBioinfo":0, + "Tools available on GASLINI":0, + "Tools available on Galaxy@AuBi":1, + "Tools available on Galaxy@Pasteur":1, + "Tools available on GalaxyTrakr":1, + "Tools available on Genomic Hyperbrowser":0, + "Tools available on GigaGalaxy":0, + "Tools available on HyPhy HIV NGS Tools":1, + "Tools available on IPK Galaxy Blast Suite":0, + "Tools available on ImmPort Galaxy":0, + "Tools available on InteractoMIX":0, + "Tools available on MISSISSIPPI":1, + "Tools available on Mandoiu Lab":0, + "Tools available on MiModD NacreousMap":0, + "Tools available on Oqtans":0, + "Tools available on Palfinder":0, + "Tools available on PepSimili":0, + "Tools available on PhagePromotor":0, + "Tools available on UseGalaxy.be":1, + "Tools available on UseGalaxy.cz":1, + "Tools available on UseGalaxy.no":1, + "Tools available on Viral Variant Visualizer (VVV)":1, + "No. of tool users (5 years) (usegalaxy.eu)":1126086, + "No. of tool users (all time) (usegalaxy.eu)":1127111, + "Tool usage (5 years) (usegalaxy.eu)":6847, + "Tool usage (all time) (usegalaxy.eu)":6909, + "No. of tool users (5 years) (usegalaxy.org)":422259, + "No. of tool users (all time) (usegalaxy.org)":422259, + "Tool usage (5 years) (usegalaxy.org)":10722, + "Tool usage (all time) (usegalaxy.org)":10722, + "No. of tool users (5 years) (usegalaxy.org.au)":76462, + "No. of tool users (all time) (usegalaxy.org.au)":76462, + "Tool usage (5 years) (usegalaxy.org.au)":2242, + "Tool usage (all time) (usegalaxy.org.au)":2242, + "No. of tool users (5 years) - all main servers":1624807, + "No. 
of tool users (all time) - all main servers":1625832, + "Tool usage (5 years) - all main servers":19811, + "Tool usage (all time) - all main servers":19873 + }, + { + "Galaxy wrapper id":"spades", + "Galaxy tool ids":[ + "spades_biosyntheticspades", + "spades_coronaspades", + "spades_metaplasmidspades", + "metaspades", + "spades_metaviralspades", + "spades_plasmidspades", + "rnaspades", + "spades_rnaviralspades", + "spades" + ], + "Description":"SPAdes is an assembly toolkit containing various assembly pipelines. It implements the following 4 stages: assembly graph construction, k-bimer adjustment, construction of paired assembly graph and contig construction.", + "bio.tool id":"spades", + "bio.tool ids":[ + "metaplasmidspades", + "coronaspades", + "rnaspades" + ], + "biii":null, + "bio.tool name":"SPAdes", + "bio.tool description":"St. Petersburg genome assembler \u2013 is intended for both standard isolates and single-cell MDA bacteria assemblies. SPAdes 3.9 works with Illumina or IonTorrent reads and is capable of providing hybrid assemblies using PacBio, Oxford Nanopore and Sanger reads. 
Additional contigs can be provided and can be used as long reads.", + "EDAM operation":[ + "Genome assembly" + ], + "EDAM topic":[ + "Sequence assembly" + ], + "Status":"To update", + "Source":"https://github.com/ablab/spades", + "ToolShed categories":[ + "Assembly", + "RNA", + "Metagenomics" + ], + "ToolShed id":"spades", + "Date of first commit of the suite":"2024-03-12", + "Galaxy wrapper owner":"iuc", + "Galaxy wrapper source":"https://github.com/galaxyproject/tools-iuc/tree/master/tools/spades", + "Galaxy wrapper parsed folder":"https://github.com/paulzierep/Galaxy-Tool-Metadata-Extractor-Test-Wrapper/tree/main/tools/spades", + "Galaxy wrapper version":"3.15.5", + "Conda id":"spades", + "Conda version":"4.0.0", + "EDAM operation (no superclasses)":[ + "Genome assembly" + ], + "EDAM topic (no superclasses)":[ + "Sequence assembly" + ], + "Available on UseGalaxy.org (Main)":9, + "Available on UseGalaxy.org.au":9, + "Available on UseGalaxy.eu":9, + "Available on UseGalaxy.fr":0, + "Tools available on UseGalaxy.org (Main)":9, + "Tools available on UseGalaxy.org.au":9, + "Tools available on UseGalaxy.eu":9, + "Tools available on UseGalaxy.fr":0, + "Tools available on APOSTL":0, + "Tools available on BF2I-MAP":0, + "Tools available on BioBix":0, + "Tools available on CIRM-CFBP":0, + "Tools available on Center for Phage Technology (CPT)":0, + "Tools available on ChemFlow":0, + "Tools available on Coloc-stats":0, + "Tools available on CoralSNP":0, + "Tools available on CropGalaxy":0, + "Tools available on Dintor":0, + "Tools available on FreeBioinfo":0, + "Tools available on GASLINI":0, + "Tools available on Galaxy@AuBi":1, + "Tools available on Galaxy@Pasteur":3, + "Tools available on GalaxyTrakr":8, + "Tools available on Genomic Hyperbrowser":0, + "Tools available on GigaGalaxy":0, + "Tools available on HyPhy HIV NGS Tools":2, + "Tools available on IPK Galaxy Blast Suite":0, + "Tools available on ImmPort Galaxy":0, + "Tools available on InteractoMIX":0, + "Tools 
available on MISSISSIPPI":3, + "Tools available on Mandoiu Lab":0, + "Tools available on MiModD NacreousMap":0, + "Tools available on Oqtans":0, + "Tools available on Palfinder":0, + "Tools available on PepSimili":0, + "Tools available on PhagePromotor":0, + "Tools available on UseGalaxy.be":3, + "Tools available on UseGalaxy.cz":9, + "Tools available on UseGalaxy.no":3, + "Tools available on Viral Variant Visualizer (VVV)":0, + "No. of tool users (5 years) (usegalaxy.eu)":82716, + "No. of tool users (all time) (usegalaxy.eu)":87113, + "Tool usage (5 years) (usegalaxy.eu)":8209, + "Tool usage (all time) (usegalaxy.eu)":8526, + "No. of tool users (5 years) (usegalaxy.org)":120471, + "No. of tool users (all time) (usegalaxy.org)":120475, + "Tool usage (5 years) (usegalaxy.org)":14787, + "Tool usage (all time) (usegalaxy.org)":14790, + "No. of tool users (5 years) (usegalaxy.org.au)":54067, + "No. of tool users (all time) (usegalaxy.org.au)":61541, + "Tool usage (5 years) (usegalaxy.org.au)":5817, + "Tool usage (all time) (usegalaxy.org.au)":6653, + "No. of tool users (5 years) - all main servers":257254, + "No. of tool users (all time) - all main servers":269129, + "Tool usage (5 years) - all main servers":28813, + "Tool usage (all time) - all main servers":29969 + }, + { + "Galaxy wrapper id": "shovill", + "Galaxy tool ids": [ + "shovill" + ], + "Description": "Faster de novo assembly pipeline based around Spades", + "bio.tool id": "shovill", + "bio.tool ids": [ + "shovill" + ], + "biii": null, + "bio.tool name": "shovill", + "bio.tool description": "Shovill is a pipeline for assembly of bacterial isolate genomes from Illumina paired-end reads. Shovill uses SPAdes at its core, but alters the steps before and after the primary assembly step to get similar results in less time. 
Shovill also supports other assemblers like SKESA, Velvet and Megahit, so you can take advantage of the pre- and post-processing the Shovill provides with those too.", + "EDAM operation": [ + "Genome assembly" + ], + "EDAM topic": [ + "Genomics", + "Microbiology", + "Sequence assembly" + ], + "Status": "Up-to-date", + "Source": "https://github.com/tseemann/shovill", + "ToolShed categories": [ + "Assembly" + ], + "ToolShed id": "shovill", + "Date of first commit of the suite": "2017-10-24", + "Galaxy wrapper owner": "iuc", + "Galaxy wrapper source": "https://github.com/galaxyproject/tools-iuc/tree/master/tools/shovill", + "Galaxy wrapper parsed folder": "https://github.com/galaxyproject/tools-iuc/tree/main/tools/shovill", + "Galaxy wrapper version": "1.1.0", + "Conda id": "shovill", + "Conda version": "1.1.0", + "EDAM operation (no superclasses)": [ + "Genome assembly" + ], + "EDAM topic (no superclasses)": [ + "Genomics", + "Microbiology", + "Sequence assembly" + ], + "Available on UseGalaxy.org (Main)": 1, + "Available on UseGalaxy.org.au": 1, + "Available on UseGalaxy.eu": 1, + "Available on UseGalaxy.fr": 1, + "Tools available on UseGalaxy.org (Main)": 1, + "Tools available on UseGalaxy.org.au": 1, + "Tools available on UseGalaxy.eu": 1, + "Tools available on UseGalaxy.fr": 1, + "Tools available on APOSTL": 0, + "Tools available on ARGs-OAP": 0, + "Tools available on CIRM-CFBP": 0, + "Tools available on ChemFlow": 0, + "Tools available on Coloc-stats": 0, + "Tools available on CropGalaxy": 0, + "Tools available on Dintor": 0, + "Tools available on GASLINI": 0, + "Tools available on Galaxy@AuBi": 1, + "Tools available on Galaxy@Pasteur": 0, + "Tools available on GalaxyTrakr": 1, + "Tools available on Genomic Hyperbrowser": 0, + "Tools available on GigaGalaxy": 0, + "Tools available on HyPhy HIV NGS Tools": 0, + "Tools available on IPK Galaxy Blast Suite": 0, + "Tools available on ImmPort Galaxy": 0, + "Tools available on InteractoMIX": 0, + "Tools available on 
MISSISSIPPI": 0, + "Tools available on Mandoiu Lab": 0, + "Tools available on MiModD NacreousMap": 0, + "Tools available on Oqtans": 0, + "Tools available on Palfinder": 0, + "Tools available on PepSimili": 0, + "Tools available on PhagePromotor": 0, + "Tools available on UseGalaxy.be": 1, + "Tools available on UseGalaxy.cz": 1, + "Tools available on UseGalaxy.no": 1, + "Tools available on Viral Variant Visualizer (VVV)": 0, + "No. of tool users (5 years) (usegalaxy.eu)": 46125, + "No. of tool users (all time) (usegalaxy.eu)": 47376, + "Tool usage (5 years) (usegalaxy.eu)": 2273, + "Tool usage (all time) (usegalaxy.eu)": 2356, + "No. of tool users (5 years) (usegalaxy.org)": 40577, + "No. of tool users (all time) (usegalaxy.org)": 40577, + "Tool usage (5 years) (usegalaxy.org)": 3971, + "Tool usage (all time) (usegalaxy.org)": 3971, + "No. of tool users (5 years) (usegalaxy.org.au)": 19960, + "No. of tool users (all time) (usegalaxy.org.au)": 21130, + "Tool usage (5 years) (usegalaxy.org.au)": 1085, + "Tool usage (all time) (usegalaxy.org.au)": 1199, + "No. of tool users (5 years) - all main servers": 106662, + "No. 
of tool users (all time) - all main servers": 109083, + "Tool usage (5 years) - all main servers": 7329, + "Tool usage (all time) - all main servers": 7526 + } + ] \ No newline at end of file diff --git a/sources/bin/tests/test-data/workflowhub_api_mock.json b/sources/bin/tests/test-data/workflowhub_api_mock.json new file mode 100644 index 00000000..214d6789 --- /dev/null +++ b/sources/bin/tests/test-data/workflowhub_api_mock.json @@ -0,0 +1,806 @@ +[ + { + "data":[ + { + "id":"1189", + "type":"workflows", + "attributes":{ + "title":"AMR-Pathfinder", + "tags":[ + "bioinformatics" + ] + }, + "links":{ + "self":"/workflows/1189" + } + }, + { + "id":"1190", + "type":"workflows", + "attributes":{ + "title":"haploid-variant-calling-wgs-pe/main", + "tags":[ + "genomics" + ] + }, + "links":{ + "self":"/workflows/1190" + } + } + ] + }, + { + "data":{ + "id":"1189", + "type":"workflows", + "attributes":{ + "discussion_links":[ + + ], + "title":"AMR-Pathfinder", + "license":"MIT", + "description":"This is an aggregation of the work done in [Seq4AMR](https://workflowhub.eu/projects/110) consisting of the following workflows:\r\n\r\n- [WF1: AbritAMR / AMRFinderPlus](https://workflowhub.eu/workflows/634)\r\n- [WF2: Sciensano](https://workflowhub.eu/workflows/644) (**not currently included**)\r\n- [WF3: SRST2](https://workflowhub.eu/workflows/407) \r\n- [WF4: StarAMR](https://workflowhub.eu/workflows/470)\r\n\r\n## Installation\r\n\r\n- You will need to:\r\n - run the [RGI Database Builder](https://my.galaxy.training/?path=?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fcard%2Frgi%2Frgi_database_builder%2F1.2.0) as a Galaxy admin (if this hasn\\'t been done already)\r\n - [Have the en_US.UTF-8 locale installed](https://github.com/galaxyproject/tools-iuc/issues/6467) on the compute nodes executing cast/melt jobs.\r\n - Install the requisite tools with e.g. 
[`shed-tools`](https://ephemeris.readthedocs.io/en/latest/commands/shed-tools.html) command from the [`ephemeris`](https://ephemeris.readthedocs.io/en/latest/) suite: `shed-tools install -g https://galaxy.example.com -a API_KEY -t tools.yaml` (tools.yaml is provided in this repository.)\r\n- Then you can import this workflow\r\n - Navigate to `/workflows/import` of your Galaxy server\r\n - Select \"GA4GH servers\"\r\n - Enter `name:\"AMR-Pathfinder\"`\r\n- And run it\r\n - You must provide a Sequencing collection (list:paired of fastq files)\r\n - And a Genomes collection (list of fasta files) \r\n - Both of these should use **identical** collection element identifiers\r\n\r\n## Outputs\r\n\r\nThis will produce two important tables: \"Binary Comparison\" and a \"% Identity Scored Outputs\". \r\n\r\n### Binary comparison\r\n\r\nThis file reports the discovery or absence of specific AMR genes across all tested AMR Analysis tools. You will mostly see 1s (presence) or 0s (absence) but you may occasionally see higher numbers when an AMR tool reports multiple hits for a specific gene.\r\n\r\n### % Identity Scored Outputs\r\n\r\nThis is similar to binary comparison, but using the % identity reported by each AMR tool. For cases where multiple hits were detected, we take the highest.\r\n\r\n## Known Issues\r\n\r\nThe names for identified AMR genes is highly inconsistent across AMR analysis tools. 
We urge the AMR community to rectify this by standardising gene names used in their tooling.", + "latest_version":1, + "tags":[ + "AMR", + "AMR-detection", + "benchamrking" + ], + "versions":[ + { + "version":1, + "revision_comments":"Initial working version of the AMR Pathfinder project workflow", + "url":"https://workflowhub.eu/workflows/1189?version=1", + "commit":"a29814deaf5d824562481df6357ff3c89295c5b0", + "ref":"refs/tags/Version-4.6", + "tree":"/workflows/1189/git/1/tree", + "doi":"10.48546/workflowhub.workflow.1189.1" + } + ], + "version":1, + "revision_comments":"Initial working version of the AMR Pathfinder project workflow", + "created_at":"2024-10-28T13:43:16.000Z", + "updated_at":"2024-11-04T16:21:43.000Z", + "doi":"10.48546/workflowhub.workflow.1189.1", + "content_blobs":[ + + ], + "creators":[ + { + "profile":"/people/118", + "family_name":"Rasche", + "given_name":"Helena", + "affiliation":"", + "orcid":"https://orcid.org/0000-0001-9760-8992" + }, + { + "profile":"/people/359", + "family_name":"Dollée", + "given_name":"Dennis", + "affiliation":"Erasmus University Medical Centre", + "orcid":"None" + }, + { + "profile":"/people/875", + "family_name":"Rijvers", + "given_name":"Birgit", + "affiliation":"Erasmus University Medical Centre", + "orcid":"None" + } + ], + "other_creators":"", + "workflow_class":{ + "title":"Galaxy", + "key":"galaxy", + "description":"None" + }, + "operation_annotations":[ + { + "label":"Antimicrobial resistance prediction", + "identifier":"http://edamontology.org/operation_3482" + } + ], + "topic_annotations":[ + + ], + "internals":{ + "steps":[ + { + "id":"5", + "name":"Create text file", + "description":"toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_text_file_with_recurring_lines/1.1.0" + }, + { + "id":"6", + "name":"Sanitize Filename", + "description":"" + }, + { + "id":"7", + "name":"Shovill", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/shovill/shovill/1.1.0+galaxy1" + }, + { + "id":"8", + "name":"WF3: 
SRST2 :: AMR - SeqSero2/SISTR", + "description":"" + }, + { + "id":"9", + "name":"Replace", + "description":"toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_find_and_replace/1.1.4" + }, + { + "id":"10", + "name":"benchAMRking: wf1", + "description":"" + }, + { + "id":"11", + "name":"benchAMRking: wf3b", + "description":"" + }, + { + "id":"12", + "name":"ABRicate", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/abricate/abricate/1.0.1" + }, + { + "id":"13", + "name":"staramr", + "description":"toolshed.g2.bx.psu.edu/repos/nml/staramr/staramr_search/0.10.0+galaxy1" + }, + { + "id":"14", + "name":"Add column", + "description":"addValue" + }, + { + "id":"15", + "name":"Add column", + "description":"addValue" + }, + { + "id":"16", + "name":"Add column", + "description":"addValue" + }, + { + "id":"17", + "name":"hamronize", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/hamronize_tool/hamronize_tool/1.0.3+galaxy1" + }, + { + "id":"18", + "name":"hamronize", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/hamronize_tool/hamronize_tool/1.0.3+galaxy1" + }, + { + "id":"19", + "name":"Select", + "description":"Grep1" + }, + { + "id":"20", + "name":"Collapse Collection", + "description":"toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/5.1.0" + }, + { + "id":"21", + "name":"Replace", + "description":"toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_find_and_replace/1.1.4" + }, + { + "id":"22", + "name":"hamronize summarize:", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/hamronize_summarize/hamronize_summarize/1.1.4+galaxy0" + }, + { + "id":"23", + "name":"hamronize: summarize", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/hamronize_summarize/hamronize_summarize/1.0.3+galaxy2" + }, + { + "id":"24", + "name":"hamronize: summarize", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/hamronize_summarize/hamronize_summarize/1.0.3+galaxy2" + }, + { + "id":"25", + "name":"Select", + "description":"Grep1" + }, + { + "id":"26", 
+ "name":"Split file", + "description":"toolshed.g2.bx.psu.edu/repos/bgruening/split_file_on_column/tp_split_on_column/0.2" + }, + { + "id":"27", + "name":"Cut", + "description":"Cut1" + }, + { + "id":"28", + "name":"Select", + "description":"Grep1" + }, + { + "id":"29", + "name":"Add column", + "description":"addValue" + }, + { + "id":"30", + "name":"Merge collections", + "description":"__MERGE_COLLECTION__" + }, + { + "id":"31", + "name":"Apply rules", + "description":"__APPLY_RULES__" + }, + { + "id":"32", + "name":"Concatenate multiple datasets", + "description":"toolshed.g2.bx.psu.edu/repos/artbio/concatenate_multiple_datasets/cat_multi_datasets/1.4.3" + }, + { + "id":"33", + "name":"Move WF# col to second position, keep score", + "description":"Cut1" + }, + { + "id":"34", + "name":"Move WF# col to second position 2, delete score", + "description":"Cut1" + }, + { + "id":"35", + "name":"Datamash", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/datamash_ops/datamash_ops/1.8+galaxy0" + }, + { + "id":"36", + "name":"Add column", + "description":"addValue" + }, + { + "id":"37", + "name":"Concatenate datasets", + "description":"cat1" + }, + { + "id":"38", + "name":"Concatenate datasets", + "description":"cat1" + }, + { + "id":"39", + "name":"% Identity", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/reshape2_cast/cast/1.4.2" + }, + { + "id":"40", + "name":"Binary Comparison", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/reshape2_cast/cast/1.4.2" + } + ] + }, + "tools":[ + { + "name":"shovill", + "id":"https://bio.tools/shovill" + }, + { + "name":"ABRicate", + "id":"https://bio.tools/ABRicate" + }, + { + "name":"staramr", + "id":"https://bio.tools/staramr" + }, + { + "name":"hAMRonization", + "id":"https://bio.tools/hamronization" + } + ] + }, + "relationships":{ + "creators":{ + "data":[ + { + "id":"118", + "type":"people" + }, + { + "id":"359", + "type":"people" + }, + { + "id":"875", + "type":"people" + } + ] + }, + "submitter":{ + "data":[ + { + 
"id":"118", + "type":"people" + } + ] + }, + "people":{ + "data":[ + { + "id":"118", + "type":"people" + }, + { + "id":"359", + "type":"people" + }, + { + "id":"875", + "type":"people" + } + ] + }, + "projects":{ + "data":[ + { + "id":"110", + "type":"projects" + }, + { + "id":"277", + "type":"projects" + } + ] + }, + "investigations":{ + "data":[ + + ] + }, + "studies":{ + "data":[ + + ] + }, + "assays":{ + "data":[ + + ] + }, + "publications":{ + "data":[ + + ] + }, + "sops":{ + "data":[ + + ] + }, + "presentations":{ + "data":[ + + ] + }, + "data_files":{ + "data":[ + + ] + }, + "documents":{ + "data":[ + + ] + } + }, + "links":{ + "self":"/workflows/1189?version=1", + "diagram":"/workflows/1189/diagram?version=1" + }, + "meta":{ + "created":"2024-10-28T13:43:16.000Z", + "modified":"2024-11-04T15:04:22.000Z", + "api_version":"0.3", + "base_url":"https://workflowhub.eu", + "uuid":"81fea0b0-7760-013d-5d70-005056ab8eb2" + } + }, + "jsonapi":{ + "version":"1.0" + } + }, + { + "data":{ + "id":"1189", + "type":"workflows", + "attributes":{ + "discussion_links":[ + + ], + "title":"AMR-Pathfinder", + "license":"MIT", + "description":"This is an aggregation of the work done in [Seq4AMR](https://workflowhub.eu/projects/110) consisting of the following workflows:\r\n\r\n- [WF1: AbritAMR / AMRFinderPlus](https://workflowhub.eu/workflows/634)\r\n- [WF2: Sciensano](https://workflowhub.eu/workflows/644) (**not currently included**)\r\n- [WF3: SRST2](https://workflowhub.eu/workflows/407) \r\n- [WF4: StarAMR](https://workflowhub.eu/workflows/470)\r\n\r\n## Installation\r\n\r\n- You will need to:\r\n - run the [RGI Database Builder](https://my.galaxy.training/?path=?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2Fcard%2Frgi%2Frgi_database_builder%2F1.2.0) as a Galaxy admin (if this hasn\\'t been done already)\r\n - [Have the en_US.UTF-8 locale installed](https://github.com/galaxyproject/tools-iuc/issues/6467) on the compute nodes executing cast/melt jobs.\r\n - Install the requisite 
tools with e.g. [`shed-tools`](https://ephemeris.readthedocs.io/en/latest/commands/shed-tools.html) command from the [`ephemeris`](https://ephemeris.readthedocs.io/en/latest/) suite: `shed-tools install -g https://galaxy.example.com -a API_KEY -t tools.yaml` (tools.yaml is provided in this repository.)\r\n- Then you can import this workflow\r\n - Navigate to `/workflows/import` of your Galaxy server\r\n - Select \"GA4GH servers\"\r\n - Enter `name:\"AMR-Pathfinder\"`\r\n- And run it\r\n - You must provide a Sequencing collection (list:paired of fastq files)\r\n - And a Genomes collection (list of fasta files) \r\n - Both of these should use **identical** collection element identifiers\r\n\r\n## Outputs\r\n\r\nThis will produce two important tables: \"Binary Comparison\" and a \"% Identity Scored Outputs\". \r\n\r\n### Binary comparison\r\n\r\nThis file reports the discovery or absence of specific AMR genes across all tested AMR Analysis tools. You will mostly see 1s (presence) or 0s (absence) but you may occasionally see higher numbers when an AMR tool reports multiple hits for a specific gene.\r\n\r\n### % Identity Scored Outputs\r\n\r\nThis is similar to binary comparison, but using the % identity reported by each AMR tool. For cases where multiple hits were detected, we take the highest.\r\n\r\n## Known Issues\r\n\r\nThe names for identified AMR genes is highly inconsistent across AMR analysis tools. 
We urge the AMR community to rectify this by standardising gene names used in their tooling.", + "latest_version":1, + "tags":[ + "AMR", + "AMR-detection", + "benchamrking" + ], + "versions":[ + { + "version":1, + "revision_comments":"Initial working version of the AMR Pathfinder project workflow", + "url":"https://workflowhub.eu/workflows/1189?version=1", + "commit":"a29814deaf5d824562481df6357ff3c89295c5b0", + "ref":"refs/tags/Version-4.6", + "tree":"/workflows/1189/git/1/tree", + "doi":"10.48546/workflowhub.workflow.1189.1" + } + ], + "version":1, + "revision_comments":"Initial working version of the AMR Pathfinder project workflow", + "created_at":"2024-10-28T13:43:16.000Z", + "updated_at":"2024-11-04T16:21:43.000Z", + "doi":"10.48546/workflowhub.workflow.1189.1", + "content_blobs":[ + + ], + "creators":[ + { + "profile":"/people/118", + "family_name":"Rasche", + "given_name":"Helena", + "affiliation":"", + "orcid":"https://orcid.org/0000-0001-9760-8992" + }, + { + "profile":"/people/359", + "family_name":"Dollée", + "given_name":"Dennis", + "affiliation":"Erasmus University Medical Centre", + "orcid":"None" + }, + { + "profile":"/people/875", + "family_name":"Rijvers", + "given_name":"Birgit", + "affiliation":"Erasmus University Medical Centre", + "orcid":"None" + } + ], + "other_creators":"", + "workflow_class":{ + "title":"Galaxy", + "key":"galaxy", + "description":"None" + }, + "operation_annotations":[ + { + "label":"Antimicrobial resistance prediction", + "identifier":"http://edamontology.org/operation_3482" + } + ], + "topic_annotations":[ + + ], + "internals":{ + "steps":[ + { + "id":"5", + "name":"Create text file", + "description":"toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_text_file_with_recurring_lines/1.1.0" + }, + { + "id":"6", + "name":"Sanitize Filename", + "description":"" + }, + { + "id":"7", + "name":"Shovill", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/shovill/shovill/1.1.0+galaxy1" + }, + { + "id":"8", + "name":"WF3: 
SRST2 :: AMR - SeqSero2/SISTR", + "description":"" + }, + { + "id":"9", + "name":"Replace", + "description":"toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_find_and_replace/1.1.4" + }, + { + "id":"10", + "name":"benchAMRking: wf1", + "description":"" + }, + { + "id":"11", + "name":"benchAMRking: wf3b", + "description":"" + }, + { + "id":"12", + "name":"ABRicate", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/abricate/abricate/1.0.1" + }, + { + "id":"13", + "name":"staramr", + "description":"toolshed.g2.bx.psu.edu/repos/nml/staramr/staramr_search/0.10.0+galaxy1" + }, + { + "id":"14", + "name":"Add column", + "description":"addValue" + }, + { + "id":"15", + "name":"Add column", + "description":"addValue" + }, + { + "id":"16", + "name":"Add column", + "description":"addValue" + }, + { + "id":"17", + "name":"hamronize", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/hamronize_tool/hamronize_tool/1.0.3+galaxy1" + }, + { + "id":"18", + "name":"hamronize", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/hamronize_tool/hamronize_tool/1.0.3+galaxy1" + }, + { + "id":"19", + "name":"Select", + "description":"Grep1" + }, + { + "id":"20", + "name":"Collapse Collection", + "description":"toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/5.1.0" + }, + { + "id":"21", + "name":"Replace", + "description":"toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_find_and_replace/1.1.4" + }, + { + "id":"22", + "name":"hamronize summarize:", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/hamronize_summarize/hamronize_summarize/1.1.4+galaxy0" + }, + { + "id":"23", + "name":"hamronize: summarize", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/hamronize_summarize/hamronize_summarize/1.0.3+galaxy2" + }, + { + "id":"24", + "name":"hamronize: summarize", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/hamronize_summarize/hamronize_summarize/1.0.3+galaxy2" + }, + { + "id":"25", + "name":"Select", + "description":"Grep1" + }, + { + "id":"26", 
+ "name":"Split file", + "description":"toolshed.g2.bx.psu.edu/repos/bgruening/split_file_on_column/tp_split_on_column/0.2" + }, + { + "id":"27", + "name":"Cut", + "description":"Cut1" + }, + { + "id":"28", + "name":"Select", + "description":"Grep1" + }, + { + "id":"29", + "name":"Add column", + "description":"addValue" + }, + { + "id":"30", + "name":"Merge collections", + "description":"__MERGE_COLLECTION__" + }, + { + "id":"31", + "name":"Apply rules", + "description":"__APPLY_RULES__" + }, + { + "id":"32", + "name":"Concatenate multiple datasets", + "description":"toolshed.g2.bx.psu.edu/repos/artbio/concatenate_multiple_datasets/cat_multi_datasets/1.4.3" + }, + { + "id":"33", + "name":"Move WF# col to second position, keep score", + "description":"Cut1" + }, + { + "id":"34", + "name":"Move WF# col to second position 2, delete score", + "description":"Cut1" + }, + { + "id":"35", + "name":"Datamash", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/datamash_ops/datamash_ops/1.8+galaxy0" + }, + { + "id":"36", + "name":"Add column", + "description":"addValue" + }, + { + "id":"37", + "name":"Concatenate datasets", + "description":"cat1" + }, + { + "id":"38", + "name":"Concatenate datasets", + "description":"cat1" + }, + { + "id":"39", + "name":"% Identity", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/reshape2_cast/cast/1.4.2" + }, + { + "id":"40", + "name":"Binary Comparison", + "description":"toolshed.g2.bx.psu.edu/repos/iuc/reshape2_cast/cast/1.4.2" + } + ] + }, + "tools":[ + { + "name":"shovill", + "id":"https://bio.tools/shovill" + }, + { + "name":"ABRicate", + "id":"https://bio.tools/ABRicate" + }, + { + "name":"staramr", + "id":"https://bio.tools/staramr" + }, + { + "name":"hAMRonization", + "id":"https://bio.tools/hamronization" + } + ] + }, + "relationships":{ + "creators":{ + "data":[ + { + "id":"118", + "type":"people" + }, + { + "id":"359", + "type":"people" + }, + { + "id":"875", + "type":"people" + } + ] + }, + "submitter":{ + "data":[ + { + 
"id":"118", + "type":"people" + } + ] + }, + "people":{ + "data":[ + { + "id":"118", + "type":"people" + }, + { + "id":"359", + "type":"people" + }, + { + "id":"875", + "type":"people" + } + ] + }, + "projects":{ + "data":[ + { + "id":"110", + "type":"projects" + }, + { + "id":"277", + "type":"projects" + } + ] + }, + "investigations":{ + "data":[ + + ] + }, + "studies":{ + "data":[ + + ] + }, + "assays":{ + "data":[ + + ] + }, + "publications":{ + "data":[ + + ] + }, + "sops":{ + "data":[ + + ] + }, + "presentations":{ + "data":[ + + ] + }, + "data_files":{ + "data":[ + + ] + }, + "documents":{ + "data":[ + + ] + } + }, + "links":{ + "self":"/workflows/1189?version=1", + "diagram":"/workflows/1189/diagram?version=1" + }, + "meta":{ + "created":"2024-10-28T13:43:16.000Z", + "modified":"2024-11-04T15:04:22.000Z", + "api_version":"0.3", + "base_url":"https://workflowhub.eu", + "uuid":"81fea0b0-7760-013d-5d70-005056ab8eb2" + } + }, + "jsonapi":{ + "version":"1.0" + } + } + ] \ No newline at end of file diff --git a/sources/bin/tests/test_extract_galaxy_workflows.py b/sources/bin/tests/test_extract_galaxy_workflows.py new file mode 100644 index 00000000..c35b5974 --- /dev/null +++ b/sources/bin/tests/test_extract_galaxy_workflows.py @@ -0,0 +1,85 @@ +import json +import os +import unittest +from typing import ( + Any, + Dict, +) +from unittest.mock import ( + MagicMock, + patch, +) + +from bin.extract_galaxy_workflows import Workflows + +from bin import shared + + +class TestAddWorkflowsFromWorkflowHub(unittest.TestCase): + + def setUp(self) -> None: + """Set up the test environment and prepare mock data.""" + # Get the directory where the script is located + self.script_dir = os.path.dirname(os.path.realpath(__file__)) + + # Construct the path to the JSON file (relative to the script's location) + json_file_path = os.path.join(self.script_dir, "test-data", "workflowhub_api_mock.json") + + # Open and load the JSON file + with open(json_file_path, "r") as file: + 
self.mock_responses = json.load(file) + + # Define the side effect function + def mock_side_effect(url: str, headers: Dict[str, str]) -> Dict: + if url == "https://workflowhub.eu/workflows?filter[workflow_type]=galaxy": + return self.mock_responses[0] + elif url == "https://workflowhub.eu/workflows/1189": + return self.mock_responses[1] + elif url == "https://workflowhub.eu/workflows/1190": + return self.mock_responses[2] + else: + return {"data": []} # Default empty response for any other URL + + self.mock_side_effect = mock_side_effect # Store for reuse in tests + + # Construct the path to the JSON file (relative to the script's location) + self.test_tools_file_path = os.path.join(self.script_dir, "test-data", "test_tools.json") + + @patch("shared.get_request_json") + def test_add_workflows_from_workflowhub(self, mock_get_request_json: MagicMock) -> None: + + # Route every get_request_json call through the URL-based side effect defined in setUp + mock_get_request_json.side_effect = self.mock_side_effect + + # Create the Workflows instance and invoke the method + workflows_instance = Workflows(test=True) # Set test=True to limit data processed + workflows_instance.tools = shared.read_suite_per_tool_id(self.test_tools_file_path) + workflows_instance.add_workflows_from_workflowhub() + + # Assert that the correct number of workflows were added + self.assertEqual(len(workflows_instance.workflows), 2) # Should have added both workflows from the mocked listing + + # Check details of the first added workflow as an example + first_workflow = workflows_instance.workflows[0] + self.assertEqual(first_workflow.id, "1189") + self.assertEqual(first_workflow.name, "AMR-Pathfinder") + self.assertEqual(first_workflow.link, "https://workflowhub.eu/workflows/1189?version=1") + + # Optionally check that the mock was called with the expected URLs + mock_get_request_json.assert_any_call( + "https://workflowhub.eu/workflows?filter[workflow_type]=galaxy", {"Accept": "application/json"} + ) +
mock_get_request_json.assert_any_call("https://workflowhub.eu/workflows/1189", {"Accept": "application/json"}) + mock_get_request_json.assert_any_call("https://workflowhub.eu/workflows/1190", {"Accept": "application/json"}) + + self.assertEqual(mock_get_request_json.call_count, 3) # Adjust based on the expected number of calls + + # check if edam terms are transferred from the tools to the workflow + self.assertEqual( + set(first_workflow.edam_operation), set(["Antimicrobial resistance prediction", "Genome assembly"]) + ) + + +# Run the tests +if __name__ == "__main__": + unittest.main() diff --git a/sources/bin/tests/test_get_public_server.py b/sources/bin/tests/test_get_public_server.py deleted file mode 100644 index a78f3225..00000000 --- a/sources/bin/tests/test_get_public_server.py +++ /dev/null @@ -1,35 +0,0 @@ -###################################### -# Initial start for function based unit tests -# need to set this up using a proper testing framework -###################################### - -import os -import sys - -sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) - -from extract_galaxy_tools import ( - check_tools_on_servers, - USEGALAXY_SERVER_URLS, -) - -server_url = USEGALAXY_SERVER_URLS["UseGalaxy.eu"] -tool_ids = ["abricate"] - -count = check_tools_on_servers(tool_ids, server_url) -print(count) - -tool_ids = ["bla"] -count = check_tools_on_servers(tool_ids, server_url) -print(count) - -server_url = "https://jolo.eu" -tool_ids = ["bla"] -count = check_tools_on_servers(tool_ids, server_url) -print(count) - -# test the cache ! 
-server_url = "https://jolo.eu" -tool_ids = ["bla", "blub"] -count = check_tools_on_servers(tool_ids, server_url) -print(count) diff --git a/sources/bin/tests/test_tool_stats.py b/sources/bin/tests/test_tool_stats.py deleted file mode 100644 index 86f29b12..00000000 --- a/sources/bin/tests/test_tool_stats.py +++ /dev/null @@ -1,58 +0,0 @@ -import os -import sys -from typing import List - -import pandas as pd - -sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) - -from extract_galaxy_tools import GALAXY_TOOL_STATS - - -def get_last_url_position(toot_id: str) -> str: - """ - Returns the last url position of the toot_id, if the value is not a - url it returns the toot_id. So works for local and toolshed - installed tools. - - :param tool_id: galaxy tool id - """ - - if "/" in toot_id: - toot_id = toot_id.split("/")[-1] - return toot_id - - -def get_tool_stats_from_stats_file(tool_stats_df: pd.DataFrame, tool_ids: List[str]) -> int: - """ - Adds the usage statistics to the community tool table - - :param tools_stats_df: df with tools stats in the form `toolshed.g2.bx.psu.edu/repos/iuc/snpsift/snpSift_filter,3394539` - :tool_ids: tool ids to get statistics for and aggregate - """ - - # extract tool id - tool_stats_df["Galaxy wrapper id"] = tool_stats_df["tool_name"].apply(get_last_url_position) - # print(tool_stats_df["Galaxy wrapper id"].to_list()) - - agg_count = 0 - for tool_id in tool_ids: - if tool_id in tool_stats_df["Galaxy wrapper id"].to_list(): - - # get stats of the tool for all versions - counts = tool_stats_df.loc[(tool_stats_df["Galaxy wrapper id"] == tool_id), "count"] - agg_versions = counts.sum() - - # aggregate all counts for all tools in the suite - agg_count += agg_versions - - return int(agg_count) - - -tools_stats_df = pd.read_csv(GALAXY_TOOL_STATS["Total tool usage (usegalaxy.eu)"]) - -tool_ids = ["nextclade"] - -counts = get_tool_stats_from_stats_file(tools_stats_df, tool_ids) - -print(counts)