diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8092f270..1ac9ba25 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: uses: actions/checkout@v3 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ed60a41..345c3250 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,41 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[2.1.0](https://github.com/sanger-tol/genomenote/releases/tag/2.1.0)] - Pembroke Welsh Corgi [2024-12-11] + +### Enhancements & fixes + +- New annotation_statistics subworkfow which runs BUSCO in protein mode and generates some basic statistics on the the annotated gene set if provided with a GFF3 file of gene annotations using the `--annotation_set` option. +- The genome_metadata subworkflow now queries Ensembl's GraphQL API to determine if Ensembl has released gene annotation for the assembly being processed. +- Module updates and remove Anaconda channels +- Removed merquryfk completeness metric + +### Parameters + +| Old parameter | New parameter | +| ------------- | ---------------- | +| | --annotation_set | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present.
**NB:** Parameter has been **added** if just the new parameter information is present.
**NB:** Parameter has been **removed** if new parameter information isn't present. + +### Software dependencies + +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. Only `Docker` or `Singularity` containers are supported, `conda` is not supported. + +| Dependency | Old version | New version | +| ----------- | ---------------------------------------- | ---------------------------------------- | +| `agat` | | 1.4.0 | +| `bedtools` | 2.30.0 | 2.31.1 | +| `busco` | 5.5.0 | 5.7.1 | +| `cooler` | 0.8.11 | 0.9.2 | +| `fastk` | 427104ea91c78c3b8b8b49f1a7d6bbeaa869ba1c | 666652151335353eef2fcd58880bcef5bc2928e1 | +| `gffread` | | 0.12.7 | +| `merquryfk` | d00d98157618f4e8d1a9190026b19b471055b22e | | +| `multiqc` | 1.14 | 1.25.1 | +| `samtools` | 1.17 | 1.21 | + +> **NB:** Dependency has been **updated** if both old and new version information is present.
**NB:** Dependency has been **added** if just the new version information is present.
**NB:** Dependency has been **removed** if version information isn't present. + ## [[2.0.0](https://github.com/sanger-tol/genomenote/releases/tag/2.0.0)] - English Cocker Spaniel [2024-10-10] ### Enhancements & fixes diff --git a/CITATION.cff b/CITATION.cff index 969f1a7c..b619d6fe 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -8,8 +8,8 @@ message: >- metadata from this file. type: software authors: - - given-names: Sandra - family-names: Babiyre + - given-names: Sandra Ruth + family-names: Babirye affiliation: Wellcome Sanger Institute orcid: "https://orcid.org/0009-0004-7773-7008" - given-names: Tyler diff --git a/CITATIONS.md b/CITATIONS.md index 8bff25a0..6bb367a9 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,6 +12,10 @@ ## Pipeline tools +- [AGAT](https://github.com/NBISweden/AGAT) + + > Dainat J. AGAT: Another Gff Analysis Toolkit to handle annotations in any GTF/GFF format. (Version v1.4.0). Zenodo. https://www.doi.org/10.5281/zenodo.3552717 + - [BedTools](https://bedtools.readthedocs.io/en/latest/) > Quinlan, Aaron R., and Ira M. Hall. “BEDTools: A Flexible Suite of Utilities for Comparing Genomic Features.” Bioinformatics, vol. 26, no. 6, 2010, pp. 841–842., https://doi.org/10.1093/bioinformatics/btq033. @@ -30,6 +34,10 @@ - [FastK](https://github.com/thegenemyers/FASTK) +- [GFFREAD](https://github.com/gpertea/gffread) + + > Pertea G and Pertea M. "GFF Utilities: GffRead and GffCompare [version 1; peer review: 3 approved]". F1000Research 2020, 9:304 https://doi.org/10.12688/f1000research.23297.1 + - [MerquryFK](https://github.com/thegenemyers/MERQURY.FK) - [MultiQC](https://multiqc.info) @@ -48,9 +56,9 @@ ## Software packaging/containerisation tools -- [Anaconda](https://anaconda.com) +- [Conda](https://conda.org/) - > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + > conda contributors. conda: A system-level, binary package and environment manager running on all major operating systems and platforms. Computer software. https://github.com/conda/conda - [Bioconda](https://bioconda.github.io) diff --git a/README.md b/README.md index 7b324815..4f6b10dd 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7949384-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7949384) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=conda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/sanger-tol/genomenote) @@ -13,7 +13,7 @@ ## Introduction -**sanger-tol/genomenote** is a bioinformatics pipeline that takes aligned HiC reads, creates contact maps and chromosomal grid using Cooler, and display on a [HiGlass server](https://genome-note-higlass.tol.sanger.ac.uk/app). The pipeline also collates (1) assembly information, statistics and chromosome details from NCBI datasets, (2) genome completeness from BUSCO, (3) consensus quality and k-mer completeness from MerquryFK, and (4) HiC primary mapped percentage from samtools flagstat. +**sanger-tol/genomenote** is a bioinformatics pipeline that takes aligned HiC reads, creates contact maps and chromosomal grid using Cooler, and display on a [HiGlass server](https://genome-note-higlass.tol.sanger.ac.uk/app). The pipeline also collates (1) assembly information, statistics and chromosome details from NCBI datasets, (2) genome completeness from BUSCO, (3) consensus quality and k-mer completeness from MerquryFK, (4) HiC primary mapped percentage from samtools flagstat and optionally (5) Annotation statistics from AGAT and BUSCO. The pipeline combines the calculated statistics and collated assembly metadata with a template document to output a genome note document. @@ -25,7 +25,9 @@ 6. Genome completeness ([`NCBI API`](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/reference-docs/rest-api/), [`BUSCO`](https://busco.ezlab.org)) 7. Consensus quality and k-mer completeness ([`FASTK`](https://github.com/thegenemyers/FASTK), [`MERQURY.FK`](https://github.com/thegenemyers/MERQURY.FK)) 8. Collated summary table ([`createtable`](bin/create_table.py)) -9. Present results and visualisations ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/)) +9. Optionally calculates some annotation statistics and completeness , ([`AGAT`](https://github.com/NBISweden/AGAT), [`BUSCO`](https://busco.ezlab.org)) +10. Combines calculated statisics and assembly metadata with a template file to produce a genome note document. +11. Present results and visualisations ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/)) ## Usage diff --git a/assets/genome_note_template.docx b/assets/genome_note_template.docx index 15b1f94c..d1f6e68d 100644 Binary files a/assets/genome_note_template.docx and b/assets/genome_note_template.docx differ diff --git a/bin/combine_parsed_data.py b/bin/combine_parsed_data.py index fc82bfb6..5e22d833 100755 --- a/bin/combine_parsed_data.py +++ b/bin/combine_parsed_data.py @@ -21,6 +21,7 @@ ("COPO_BIOSAMPLE_HIC", "copo_biosample_hic_file"), ("COPO_BIOSAMPLE_RNA", "copo_biosample_rna_file"), ("GBIF_TAXONOMY", "gbif_taxonomy_file"), + ("ENSEMBL_ANNOTATION", "ensembl_annotation_file"), ] @@ -42,6 +43,7 @@ def parse_args(args=None): parser.add_argument("--copo_biosample_hic_file", help="Input parsed COPO HiC biosample file.", required=False) parser.add_argument("--copo_biosample_rna_file", help="Input parsed COPO RNASeq biosample file.", required=False) parser.add_argument("--gbif_taxonomy_file", help="Input parsed GBIF taxonomy file.", required=False) + parser.add_argument("--ensembl_annotation_file", help="Input parsed Ensembl annotation file.", required=False) parser.add_argument("--out_consistent", help="Output file.", required=True) parser.add_argument("--out_inconsistent", help="Output file.", required=True) parser.add_argument("--version", action="version", version="%(prog)s 1.0") diff --git a/bin/combine_statistics_data.py b/bin/combine_statistics_data.py index 2e48c1dc..5517169b 100755 --- a/bin/combine_statistics_data.py +++ b/bin/combine_statistics_data.py @@ -8,7 +8,8 @@ files = [ ("CONSISTENT", "in_consistent"), - ("STATISITCS", "in_statistics"), + ("GENOME_STATISTICS", "in_genome_statistics"), + ("ANNOTATION_STATISITCS", "in_annotation_statistics"), ] @@ -19,7 +20,13 @@ def parse_args(args=None): parser = argparse.ArgumentParser(description=Description, epilog=Epilog) parser.add_argument("--in_consistent", help="Input consistent params file.", required=True) parser.add_argument("--in_inconsistent", help="Input consistent params file.", required=True) - parser.add_argument("--in_statistics", help="Input parsed genome statistics params file.", required=True) + parser.add_argument("--in_genome_statistics", help="Input parsed genome statistics params file.", required=True) + parser.add_argument( + "--in_annotation_statistics", + help="Input parsed annotation statistics params file.", + required=False, + default=None, + ) parser.add_argument("--out_consistent", help="Output file.", required=True) parser.add_argument("--out_inconsistent", help="Output file.", required=True) parser.add_argument("--version", action="version", version="%(prog)s 1.0") @@ -36,7 +43,7 @@ def process_file(file_in, file_type, params, param_sets): reader = csv.reader(infile) for row in reader: - if row[0] == "#paramName": + if row[0].startswith("#"): continue key = row.pop(0) @@ -95,7 +102,10 @@ def main(args=None): params_inconsistent = {} for file in files: - (params, param_sets) = process_file(getattr(args, file[1]), file[0], params, param_sets) + if file[0] == "ANNOTATION_STATISITCS" and args.in_annotation_statistics == None: + continue + else: + (params, param_sets) = process_file(getattr(args, file[1]), file[0], params, param_sets) for key in params.keys(): value_set = {v for v in params[key]} diff --git a/bin/extract_annotation_statistics_info.py b/bin/extract_annotation_statistics_info.py new file mode 100755 index 00000000..a4f2b964 --- /dev/null +++ b/bin/extract_annotation_statistics_info.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +import re +import csv +import sys +import argparse +import json + + +# Extract CDS information from mrna and transcript sections +def extract_cds_info(file): + # Define regex patterns for different statistics + patterns = { + "TRANSC_MRNA": re.compile(r"Number of mrna\s+(\d+)"), + "PCG": re.compile(r"Number of gene\s+(\d+)"), + "CDS_PER_GENE": re.compile(r"mean mrnas per gene\s+([\d.]+)"), + "EXONS_PER_TRANSC": re.compile(r"mean exons per mrna\s+([\d.]+)"), + "CDS_LENGTH": re.compile(r"mean mrna length \(bp\)\s+([\d.]+)"), + "EXON_SIZE": re.compile(r"mean exon length \(bp\)\s+([\d.]+)"), + "INTRON_SIZE": re.compile(r"mean intron in cds length \(bp\)\s+([\d.]+)"), + } + + # Initialize a dictionary to store content for different sections + section_content = {"mrna": "", "transcript": ""} + + # Variable to keep track of the current section being processed + current_section = None + + with open(file, "r") as f: + lines = f.read().splitlines() # read all lines in the file + + for line in lines: + line = line.strip() # Remove any leading/trailing whitespace including newline characters + + if "---------------------------------- mrna ----------------------------------" in line: + current_section = "mrna" # Switch to 'mrna' section + elif "---------------------------------- transcript ----------------------------------" in line: + current_section = "transcript" # Switch to 'transcript' section + elif "----------" in line: + current_section = None # End of current section + elif current_section: + section_content[current_section] += ( + line + " " + ) # Accumulate content for the current section, separate lines by a space + + cds_info = {} + + for label, pattern in patterns.items(): + text_to_search = section_content["mrna"] if label != "EXONS_PER_TRANSC" else section_content["transcript"] + match = re.search(pattern, text_to_search) + if match: + cds_info[label] = match.group(1) + + return cds_info + + +# Function to extract the number of non-coding genes from the second file +def extract_non_coding_genes(file): + non_coding_genes = {"ncrna_gene": 0} + + with open(file, "r") as f: + for line in f: + parts = line.split() + if len(parts) < 2: + continue + + gene_type = parts[0] + try: + count = int(parts[1]) + except ValueError: + continue + + if gene_type in non_coding_genes: + non_coding_genes[gene_type] += count + + NCG = sum(non_coding_genes.values()) + return {"NCG": NCG} + + +# Extract the one_line_summary from a BUSCO JSON file +def extract_busco_results(busco_stats_file): + try: + with open(busco_stats_file, "r") as file: + busco_data = json.load(file) + # Extract the one_line_summary from the results section + one_line_summary = busco_data.get("results", {}).get("one_line_summary") + if one_line_summary: + # Use regex to extract everything after the first colon + match = re.search(r':\s*"(.*)"', one_line_summary) + if match: + one_line_summary = match.group(1) # Get text after the colon + return {"BUSCO_PROTEIN_SCORES": one_line_summary} if one_line_summary else {} + except (json.JSONDecodeError, FileNotFoundError) as e: + print(f"Error loading BUSCO JSON file: {e}") + return {} + + +# Function to write the extracted data to a CSV file +def write_to_csv(data, output_file, busco_stats_file): + busco_results = extract_busco_results(busco_stats_file) + + descriptions = { + "TRANSC_MRNA": "The number of transcribed mRNAs", + "PCG": "The number of protein coding genes", + "NCG": "The number of non-coding genes", + "CDS_PER_GENE": "The average number of coding transcripts per gene", + "EXONS_PER_TRANSC": "The average number of exons per transcript", + "CDS_LENGTH": "The average length of coding sequence", + "EXON_SIZE": "The average length of a coding exon", + "INTRON_SIZE": "The average length of coding intron size", + "BUSCO_PROTEIN_SCORES": "BUSCO results summary from running BUSCO in protein mode", + } + + with open(output_file, "w", newline="") as csvfile: + writer = csv.writer(csvfile) + + # Write descriptions at the top of the CSV file + for key, description in descriptions.items(): + csvfile.write(f"# {key}: {description}\n") + + # Write the Variable and Value columns header + writer.writerow(["#paramName", "paramValue"]) + + # Write the data + for key, value in data.items(): + writer.writerow([key, value]) + + # Add the BUSCO results summary + for key, value in busco_results.items(): + writer.writerow([key, value]) + + +# Main function to take input files and output file as arguments +def main(): + Description = "Parse contents of the agat_spstatistics, buscoproteins and agat_sqstatbasic to extract relevant annotation statistics information." + Epilog = ( + "Example usage: python extract_annotation_statistics_info.py " + ) + + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("basic_stats", help="Input txt file with basic_feature_statistics.") + parser.add_argument("other_stats", help="Input txt file with other_feature_statistics.") + parser.add_argument("busco_stats", help="Input JSON file for the BUSCO statistics.") + parser.add_argument("output", help="Output file.") + parser.add_argument("--version", action="version", version="%(prog)s 1.0") + args = parser.parse_args() + + cds_info = extract_cds_info(args.other_stats) + non_coding_genes = extract_non_coding_genes(args.basic_stats) + data = {**cds_info, **non_coding_genes} + write_to_csv(data, args.output, args.busco_stats) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/fetch_ensembl_metadata.py b/bin/fetch_ensembl_metadata.py new file mode 100755 index 00000000..da6c87e1 --- /dev/null +++ b/bin/fetch_ensembl_metadata.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 + +import os +import sys +import requests +import argparse + + +def parse_args(args=None): + Description = "Query the Ensembl Metadata API to pull out annotation information required by a genome note." + Epilog = "Example usage: python fetch_ensembl_metadata.py --taxon_id --output" + + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("--taxon_id", required=True, help="The species taxon id") + parser.add_argument("--output", required=True, help="Output file path") + parser.add_argument("--version", action="version", version="%(prog)s 1.0") + + return parser.parse_args() + + +def make_dir(path): + if len(path) > 0: + os.makedirs(path, exist_ok=True) + + +def fetch_ensembl_data(taxon, output_file): + # Use the species taxon_id to query the Ensembl Metadata API to determine if the + # species has been annotated. Return assmbly accesssion of annotated data and + # a url linking to that species on the Ensembl Rapid website + + url = "https://beta.ensembl.org/data/graphql" + variables = {"taxon": taxon} + query = """ + query Annotation($taxon: String) + { + genomes(by_keyword: {species_taxonomy_id: $taxon }) { + assembly_accession + scientific_name + tol_id + dataset { + name + type + dataset_type + } + genome_id + } + } + """ + response = requests.post(url=url, json={"query": query, "variables": variables}) + + if response.status_code == 200: + param_list = [] + data = response.json() + if data["data"]["genomes"] is not None: + genomes = data["data"]["genomes"][0] + + if genomes["assembly_accession"]: + accession = genomes["assembly_accession"] + acc = f'"{accession}"' + param_list.append(("ANNOT_ACCESSION", acc)) + species_id = genomes["genome_id"] + annot_url = f"https://beta.ensembl.org/species/{species_id}" + annot_url = f'"{annot_url}"' + param_list.append(("ANNOT_URL", annot_url)) + + # Write out file even if there is no annotation data to write + out_dir = os.path.dirname(output_file) + make_dir(out_dir) # Create directory if it does not exist + + with open(output_file, "w") as fout: + # Write header + fout.write(",".join(["#paramName", "paramValue"]) + "\n") + + for param_pair in param_list: + fout.write(",".join(param_pair) + "\n") + + return output_file + + +def main(args=None): + args = parse_args(args) + fetch_ensembl_data(args.taxon_id, args.output) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/fetch_gbif_metadata.py b/bin/fetch_gbif_metadata.py index ccac54cd..4a47aa27 100755 --- a/bin/fetch_gbif_metadata.py +++ b/bin/fetch_gbif_metadata.py @@ -7,8 +7,8 @@ def parse_args(args=None): - Description = "Parse contents of an ENA Taxonomy report and pull out metadata required by a genome note." - Epilog = "Example usage: python fetch_gbif_metadata.py --genus --species --output" + Description = "Query GBIF for species taxonomy information and pull out metadata required by a genome note." + Epilog = "Example usage: python fetch_gbif_metadata.py --species --output" parser = argparse.ArgumentParser(description=Description, epilog=Epilog) parser.add_argument("--species", required=True, help="The species name") diff --git a/conf/base.config b/conf/base.config index 66bf7196..4758d678 100644 --- a/conf/base.config +++ b/conf/base.config @@ -86,4 +86,16 @@ process { withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } + withName: AGAT_SQSTATBASIC { + memory = { check_max( 500.MB * task.attempt, 'memory' ) } + } + + withName: AGAT_SPSTATISTICS { + memory = { check_max( 3.GB * task.attempt, 'memory' ) } + } + withName: BUSCOPROTEINS { + memory = { check_max( 4.GB * task.attempt, 'memory' ) } + cpus = { log_increase_cpus(2, 2*task.attempt, 1, 2) } + time = { check_max( 2.h * task.attempt, 'time') } + } } diff --git a/conf/modules.config b/conf/modules.config index c9c3aa9c..342574a4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -55,8 +55,8 @@ process { ext.args = { 'test' in workflow.profile.tokenize(',') ? // Additional configuration to speed processes up during testing. // Note: BUSCO *must* see the double-quotes around the parameters - '--mode genome --tar --metaeuk_parameters \'"-s=2"\' --metaeuk_rerun_parameters \'"-s=2"\'' - : '--mode genome --tar' } + '--tar --metaeuk --metaeuk_parameters \'"-s=2"\' --metaeuk_rerun_parameters \'"-s=2"\'' + : '--tar --metaeuk ' } } withName: "RESTRUCTUREBUSCODIR" { @@ -106,7 +106,7 @@ process { ] } - withName: COMBINE_STATISTICS_AND_METADATA { + withName: COMBINE_STATISTICS_AND_METADATA { publishDir = [ path: { "${params.outdir}/genome_note" }, mode: params.publish_dir_mode, @@ -114,7 +114,7 @@ process { ] } - withName: POPULATE_TEMPLATE { + withName: POPULATE_TEMPLATE { memory = { check_max( 100.MB * task.attempt, 'memory' ) } publishDir = [ path: { "${params.outdir}/genome_note" }, @@ -123,4 +123,37 @@ process { ] } + withName: GENERATE_HIGLASS_LINK { + publishDir = [ + path: { "${params.outdir}/contact_maps" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: AGAT_SQSTATBASIC { + ext.prefix = { "${meta.id}_sqstats" } + } + + withName: AGAT_SPSTATISTICS { + ext.prefix = { "${meta.id}_spstats" } + } + + withName: EXTRACT_ANNOTATION_STATISTICS_INFO { + publishDir = [ + path: { "${params.outdir}/gene" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: GFFREAD { + ext.args = "-y" + } + + withName: BUSCOPROTEINS { + scratch = { !params.use_work_dir_as_temp || (meta.genome_size < 2000000000) } + ext.args = { '--tar'} + } + } diff --git a/conf/test.config b/conf/test.config index a8591f5e..378f628c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,6 +22,9 @@ params { // Input data input = "${projectDir}/assets/samplesheet.csv" + // Annotation file + annotation_set = "https://tolit.cog.sanger.ac.uk/test-data/Ceramica_pisi/annotation/Ceramica_pisi-GCA_963859965.1-2024_04-genes.gff3.gz" + // Fasta references fasta = "https://tolit.cog.sanger.ac.uk/test-data/Ceramica_pisi/assembly/release/ilCerPisi1.1/insdc/GCA_963859965.1.fasta.gz" diff --git a/conf/test_full.config b/conf/test_full.config index 3eace25e..c746a6ae 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -10,8 +10,6 @@ ---------------------------------------------------------------------------------------- */ -cleanup = true - params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' @@ -19,6 +17,9 @@ params { // Input data for full size test input = "${projectDir}/assets/samplesheet_full.csv" + // Annotation file + annotation_set = "https://tolit.cog.sanger.ac.uk/test-data/Ypsolopha_sequella/annotation/GCA_934047225.1.braker.2022_09.gff3.gz" + // Fasta references fasta = "/lustre/scratch124/tol/projects/darwin/data/insects/Ypsolopha_sequella/assembly/release/ilYpsSequ2.1/insdc/GCA_934047225.1.fasta.gz" diff --git a/docs/output.md b/docs/output.md index f742e4cc..4920f6d5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -12,6 +12,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Contact maps](#contact-maps) – Contact matrix created using HiC sequencing data - [Genome statistics](#genome-statistics) – Collated assembly information, genome statistics and alignment quality information +- [Annotation statistics](#annotation-statistics) - Statistics calculated on the annotated protein set for the assembly (if GFF annotation file is provided as input) - [BUSCO](#busco) - BUSCO results - [MultiQC](#multiqc) - Aggregate report describing results from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution @@ -38,13 +39,24 @@ This pipeline collates (1) assembly information, statistics and chromosome detai Output files - `genome_note/` - - `.csv`: collate genome statistics file + - `.csv`: collated genome statistics file - `.{docx|xml}`: partially completed genome note template file - `_genome_note_consistent.csv`: a file of genome metadata parameters pulled from various public data repositories where all source agree on the paramter value. - `_genome_note_inconsistent.csv`: a file of genome metadata parameters, and their sources pulled from various public data repositories where the paramter value differs between data sources. +### Annotation statistics + +This pipeline can generate some statistics using AGAT and a BUSCO completeness score on the assembly annotation if a GFF file of protein annotations is given as input. This file should be a GFF3 format file describing the annotated protein set. + + +Output files + +- `gene/` -`_stats.csv`: collated annotation statistics file + + + ### BUSCO BUSCO results generated by the pipeline (all BUSCO lineages that match the claassification of the species). diff --git a/docs/usage.md b/docs/usage.md index 529ec0a3..f8baa6ee 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -13,18 +13,27 @@ These typically include: 1. Assembly metadata from COPO, ENA, GoaT, GBIF and NCBI 2. Assembly information, statistics and chromosome details from NCBI datasets. 3. Genome completeness from BUSCO. -4. Consensus quality and k-mer completeness from MerquryFK - when high-quality reads are available. -5. Hi-C contact map and chromosomal grid using Cooler, as well as primary mapped percentage from samtools flagstat - when Hi-C reads are provided. These files can be displayed on a [HiGlass](http://higlass.io) server, like the one use by the [Sanger Institute](https://genome-note-higlass.tol.sanger.ac.uk/app). +4. Annotation statistics from AGAT and completeness from BUSCO. +5. Consensus quality and k-mer completeness from MerquryFK - when high-quality reads are available. +6. Hi-C contact map and chromosomal grid using Cooler, as well as primary mapped percentage from samtools flagstat - when Hi-C reads are provided. These files can be displayed on a [HiGlass](http://higlass.io) server, like the one use by the [Sanger Institute](https://genome-note-higlass.tol.sanger.ac.uk/app). ## Genome metadata input You will need to supply the assembly accession for the genome you would like to analyse along with the biosample acession(s) linked to this genome assembly. ```bash - --assembly '[assembly accession]' - --biosample_wgs '[biosample accession of the biosample used to produce the genomic sequence]' - --biosample_hic '[biosample accession of the biosample used to produce the HiC data]' - --biosample_rna '[biosample accession of the biosample used to produce the RNASeq data] +--assembly '[assembly accession]' +--biosample_wgs '[biosample accession of the biosample used to produce the genomic sequence]' +--biosample_hic '[biosample accession of the biosample used to produce the HiC data]' +--biosample_rna '[biosample accession of the biosample used to produce the RNASeq data] +``` + +## Annotation input + +If you want to generate statistics on the geneset annotated for the assembly you will need to supply a GFF3 file of the predicted gene sequences. The assembly region names used in this file must match the assembly regions names used in the assembly fasta file provided with --fasta + +```bash +--annotation_set '[Path to annotation file :gff] ``` ## Samplesheet input diff --git a/lib/Utils.groovy b/lib/Utils.groovy index 8d030f4e..13cb02df 100755 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -22,7 +22,7 @@ class Utils { // Check that all channels are present // This channel list is ordered by required channel priority. - def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def required_channels_in_order = ['conda-forge', 'bioconda'] def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order diff --git a/modules.json b/modules.json index 67875851..05680e71 100644 --- a/modules.json +++ b/modules.json @@ -5,67 +5,76 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "agat/spstatistics": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, + "agat/sqstatbasic": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "bedtools/bamtobed": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, - "busco": { + "busco/busco": { "branch": "master", - "git_sha": "e3126f437c336c826f242842fe51769cfce0ec2d", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], - "patch": "modules/nf-core/busco/busco.diff" + "patch": "modules/nf-core/busco/busco/busco-busco.diff" }, "cooler/cload": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "cooler/dump": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "cooler/zoomify": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "fastk/fastk": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/fastk/fastk/fastk-fastk.diff" }, - "gnu/sort": { + "gffread": { "branch": "master", - "git_sha": "88f6e982fb8bd40488d837b3b08a65008e602840", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, - "gunzip": { + "gnu/sort": { "branch": "master", - "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, - "merquryfk/merquryfk": { + "gunzip": { "branch": "master", - "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"], - "patch": "modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff" + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", "installed_by": ["modules"] } } diff --git a/modules/local/combine_statistics_and_metadata.nf b/modules/local/combine_statistics_and_metadata.nf index 05b03ace..be31568e 100644 --- a/modules/local/combine_statistics_and_metadata.nf +++ b/modules/local/combine_statistics_and_metadata.nf @@ -10,7 +10,8 @@ process COMBINE_STATISTICS_AND_METADATA { input: tuple val(meta), path(consistent_params) tuple val(meta2), path(inconsistent_params) - tuple val(meta3), path(statistics_params) + tuple val(meta3), path(genome_statistics_params) + tuple val(meta4), path(annotation_statistics_params) output: tuple val (meta), path("${meta.id}_genome_note_consistent.csv") , emit: consistent @@ -22,14 +23,16 @@ process COMBINE_STATISTICS_AND_METADATA { script: def prefix = task.ext.prefix ?: meta.id + def annotation = annotation_statistics_params ? "--in_annotation_statistics $annotation_statistics_params" : '' """ combine_statistics_data.py \\ --in_consistent $consistent_params \\ --in_inconsistent $inconsistent_params \\ - --in_statistics $statistics_params \\ + --in_genome_statistics $genome_statistics_params \\ --out_consistent ${prefix}_genome_note_consistent.csv \\ --out_inconsistent ${prefix}_genome_note_inconsistent.csv \\ + $annotation \\ cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/extract_annotation_statistics_info.nf b/modules/local/extract_annotation_statistics_info.nf new file mode 100644 index 00000000..f87bfd1f --- /dev/null +++ b/modules/local/extract_annotation_statistics_info.nf @@ -0,0 +1,38 @@ +// EXtracting essential annotation statistics information from the output txt files +process EXTRACT_ANNOTATION_STATISTICS_INFO{ + tag "${meta.id}" + label 'process_single' + conda "conda-forge::python=3.9.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.9--1' : + 'quay.io/biocontainers/python:3.9--1' }" + input: + tuple val(meta), path(basic_stats) + tuple val(meta2), path(other_stats) + tuple val(meta3), path(busco_stats) + + output: + tuple val (meta), path("*.csv") , emit: csv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: meta.filename + def output_file = "${prefix}.stats.csv" + """ + + extract_annotation_statistics_info.py \\ + $basic_stats \\ + $other_stats \\ + $busco_stats \\ + $output_file + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} + diff --git a/modules/local/fetch_ensembl_metadata.nf b/modules/local/fetch_ensembl_metadata.nf new file mode 100644 index 00000000..d48b0540 --- /dev/null +++ b/modules/local/fetch_ensembl_metadata.nf @@ -0,0 +1,34 @@ +process FETCH_ENSEMBL_METADATA { + tag "$assembly" + label 'process_single' + + conda "conda-forge::python=3.9.1" + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/requests:2.26.0': + 'quay.io/biocontainers/requests:2.26.0'}" + + input: + tuple val(assembly), val(taxon_id) + + + output: + path "*.csv", emit: file_path + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def script_name = "fetch_ensembl_metadata.py" + def output_file = "${assembly}_ensembl_annotation.csv" + + """ + $script_name --taxon_id $taxon_id --output $output_file + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fetch_ensembl_metadata.py: \$(fetch_ensembl_metadata.py --version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/busco/environment.yml b/modules/nf-core/agat/spstatistics/environment.yml similarity index 50% rename from modules/nf-core/busco/environment.yml rename to modules/nf-core/agat/spstatistics/environment.yml index f872d057..0410ee76 100644 --- a/modules/nf-core/busco/environment.yml +++ b/modules/nf-core/agat/spstatistics/environment.yml @@ -1,7 +1,5 @@ -name: busco channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::busco=5.5.0 + - bioconda::agat=1.4.0 diff --git a/modules/nf-core/agat/spstatistics/main.nf b/modules/nf-core/agat/spstatistics/main.nf new file mode 100644 index 00000000..fd69d9c5 --- /dev/null +++ b/modules/nf-core/agat/spstatistics/main.nf @@ -0,0 +1,46 @@ +process AGAT_SPSTATISTICS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/agat:1.4.0--pl5321hdfd78af_0' : + 'biocontainers/agat:1.4.0--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(gff) + + output: + tuple val(meta), path("*.txt"), emit: stats_txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + agat_sp_statistics.pl \\ + --gff $gff \\ + --output ${prefix}.stats.txt \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_sp_statistics.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_sp_statistics.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/nf-core/agat/spstatistics/meta.yml b/modules/nf-core/agat/spstatistics/meta.yml new file mode 100644 index 00000000..a2f897b8 --- /dev/null +++ b/modules/nf-core/agat/spstatistics/meta.yml @@ -0,0 +1,48 @@ +name: agat_spstatistics +description: | + Provides different type of statistics in text format from a GFF/GTF annotation file +keywords: + - genome + - gff + - gtf + - statistics +tools: + - agat: + description: "AGAT is a toolkit for manipulation and getting information from + GFF/GTF files" + homepage: "https://github.com/NBISweden/AGAT" + documentation: "https://agat.readthedocs.io/" + tool_dev_url: "https://github.com/NBISweden/AGAT" + doi: "10.5281/zenodo.3552717" + licence: ["GPL v3"] + identifier: biotools:AGAT +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gff: + type: file + description: Annotation file in GFF3/GTF format + pattern: "*.{gff, gtf}" +output: + - stats_txt: + - meta: + type: file + description: Output of Statistics execution + pattern: "*.{txt}" + - "*.txt": + type: file + description: Output of Statistics execution + pattern: "*.{txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@toniher" +maintainers: + - "@toniher" + - "@gallvp" diff --git a/modules/nf-core/agat/spstatistics/tests/main.nf.test b/modules/nf-core/agat/spstatistics/tests/main.nf.test new file mode 100644 index 00000000..ef867576 --- /dev/null +++ b/modules/nf-core/agat/spstatistics/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process AGAT_SPSTATISTICS" + script "../main.nf" + process "AGAT_SPSTATISTICS" + + tag "modules" + tag "modules_nfcore" + tag "agat" + tag "agat/spstatistics" + + test("sarscov2 genome [gff]") { + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 genome [gff] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.stats_txt.collect { file(it[1]).getName() } + + process.out.versions ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/agat/spstatistics/tests/main.nf.test.snap b/modules/nf-core/agat/spstatistics/tests/main.nf.test.snap new file mode 100644 index 00000000..41f91740 --- /dev/null +++ b/modules/nf-core/agat/spstatistics/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "sarscov2 genome [gff] - stub": { + "content": [ + [ + "test.stats.txt", + "versions.yml:md5,33c0e3094afa9dabe8f773db494bceb9" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T10:14:09.802927" + }, + "sarscov2 genome [gff]": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.stats.txt:md5,0a27bb5a911615023decd8223ba67812" + ] + ], + "1": [ + "versions.yml:md5,33c0e3094afa9dabe8f773db494bceb9" + ], + "stats_txt": [ + [ + { + "id": "test" + }, + "test.stats.txt:md5,0a27bb5a911615023decd8223ba67812" + ] + ], + "versions": [ + "versions.yml:md5,33c0e3094afa9dabe8f773db494bceb9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T10:14:05.328436" + } +} \ No newline at end of file diff --git a/modules/nf-core/agat/spstatistics/tests/tags.yml b/modules/nf-core/agat/spstatistics/tests/tags.yml new file mode 100644 index 00000000..6cf47ac6 --- /dev/null +++ b/modules/nf-core/agat/spstatistics/tests/tags.yml @@ -0,0 +1,2 @@ +agat/spstatistics: + - "modules/nf-core/agat/spstatistics/**" diff --git a/modules/nf-core/agat/sqstatbasic/environment.yml b/modules/nf-core/agat/sqstatbasic/environment.yml new file mode 100644 index 00000000..0410ee76 --- /dev/null +++ b/modules/nf-core/agat/sqstatbasic/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::agat=1.4.0 diff --git a/modules/nf-core/agat/sqstatbasic/main.nf b/modules/nf-core/agat/sqstatbasic/main.nf new file mode 100644 index 00000000..d9b98c6b --- /dev/null +++ b/modules/nf-core/agat/sqstatbasic/main.nf @@ -0,0 +1,46 @@ +process AGAT_SQSTATBASIC { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/agat:1.4.0--pl5321hdfd78af_0' : + 'biocontainers/agat:1.4.0--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(gff) + + output: + tuple val(meta), path("*.txt"), emit: stats_txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + agat_sq_stat_basic.pl \\ + -i $gff \\ + --output ${prefix}.stats.txt \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_sq_stat_basic.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_sq_stat_basic.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/nf-core/agat/sqstatbasic/meta.yml b/modules/nf-core/agat/sqstatbasic/meta.yml new file mode 100644 index 00000000..b813a4a8 --- /dev/null +++ b/modules/nf-core/agat/sqstatbasic/meta.yml @@ -0,0 +1,48 @@ +name: agat_sqstatbasic +description: | + Provides basic statistics in text format from a GFF/GTF annotation file +keywords: + - genome + - gff + - gtf + - statistics +tools: + - agat: + description: "AGAT is a toolkit for manipulation and getting information from + GFF/GTF files" + homepage: "https://github.com/NBISweden/AGAT" + documentation: "https://agat.readthedocs.io/" + tool_dev_url: "https://github.com/NBISweden/AGAT" + doi: "10.5281/zenodo.3552717" + licence: ["GPL v3"] + identifier: biotools:AGAT +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gff: + type: file + description: Annotation file in GFF3/GTF format + pattern: "*.{gff, gtf}" +output: + - stats_txt: + - meta: + type: file + description: Output of Statistics execution + pattern: "*.{txt}" + - "*.txt": + type: file + description: Output of Statistics execution + pattern: "*.{txt}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@toniher" +maintainers: + - "@toniher" + - "@gallvp" diff --git a/modules/nf-core/agat/sqstatbasic/tests/main.nf.test b/modules/nf-core/agat/sqstatbasic/tests/main.nf.test new file mode 100644 index 00000000..4803f51e --- /dev/null +++ b/modules/nf-core/agat/sqstatbasic/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process AGAT_SQSTATBASIC" + script "../main.nf" + process "AGAT_SQSTATBASIC" + + tag "modules" + tag "modules_nfcore" + tag "agat" + tag "agat/sqstatbasic" + + test("sarscov2 - genome [gff3]") { + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - genome [gff3] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.stats_txt.collect { file(it[1]).getName() } + + process.out.versions ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/agat/sqstatbasic/tests/main.nf.test.snap b/modules/nf-core/agat/sqstatbasic/tests/main.nf.test.snap new file mode 100644 index 00000000..10b75f48 --- /dev/null +++ b/modules/nf-core/agat/sqstatbasic/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "sarscov2 - genome [gff3] - stub": { + "content": [ + [ + "test.stats.txt", + "versions.yml:md5,b5447c68fa61b8a04fdd7a95c4489775" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T10:16:10.187496" + }, + "sarscov2 - genome [gff3]": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.stats.txt:md5,a41a0a9cfba3ed7cc1aff28815843c78" + ] + ], + "1": [ + "versions.yml:md5,b5447c68fa61b8a04fdd7a95c4489775" + ], + "stats_txt": [ + [ + { + "id": "test" + }, + "test.stats.txt:md5,a41a0a9cfba3ed7cc1aff28815843c78" + ] + ], + "versions": [ + "versions.yml:md5,b5447c68fa61b8a04fdd7a95c4489775" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T10:16:05.681598" + } +} \ No newline at end of file diff --git a/modules/nf-core/agat/sqstatbasic/tests/tags.yml b/modules/nf-core/agat/sqstatbasic/tests/tags.yml new file mode 100644 index 00000000..ad0ea18f --- /dev/null +++ b/modules/nf-core/agat/sqstatbasic/tests/tags.yml @@ -0,0 +1,2 @@ +agat/sqstatbasic: + - "modules/nf-core/agat/sqstatbasic/**" diff --git a/modules/nf-core/bedtools/bamtobed/environment.yml b/modules/nf-core/bedtools/bamtobed/environment.yml new file mode 100644 index 00000000..5683bc05 --- /dev/null +++ b/modules/nf-core/bedtools/bamtobed/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/bamtobed/main.nf b/modules/nf-core/bedtools/bamtobed/main.nf index 29f5a62f..bb8295dc 100644 --- a/modules/nf-core/bedtools/bamtobed/main.nf +++ b/modules/nf-core/bedtools/bamtobed/main.nf @@ -2,10 +2,10 @@ process BEDTOOLS_BAMTOBED { tag "$meta.id" label 'process_medium' - conda "bioconda::bedtools=2.30.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" input: tuple val(meta), path(bam) @@ -32,4 +32,15 @@ process BEDTOOLS_BAMTOBED { bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ } diff --git a/modules/nf-core/bedtools/bamtobed/meta.yml b/modules/nf-core/bedtools/bamtobed/meta.yml index 5a4ff73a..20171885 100644 --- a/modules/nf-core/bedtools/bamtobed/meta.yml +++ b/modules/nf-core/bedtools/bamtobed/meta.yml @@ -3,36 +3,45 @@ description: Converts a bam file to a bed12 file. keywords: - bam - bed + - bedtools + - bamtobed + - converter tools: - bedtools: description: | A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. documentation: https://bedtools.readthedocs.io/en/latest/content/tools/complement.html licence: ["MIT"] + identifier: biotools:bedtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: Input BAM file - pattern: "*.{bam}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM file + pattern: "*.{bam}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bed: - type: file - description: Bed file containing genomic intervals. - pattern: "*.{bed}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bed": + type: file + description: Bed file containing genomic intervals. + pattern: "*.{bed}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@yuukiiwa" - "@drpatelh" +maintainers: + - "@yuukiiwa" + - "@drpatelh" diff --git a/modules/nf-core/bedtools/bamtobed/tests/main.nf.test b/modules/nf-core/bedtools/bamtobed/tests/main.nf.test new file mode 100644 index 00000000..297f1813 --- /dev/null +++ b/modules/nf-core/bedtools/bamtobed/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + name "Test Process BEDTOOLS_BAMTOBED" + script "../main.nf" + process "BEDTOOLS_BAMTOBED" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/bamtobed" + + test("sarscov2 - bam") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bed[0][1]).name).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bedtools/bamtobed/tests/main.nf.test.snap b/modules/nf-core/bedtools/bamtobed/tests/main.nf.test.snap new file mode 100644 index 00000000..d28ddd3e --- /dev/null +++ b/modules/nf-core/bedtools/bamtobed/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "stub": { + "content": [ + "test.bed" + ], + "timestamp": "2023-12-05T17:37:27.785556" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,a6a299bd39dc56225f8029c05ea97dcb" + ] + ], + "1": [ + "versions.yml:md5,90a53b0acd234b4f7d125dadd0dbbdfb" + ], + "bed": [ + [ + { + "id": "test" + }, + "test.bed:md5,a6a299bd39dc56225f8029c05ea97dcb" + ] + ], + "versions": [ + "versions.yml:md5,90a53b0acd234b4f7d125dadd0dbbdfb" + ] + } + ], + "timestamp": "2023-12-05T17:37:20.997988" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/bamtobed/tests/tags.yml b/modules/nf-core/bedtools/bamtobed/tests/tags.yml new file mode 100644 index 00000000..54510dda --- /dev/null +++ b/modules/nf-core/bedtools/bamtobed/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/bamtobed: + - "modules/nf-core/bedtools/bamtobed/**" diff --git a/modules/nf-core/busco/busco.diff b/modules/nf-core/busco/busco/busco-busco.diff similarity index 87% rename from modules/nf-core/busco/busco.diff rename to modules/nf-core/busco/busco/busco-busco.diff index 775788fb..1317574a 100644 --- a/modules/nf-core/busco/busco.diff +++ b/modules/nf-core/busco/busco/busco-busco.diff @@ -1,8 +1,8 @@ -Changes in module 'nf-core/busco' ---- modules/nf-core/busco/main.nf -+++ modules/nf-core/busco/main.nf +Changes in module 'nf-core/busco/busco' +--- modules/nf-core/busco/busco/main.nf ++++ modules/nf-core/busco/busco/main.nf @@ -1,6 +1,5 @@ - process BUSCO { + process BUSCO_BUSCO { - tag "$meta.id" - label 'process_medium' + tag "${meta.id}_${lineage}" diff --git a/modules/nf-core/busco/busco/environment.yml b/modules/nf-core/busco/busco/environment.yml new file mode 100644 index 00000000..5b918b45 --- /dev/null +++ b/modules/nf-core/busco/busco/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::busco=5.7.1 diff --git a/modules/nf-core/busco/main.nf b/modules/nf-core/busco/busco/main.nf similarity index 80% rename from modules/nf-core/busco/main.nf rename to modules/nf-core/busco/busco/main.nf index 83d8eacd..319f7235 100644 --- a/modules/nf-core/busco/main.nf +++ b/modules/nf-core/busco/busco/main.nf @@ -1,13 +1,13 @@ -process BUSCO { +process BUSCO_BUSCO { tag "${meta.id}_${lineage}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/busco:5.5.0--pyhdfd78af_0': - 'biocontainers/busco:5.5.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/busco:5.7.1--pyhdfd78af_0': + 'biocontainers/busco:5.7.1--pyhdfd78af_0' }" input: - tuple val(meta), path('tmp_input/*') + tuple val(meta), path(fasta, stageAs:'tmp_input/*') val mode // Required: One of genome, proteins, or transcriptome val lineage // Required: lineage to check against, "auto" enables --auto-lineage instead path busco_lineages_path // Recommended: path to busco lineages - downloads if not set @@ -15,13 +15,13 @@ process BUSCO { output: tuple val(meta), path("*-busco.batch_summary.txt") , emit: batch_summary - tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt, optional: true - tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json, optional: true - tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table, optional: true - tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list, optional: true - tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins, optional: true + tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt , optional: true + tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json , optional: true + tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table , optional: true + tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list , optional: true + tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins , optional: true tuple val(meta), path("*-busco/*/run_*/busco_sequences") , emit: seq_dir - tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir, optional: true + tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir , optional: true tuple val(meta), path("*-busco") , emit: busco_dir path "versions.yml" , emit: versions @@ -90,4 +90,17 @@ process BUSCO { busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" + def fasta_name = files(fasta).first().name - '.gz' + """ + touch ${prefix}-busco.batch_summary.txt + mkdir -p ${prefix}-busco/$fasta_name/run_${lineage}/busco_sequences + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/busco/busco/meta.yml b/modules/nf-core/busco/busco/meta.yml new file mode 100644 index 00000000..7cb6d69c --- /dev/null +++ b/modules/nf-core/busco/busco/meta.yml @@ -0,0 +1,152 @@ +name: busco_busco +description: Benchmarking Universal Single Copy Orthologs +keywords: + - quality control + - genome + - transcriptome + - proteome +tools: + - busco: + description: BUSCO provides measures for quantitative assessment of genome assembly, + gene set, and transcriptome completeness based on evolutionarily informed expectations + of gene content from near-universal single-copy orthologs selected from OrthoDB. + homepage: https://busco.ezlab.org/ + documentation: https://busco.ezlab.org/busco_userguide.html + tool_dev_url: https://gitlab.com/ezlab/busco + doi: "10.1007/978-1-4939-9173-0_14" + licence: ["MIT"] + identifier: biotools:busco +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Nucleic or amino acid sequence file in FASTA format. + pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" + - - mode: + type: string + description: The mode to run Busco in. One of genome, proteins, or transcriptome + pattern: "{genome,proteins,transcriptome}" + - - lineage: + type: string + description: The BUSCO lineage to use, or "auto" to automatically select lineage + - - busco_lineages_path: + type: directory + description: Path to local BUSCO lineages directory. + - - config_file: + type: file + description: Path to BUSCO config file. +output: + - batch_summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco.batch_summary.txt": + type: file + description: Summary of all sequence files analyzed + pattern: "*-busco.batch_summary.txt" + - short_summaries_txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - short_summary.*.txt: + type: file + description: Short Busco summary in plain text format + pattern: "short_summary.*.txt" + - short_summaries_json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - short_summary.*.json: + type: file + description: Short Busco summary in JSON format + pattern: "short_summary.*.json" + - full_table: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/full_table.tsv": + type: file + description: Full BUSCO results table + pattern: "full_table.tsv" + - missing_busco_list: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/missing_busco_list.tsv": + type: file + description: List of missing BUSCOs + pattern: "missing_busco_list.tsv" + - single_copy_proteins: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/single_copy_proteins.faa": + type: file + description: Fasta file of single copy proteins (transcriptome mode) + pattern: "single_copy_proteins.faa" + - seq_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/busco_sequences": + type: directory + description: BUSCO sequence directory + pattern: "busco_sequences" + - translated_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/translated_proteins": + type: directory + description: Six frame translations of each transcript made by the transcriptome + mode + pattern: "translated_dir" + - busco_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco": + type: directory + description: BUSCO lineage specific output + pattern: "*-busco" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@priyanka-surana" + - "@charles-plessy" + - "@mahesh-panchal" + - "@muffato" + - "@jvhagey" + - "@gallvp" +maintainers: + - "@priyanka-surana" + - "@charles-plessy" + - "@mahesh-panchal" + - "@muffato" + - "@jvhagey" + - "@gallvp" diff --git a/modules/nf-core/busco/busco/tests/main.nf.test b/modules/nf-core/busco/busco/tests/main.nf.test new file mode 100644 index 00000000..bb7b49a9 --- /dev/null +++ b/modules/nf-core/busco/busco/tests/main.nf.test @@ -0,0 +1,415 @@ +nextflow_process { + + name "Test Process BUSCO_BUSCO" + script "../main.nf" + process "BUSCO_BUSCO" + + tag "modules" + tag "modules_nfcore" + tag "busco" + tag "busco/busco" + + test("test_busco_genome_single_fasta") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues + input[3] = [] // Download busco lineage + input[4] = [] // No config + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + } + + test("test_busco_genome_multi_fasta") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta', checkIfExists: true) + ] + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1][0]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_txt[0][1][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1][0]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + with(path(process.out.short_summaries_json[0][1][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1][0]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(file(process.out.seq_dir[0][1][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_eukaryote_metaeuk") { + + config './nextflow.metaeuk.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'eukaryota_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_eukaryote_augustus") { + + config './nextflow.augustus.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'eukaryota_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + assert snapshot( + process.out.batch_summary[0][1], + process.out.versions[0] + ).match() + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Augustus did not recognize any genes') + + } + + assert process.out.short_summaries_json == [] + assert process.out.short_summaries_txt == [] + assert process.out.missing_busco_list == [] + assert process.out.full_table == [] + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_protein") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true) + ] + input[1] = 'proteins' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_transcriptome") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true) + ] + input[1] = 'transcriptome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.translated_dir[0][1], + process.out.single_copy_proteins[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + } + + } + + test("minimal-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/busco/busco/tests/main.nf.test.snap b/modules/nf-core/busco/busco/tests/main.nf.test.snap new file mode 100644 index 00000000..1b6411bc --- /dev/null +++ b/modules/nf-core/busco/busco/tests/main.nf.test.snap @@ -0,0 +1,230 @@ +{ + "minimal-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test" + }, + [ + [ + [ + [ + + ] + ] + ] + ] + ] + ], + "9": [ + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "batch_summary": [ + [ + { + "id": "test" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "busco_dir": [ + [ + { + "id": "test" + }, + [ + [ + [ + [ + + ] + ] + ] + ] + ] + ], + "full_table": [ + + ], + "missing_busco_list": [ + + ], + "seq_dir": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "short_summaries_json": [ + + ], + "short_summaries_txt": [ + + ], + "single_copy_proteins": [ + + ], + "translated_dir": [ + + ], + "versions": [ + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:28:04.451297" + }, + "test_busco_eukaryote_augustus": { + "content": [ + "test-eukaryota_odb10-busco.batch_summary.txt:md5,3ea3bdc423a461dae514d816bdc61c89", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:26:36.974986" + }, + "test_busco_genome_single_fasta": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,21b3fb771cf36be917cc451540d999be", + "full_table.tsv:md5,638fe7590f442c57361554dae330eca1", + "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:22:45.07816" + }, + "test_busco_genome_multi_fasta": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,fcd3c208913e8abda3d6742c43fec5fa", + [ + "full_table.tsv:md5,c657edcc7d0de0175869717551df6e83", + "full_table.tsv:md5,638fe7590f442c57361554dae330eca1" + ], + [ + "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112", + "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a" + ], + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:23:50.255602" + }, + "test_busco_eukaryote_metaeuk": { + "content": [ + "test-eukaryota_odb10-busco.batch_summary.txt:md5,ff6d8277e452a83ce9456bbee666feb6", + "full_table.tsv:md5,92b1b1d5cb5ea0e2093d16f00187e8c7", + "missing_busco_list.tsv:md5,0352e563de290bf804c708323c35a9e3", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:25:38.159041" + }, + "test_busco_transcriptome": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,8734b3f379c4c0928e5dd4ea1873dc64", + "full_table.tsv:md5,1b2ce808fdafa744c56b5f781551272d", + "missing_busco_list.tsv:md5,a6931b6470262b997b8b99ea0f1d14a4", + [ + "1024388at2.faa:md5,797d603d262a6595a112e25b73e878b0", + "1054741at2.faa:md5,cd4b928cba6b19b4437746ba507e7195", + "1093223at2.faa:md5,df9549708e5ffcfaee6a74dd70a0e5dc", + "1151822at2.faa:md5,12726afc1cdc40c13392e1596e93df3a", + "143460at2.faa:md5,d887431fd988a5556a523440f02d9594", + "1491686at2.faa:md5,d03362d19979b27306c192f1c74a84e5", + "1504821at2.faa:md5,4f5f6e5c57bac0092c1d85ded73d7e67", + "1574817at2.faa:md5,1153e55998c2929eacad2aed7d08d248", + "1592033at2.faa:md5,bb7a59e5f3a57ba12d10dabf4c77ab57", + "1623045at2.faa:md5,8fe38155feb1802beb97ef7714837bf5", + "1661836at2.faa:md5,6c6d592c2fbb0d7a4e5e1f47a15644f0", + "1674344at2.faa:md5,bb41b44e53565a54cadf0b780532fe08", + "1698718at2.faa:md5,f233860000028eb00329aa85236c71e5", + "1990650at2.faa:md5,34a2d29c5f8b6253159ddb7a43fa1829", + "223233at2.faa:md5,dec6705c7846c989296e73942f953cbc", + "402899at2.faa:md5,acc0f271f9a586d2ce1ee41669b22999", + "505485at2.faa:md5,aa0391f8fa5d9bd19b30d844d5a99845", + "665824at2.faa:md5,47f8ad43b6a6078206feb48c2e552793", + "776861at2.faa:md5,f8b90c13f7c6be828dea3bb920195e3d", + "874197at2.faa:md5,8d22a35a768debe6f376fc695d233a69", + "932854at2.faa:md5,2eff2de1ab83b22f3234a529a44e22bb", + "95696at2.faa:md5,247bfd1aef432f7b5456307768e9149c" + ], + "single_copy_proteins.faa:md5,73e2c5d6a9b0f01f2deea3cc5f21b764", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:27:53.992893" + }, + "test_busco_protein": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,f5a782378f9f94a748aa907381fdef91", + "full_table.tsv:md5,812ab6a0496fccab774643cf40c4f2a8", + "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:27:12.724862" + } +} \ No newline at end of file diff --git a/modules/nf-core/busco/busco/tests/nextflow.augustus.config b/modules/nf-core/busco/busco/tests/nextflow.augustus.config new file mode 100644 index 00000000..84daa69d --- /dev/null +++ b/modules/nf-core/busco/busco/tests/nextflow.augustus.config @@ -0,0 +1,5 @@ +process { + withName: 'BUSCO_BUSCO' { + ext.args = '--tar --augustus' + } +} diff --git a/modules/nf-core/busco/busco/tests/nextflow.config b/modules/nf-core/busco/busco/tests/nextflow.config new file mode 100644 index 00000000..1ec3fec0 --- /dev/null +++ b/modules/nf-core/busco/busco/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'BUSCO_BUSCO' { + ext.args = '--tar' + } +} diff --git a/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config b/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config new file mode 100644 index 00000000..c1418445 --- /dev/null +++ b/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config @@ -0,0 +1,5 @@ +process { + withName: 'BUSCO_BUSCO' { + ext.args = '--tar --metaeuk' + } +} diff --git a/modules/nf-core/busco/busco/tests/old_test.yml b/modules/nf-core/busco/busco/tests/old_test.yml new file mode 100644 index 00000000..75177f5d --- /dev/null +++ b/modules/nf-core/busco/busco/tests/old_test.yml @@ -0,0 +1,624 @@ +- name: busco test_busco_genome_single_fasta + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_single_fasta -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log + md5sum: 9caf1a1434414c78562eb0bbb9c0e53f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint + contains: + - "Tool: prodigal" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv + md5sum: c56edab1dc1522e993c25ae2b730799f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv + md5sum: b533ef30270f27160acce85a22d01bf5 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "lineage_dataset" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_genome_multi_fasta + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 8c64c1a28b086ef2ee444f99cbed5f7d + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_out.log + md5sum: 8f047bdb33264d22a83920bc2c63f29a + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_err.log + md5sum: c1fdc6977332f53dfe7f632733bb4585 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_out.log + md5sum: 50752acb1c5a20be886bfdfc06635bcb + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/.checkpoint + contains: + - "Tool: prodigal" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.faa + md5sum: 8166471fc5f08c82fd5643ab42327f9d + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.fna + md5sum: ddc508a18f60e7f3314534df50cdf8ca + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa + md5sum: 8166471fc5f08c82fd5643ab42327f9d + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna + md5sum: ddc508a18f60e7f3314534df50cdf8ca + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log + md5sum: c1fdc6977332f53dfe7f632733bb4585 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log + md5sum: 50752acb1c5a20be886bfdfc06635bcb + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.faa + md5sum: e56fd59c38248dc21ac94355dca98121 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.fna + md5sum: b365f84bf99c68357952e0b98ed7ce42 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_err.log + md5sum: e5f14d7925ba14a0f9850542f3739894 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_out.log + md5sum: d41971bfc1b621d4ffd2633bc47017ea + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/full_table.tsv + md5sum: c9651b88b10871abc260ee655898e828 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/missing_busco_list.tsv + md5sum: 9939309df2da5419de88c32d1435c779 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log + md5sum: 9caf1a1434414c78562eb0bbb9c0e53f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint + contains: + - "Tool: prodigal" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv + md5sum: c56edab1dc1522e993c25ae2b730799f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv + md5sum: b533ef30270f27160acce85a22d01bf5 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_eukaryote_metaeuk + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_metaeuk -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt + md5sum: ff6d8277e452a83ce9456bbee666feb6 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log + md5sum: e63debaa653f18f7405d936050abc093 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv + md5sum: bd880e90b9e5620a58943a3e0f9ff16b + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint + contains: + - "Tool: metaeuk" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa + md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv + md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_eukaryote_augustus + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_augustus -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt + md5sum: ff6d8277e452a83ce9456bbee666feb6 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log + md5sum: e63debaa653f18f7405d936050abc093 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log + contains: + - "metaeuk" + - "easy-predict" + - "Compute score and coverage" + - "Time for processing:" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log + contains: + - "metaeuk" + - "easy-predict" + - "Compute score and coverage" + - "Time for processing:" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv + md5sum: bd880e90b9e5620a58943a3e0f9ff16b + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint + contains: + - "Tool: metaeuk" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa + md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv + md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_protein + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_protein -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 7a65e6cbb6c56a2ea4e739ae0aa3297d + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/full_table.tsv + md5sum: 0e34f1011cd83ea1d5d5103ec62b8922 + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/missing_busco_list.tsv + md5sum: 9939309df2da5419de88c32d1435c779 + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/versions.yml + +- name: busco test_busco_transcriptome + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_transcriptome -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 46118ecf60d1b87d22b96d80f4f03632 + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/.checkpoint + contains: + - "Tool: makeblastdb" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ndb + md5sum: 3788c017fe5e6f0f58224e9cdd21822b + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nhr + md5sum: 8ecd2ce392bb5e25ddbe1d85f879582e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nin + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.njs + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.not + md5sum: 0c340e376c7e85d19f82ec1a833e6a6e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nsq + md5sum: 532d5c0a7ea00fe95ca3c97cb3be6198 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ntf + md5sum: de1250813f0c7affc6d12dac9d0fb6bb + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nto + md5sum: ff74bd41f9cc9b011c63a32c4f7693bf + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_err.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_out.log + contains: + - "Building a new DB" + - "Adding sequences from FASTA" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_err.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_out.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/.checkpoint + contains: + - "Tool: tblastn" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/coordinates.tsv + md5sum: cc30eed321944af293452bdbcfc24292 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_101.temp + md5sum: 73e9c65fc83fedc58f57f09b08f08238 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_119.temp + md5sum: 7fa4cc7955ec0cc36330a221c579b975 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_129.temp + md5sum: 6f1601c875d019e3f6f1f98ed8e988d4 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_138.temp + md5sum: 3f8e034686cd240c2330650d791bcae2 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_143.temp + md5sum: df3dfa8e9ba30ed70cf75b5e7abf2179 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_172.temp + md5sum: 7d463e0e6cf7169bc9077d8dc776dda1 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_178.temp + md5sum: 2288edf7fa4f88f51b4cf4d94086f77e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_188.temp + md5sum: 029906abbad6d87fc57830dd548cac24 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_195.temp + md5sum: 4937f3b348774a31b1160a00297c29cc + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_210.temp + md5sum: afcb20ba4c466479d6b91c8c62251e1f + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_232.temp + md5sum: 2e1e823ce017345bd998191a39fa9924 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_268.temp + md5sum: 08c2d82c34ecffbe1c638b410349412e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_29.temp + md5sum: cd9b63cf93524284781535c888313764 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_44.temp + md5sum: d1929b742b24ebe379bf4801ca882dca + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_58.temp + md5sum: 69215765b010c05336538cb322c900b3 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_72.temp + md5sum: 6feaa1cc3b0899a147ea9d466878f3e3 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_80.temp + md5sum: 13625eae14e860a96ce17cd4e37e9d01 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_81.temp + md5sum: e14b2484649b0dbc8926815c207b806d + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_93.temp + md5sum: 6902c93691df00e690faea914c71839e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_97.temp + md5sum: 0a0d9d38a83acbd5ad43c29cdf429988 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/tblastn.tsv + contains: + - "TBLASTN" + - "BLAST processed" + - "queries" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/full_table.tsv + md5sum: 24df25199e13c88bd892fc3e7b541ca0 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/missing_busco_list.tsv + md5sum: e7232e2b8cca4fdfdd9e363b39ebbc81 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/single_copy_proteins.faa + md5sum: e04b9465733577ae6e4bccb7aa01e720 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1024388at2.faa + md5sum: 7333c39a20258f20c7019ea0cd83157c + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1054741at2.faa + md5sum: ebb481e77a824685fbe04d8a2f3a0d7d + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1093223at2.faa + md5sum: 34621c7d499034e8f8e6b92fd4020a93 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1151822at2.faa + md5sum: aa89ca381c1c70c9c4e1380351ca7c2a + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/143460at2.faa + md5sum: f2e91d78b8dd3722840378789f29e8c8 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1491686at2.faa + md5sum: 73c25aef5c9cba7f4151804941b146ea + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1504821at2.faa + md5sum: cda556018d1f84ebe517e89f6fc107d0 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1574817at2.faa + md5sum: a9096c9fb8b25c78a72871ab0463acdc + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1592033at2.faa + md5sum: e463d25ce186c0cebfd749474f3a4c64 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1623045at2.faa + md5sum: f2cfd241590c6d8377286d6135480937 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1661836at2.faa + md5sum: 586569546fb9861502468e3d9ba2775c + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1674344at2.faa + md5sum: 24c658bee14ad84b062d81ad96642eb8 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1698718at2.faa + md5sum: 0b8e26ddf5149bbd8805be7af125208d + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1990650at2.faa + md5sum: 159320712ee01fb2ccb31a25df44eead + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/223233at2.faa + md5sum: 812629c0b06ac3d18661c2ca78de0c08 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/402899at2.faa + md5sum: f7ff4e1591342d30b77392a2e84b57d9 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/505485at2.faa + md5sum: 7b34a24fc49c540d46fcf96ff5129564 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/665824at2.faa + md5sum: 4cff2df64f6bcaff8bc19c234c8bcccd + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/776861at2.faa + md5sum: 613af7a3fea30ea2bece66f603b9284a + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/874197at2.faa + md5sum: a7cd1b13c9ef91c7ef4e31614166f197 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/932854at2.faa + md5sum: fe313ffd5efdb0fed887a04fba352552 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/95696at2.faa + md5sum: 4e1f30a2fea4dfbf9bb7fae2700622a0 + - path: output/busco/versions.yml diff --git a/modules/nf-core/busco/busco/tests/tags.yml b/modules/nf-core/busco/busco/tests/tags.yml new file mode 100644 index 00000000..7c4d2835 --- /dev/null +++ b/modules/nf-core/busco/busco/tests/tags.yml @@ -0,0 +1,2 @@ +busco/busco: + - "modules/nf-core/busco/busco/**" diff --git a/modules/nf-core/busco/meta.yml b/modules/nf-core/busco/meta.yml deleted file mode 100644 index 90b30d4d..00000000 --- a/modules/nf-core/busco/meta.yml +++ /dev/null @@ -1,96 +0,0 @@ -name: busco -description: Benchmarking Universal Single Copy Orthologs -keywords: - - quality control - - genome - - transcriptome - - proteome -tools: - - busco: - description: BUSCO provides measures for quantitative assessment of genome assembly, gene set, and transcriptome completeness based on evolutionarily informed expectations of gene content from near-universal single-copy orthologs selected from OrthoDB. - homepage: https://busco.ezlab.org/ - documentation: https://busco.ezlab.org/busco_userguide.html - tool_dev_url: https://gitlab.com/ezlab/busco - doi: "10.1007/978-1-4939-9173-0_14" - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Nucleic or amino acid sequence file in FASTA format. - pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" - - mode: - type: string - description: The mode to run Busco in. One of genome, proteins, or transcriptome - pattern: "{genome,proteins,transcriptome}" - - lineage: - type: string - description: The BUSCO lineage to use, or "auto" to automatically select lineage - - busco_lineages_path: - type: directory - description: Path to local BUSCO lineages directory. - - config_file: - type: file - description: Path to BUSCO config file. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - batch_summary: - type: file - description: Summary of all sequence files analyzed - pattern: "*-busco.batch_summary.txt" - - short_summaries_txt: - type: file - description: Short Busco summary in plain text format - pattern: "short_summary.*.txt" - - short_summaries_json: - type: file - description: Short Busco summary in JSON format - pattern: "short_summary.*.json" - - busco_dir: - type: directory - description: BUSCO lineage specific output - pattern: "*-busco" - - full_table: - type: file - description: Full BUSCO results table - pattern: "full_table.tsv" - - missing_busco_list: - type: file - description: List of missing BUSCOs - pattern: "missing_busco_list.tsv" - - single_copy_proteins: - type: file - description: Fasta file of single copy proteins (transcriptome mode) - pattern: "single_copy_proteins.faa" - - seq_dir: - type: directory - description: BUSCO sequence directory - pattern: "busco_sequences" - - translated_proteins: - type: directory - description: Six frame translations of each transcript made by the transcriptome mode - pattern: "translated_proteins" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@priyanka-surana" - - "@charles-plessy" - - "@mahesh-panchal" - - "@muffato" - - "@jvhagey" -maintainers: - - "@priyanka-surana" - - "@charles-plessy" - - "@mahesh-panchal" - - "@muffato" - - "@jvhagey" diff --git a/modules/nf-core/cooler/cload/environment.yml b/modules/nf-core/cooler/cload/environment.yml new file mode 100644 index 00000000..f8165ca9 --- /dev/null +++ b/modules/nf-core/cooler/cload/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cooler=0.9.2 diff --git a/modules/nf-core/cooler/cload/main.nf b/modules/nf-core/cooler/cload/main.nf index 80109d48..b170a5d0 100644 --- a/modules/nf-core/cooler/cload/main.nf +++ b/modules/nf-core/cooler/cload/main.nf @@ -2,10 +2,10 @@ process COOLER_CLOAD { tag "$meta.id" label 'process_high' - conda "bioconda::cooler=0.8.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : - 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + 'https://depot.galaxyproject.org/singularity/cooler:0.9.2--pyh7cba7a3_0' : + 'biocontainers/cooler:0.9.2--pyh7cba7a3_0' }" input: tuple val(meta), path(pairs), path(index), val(cool_bin) @@ -36,4 +36,15 @@ process COOLER_CLOAD { cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.cool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ } diff --git a/modules/nf-core/cooler/cload/meta.yml b/modules/nf-core/cooler/cload/meta.yml index 8513aaec..1bb9f748 100644 --- a/modules/nf-core/cooler/cload/meta.yml +++ b/modules/nf-core/cooler/cload/meta.yml @@ -2,6 +2,9 @@ name: cooler_cload description: Create a cooler from genomic pairs and bins keywords: - cool + - cooler + - cload + - hic tools: - cooler: description: Sparse binary format for genomic interaction matrices @@ -10,44 +13,48 @@ tools: tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" licence: ["BSD-3-clause"] - + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - pairs: - type: file - description: Path to contacts (i.e. read pairs) file. - - index: - type: file - description: Path to index file of the contacts. - - cool_bin: - type: value - description: Bins size in bp - - chromsizes: - type: file - description: Path to a chromsizes file. - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - pairs: + type: file + description: Path to contacts (i.e. read pairs) file. + - index: + type: file + description: Path to index file of the contacts. + - cool_bin: + type: integer + description: Bins size in bp + - - chromsizes: + type: file + description: Path to a chromsizes file. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "versions.yml" - cool: - type: file - description: Output COOL file path - pattern: "*.cool" - - cool_bin: - type: value - description: Bins size in bp - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cool": + type: file + description: Output COOL file path + pattern: "*.cool" + - cool_bin: + type: file + description: Output COOL file path + pattern: "*.cool" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@jianhong" - "@muffato" +maintainers: + - "@jianhong" + - "@muffato" diff --git a/modules/nf-core/cooler/dump/environment.yml b/modules/nf-core/cooler/dump/environment.yml new file mode 100644 index 00000000..45f3b64b --- /dev/null +++ b/modules/nf-core/cooler/dump/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cooler=0.10.2 diff --git a/modules/nf-core/cooler/dump/main.nf b/modules/nf-core/cooler/dump/main.nf index fed7502f..0cf527f3 100644 --- a/modules/nf-core/cooler/dump/main.nf +++ b/modules/nf-core/cooler/dump/main.nf @@ -2,10 +2,10 @@ process COOLER_DUMP { tag "$meta.id" label 'process_high' - conda "bioconda::cooler=0.8.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : - 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + 'https://depot.galaxyproject.org/singularity/cooler:0.10.2--pyhdfd78af_0' : + 'biocontainers/cooler:0.10.2--pyhdfd78af_0' }" input: tuple val(meta), path(cool), val(resolution) @@ -32,4 +32,16 @@ process COOLER_DUMP { cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') END_VERSIONS """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bedpe + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ } diff --git a/modules/nf-core/cooler/dump/meta.yml b/modules/nf-core/cooler/dump/meta.yml index fe60523e..e8150097 100644 --- a/modules/nf-core/cooler/dump/meta.yml +++ b/modules/nf-core/cooler/dump/meta.yml @@ -2,6 +2,8 @@ name: cooler_dump description: Dump a cooler’s data to a text stream. keywords: - dump + - text + - cooler tools: - cooler: description: Sparse binary format for genomic interaction matrices @@ -10,36 +12,39 @@ tools: tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" licence: ["BSD-3-Clause"] - + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - cool: - type: file - description: Path to COOL file - pattern: "*.{cool,mcool}" - - resolution: - type: value - description: Resolution - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - cool: + type: file + description: Path to COOL file + pattern: "*.{cool,mcool}" + - resolution: + type: integer + description: Resolution output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - bedpe: - type: file - description: Output text file - pattern: "*.bedpe" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bedpe": + type: file + description: Output text file + pattern: "*.bedpe" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@jianhong" - "@muffato" +maintainers: + - "@jianhong" + - "@muffato" diff --git a/modules/nf-core/cooler/dump/tests/main.nf.test b/modules/nf-core/cooler/dump/tests/main.nf.test new file mode 100644 index 00000000..fc5a2249 --- /dev/null +++ b/modules/nf-core/cooler/dump/tests/main.nf.test @@ -0,0 +1,60 @@ + +nextflow_process { + + name "Test Process COOLER_DUMP" + script "../main.nf" + process "COOLER_DUMP" + + tag "modules" + tag "modules_nfcore" + tag "cooler" + tag "cooler/dump" + + test("test-cooler-dump") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/open2c/cooler/master/tests/data/toy.asymm.16.cool", checkIfExists: true), + [:] // resolution if any + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-cooler-dump-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/open2c/cooler/master/tests/data/toy.asymm.16.cool", checkIfExists: true), + [:] // resolution if any + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/cooler/dump/tests/main.nf.test.snap b/modules/nf-core/cooler/dump/tests/main.nf.test.snap new file mode 100644 index 00000000..e415486d --- /dev/null +++ b/modules/nf-core/cooler/dump/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "test-cooler-dump-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bedpe:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,af2717b7d684ac9de4d3729d85bfc82f" + ], + "bedpe": [ + [ + { + "id": "test" + }, + "test.bedpe:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,af2717b7d684ac9de4d3729d85bfc82f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-02T15:27:00.895925" + }, + "test-cooler-dump": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bedpe:md5,38e9b0b8cc74f55a15e8ab01023048bd" + ] + ], + "1": [ + "versions.yml:md5,af2717b7d684ac9de4d3729d85bfc82f" + ], + "bedpe": [ + [ + { + "id": "test" + }, + "test.bedpe:md5,38e9b0b8cc74f55a15e8ab01023048bd" + ] + ], + "versions": [ + "versions.yml:md5,af2717b7d684ac9de4d3729d85bfc82f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-02T15:26:53.676822" + } +} \ No newline at end of file diff --git a/modules/nf-core/cooler/zoomify/environment.yml b/modules/nf-core/cooler/zoomify/environment.yml new file mode 100644 index 00000000..f8165ca9 --- /dev/null +++ b/modules/nf-core/cooler/zoomify/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cooler=0.9.2 diff --git a/modules/nf-core/cooler/zoomify/main.nf b/modules/nf-core/cooler/zoomify/main.nf index 95e7daff..f9933dff 100644 --- a/modules/nf-core/cooler/zoomify/main.nf +++ b/modules/nf-core/cooler/zoomify/main.nf @@ -2,10 +2,10 @@ process COOLER_ZOOMIFY { tag "$meta.id" label 'process_high' - conda "bioconda::cooler=0.8.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : - 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" + 'https://depot.galaxyproject.org/singularity/cooler:0.9.2--pyh7cba7a3_0' : + 'biocontainers/cooler:0.9.2--pyh7cba7a3_0' }" input: tuple val(meta), path(cool) @@ -32,4 +32,15 @@ process COOLER_ZOOMIFY { cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.mcool + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') + END_VERSIONS + """ } diff --git a/modules/nf-core/cooler/zoomify/meta.yml b/modules/nf-core/cooler/zoomify/meta.yml index 57f55486..3f928781 100644 --- a/modules/nf-core/cooler/zoomify/meta.yml +++ b/modules/nf-core/cooler/zoomify/meta.yml @@ -2,6 +2,8 @@ name: cooler_zoomify description: Generate a multi-resolution cooler file by coarsening keywords: - mcool + - cool + - cooler tools: - cooler: description: Sparse binary format for genomic interaction matrices @@ -10,32 +12,34 @@ tools: tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" licence: ["BSD-3-clause"] - + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - cool: - type: file - description: Path to COOL file - pattern: "*.{cool,mcool}" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - cool: + type: file + description: Path to COOL file + pattern: "*.{cool,mcool}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - mcool: - type: file - description: Output mcool file - pattern: "*.mcool" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.mcool": + type: file + description: Output mcool file + pattern: "*.mcool" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@jianhong" +maintainers: + - "@jianhong" diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 00000000..9d79af93 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::multiqc=1.20 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index ebc87273..105f9265 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : + 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657de..dc1e412f 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,36 +1,43 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template +description: Custom module used to dump software versions within the nf-core pipeline + template keywords: - custom - dump - version tools: - custom: - description: Custom module used to dump software versions within the nf-core pipeline template + description: Custom module used to dump software versions within the nf-core pipeline + template homepage: https://github.com/nf-core/tools documentation: https://github.com/nf-core/tools licence: ["MIT"] + identifier: "" input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" - + - - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" output: - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" + - software_versions.yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" + - software_versions_mqc.yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index da033408..b83b32c4 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -3,11 +3,11 @@ """Provide functions to merge multiple versions.yml files.""" - -import yaml import platform from textwrap import dedent +import yaml + def _make_versions_html(versions): """Generate a tabular HTML output of all versions for MultiQC.""" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 00000000..b1e1630b --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 00000000..5f59a936 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] + ], + "timestamp": "2024-01-09T23:01:18.710682" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 00000000..405aa24a --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastk/fastk/environment.yml b/modules/nf-core/fastk/fastk/environment.yml new file mode 100644 index 00000000..14207ff6 --- /dev/null +++ b/modules/nf-core/fastk/fastk/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::false_flag # False flag to pass nf-core/lint diff --git a/modules/nf-core/fastk/fastk/fastk-fastk.diff b/modules/nf-core/fastk/fastk/fastk-fastk.diff index e019c85c..b39f0bb6 100644 --- a/modules/nf-core/fastk/fastk/fastk-fastk.diff +++ b/modules/nf-core/fastk/fastk/fastk-fastk.diff @@ -1,21 +1,12 @@ Changes in module 'nf-core/fastk/fastk' --- modules/nf-core/fastk/fastk/main.nf +++ modules/nf-core/fastk/fastk/main.nf -@@ -3,7 +3,7 @@ - label 'process_medium' - - // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. -- container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.2' -+ container 'quay.io/sanger-tol/fastk:1.0.1-c1' - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { -@@ -25,7 +25,7 @@ - script: +@@ -24,7 +24,7 @@ + } def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. -+ def FASTK_VERSION = '427104ea91c78c3b8b8b49f1a7d6bbeaa869ba1c' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. ++ def FASTK_VERSION = '666652151335353eef2fcd58880bcef5bc2928e1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ FastK \\ $args \\ diff --git a/modules/nf-core/fastk/fastk/main.nf b/modules/nf-core/fastk/fastk/main.nf index e953cd89..f51c9d90 100644 --- a/modules/nf-core/fastk/fastk/main.nf +++ b/modules/nf-core/fastk/fastk/main.nf @@ -3,12 +3,7 @@ process FASTK_FASTK { label 'process_medium' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - container 'quay.io/sanger-tol/fastk:1.0.1-c1' - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "FASTK_FASTK module does not support Conda. Please use Docker / Singularity / Podman instead." - } + container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.2' input: tuple val(meta), path(reads) @@ -23,16 +18,54 @@ process FASTK_FASTK { task.ext.when == null || task.ext.when script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "FASTK_FASTK module does not support Conda. Please use Docker / Singularity / Podman instead." + } def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def FASTK_VERSION = '427104ea91c78c3b8b8b49f1a7d6bbeaa869ba1c' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def FASTK_VERSION = '666652151335353eef2fcd58880bcef5bc2928e1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ FastK \\ $args \\ -T$task.cpus \\ + -M${task.memory.toGiga()} \\ -N${prefix}_fk \\ $reads + find . -name '*.ktab*' \\ + | xargs chmod a+r + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastk: $FASTK_VERSION + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "FASTK_FASTK module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + + def touch_ktab = args.contains('-t') ? "touch ${prefix}_fk.ktab .${prefix}_fk.ktab.1" : '' + def touch_prof = args.contains('-p') ? "touch ${prefix}_fk.prof .${prefix}_fk.pidx.1" : '' + """ + touch ${prefix}_fk.hist + $touch_ktab + $touch_prof + + echo \\ + "FastK \\ + $args \\ + -T$task.cpus \\ + -M${task.memory.toGiga()} \\ + -N${prefix}_fk \\ + $reads" + cat <<-END_VERSIONS > versions.yml "${task.process}": fastk: $FASTK_VERSION diff --git a/modules/nf-core/fastk/fastk/meta.yml b/modules/nf-core/fastk/fastk/meta.yml index 55fd1be7..4abc1033 100644 --- a/modules/nf-core/fastk/fastk/meta.yml +++ b/modules/nf-core/fastk/fastk/meta.yml @@ -8,45 +8,58 @@ tools: - "fastk": description: "A fast K-mer counter for high-fidelity shotgun datasets" homepage: "https://github.com/thegenemyers/FASTK" - tool_dev_url: "https://github.com/thegenemyers/FASTK" - - licence: "https://github.com/thegenemyers/FASTK/blob/master/LICENSE" - + licence: ["https://github.com/thegenemyers/FASTK/blob/master/LICENSE"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - hist: - type: file - description: Histogram of k-mers - pattern: "*.hist" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.hist": + type: file + description: Histogram of k-mers + pattern: "*.hist" - ktab: - type: file - description: A sorted table of all canonical k‑mers along with their counts. - pattern: "*.ktab" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - '*.ktab*", hidden: true': + type: file + description: A sorted table of all canonical k‑mers along with their counts. + pattern: "*.ktab" - prof: - type: file - description: A k‑mer count profile of each sequence in the input data set. - pattern: "*.prof" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - '*.{prof,pidx}*", hidden: true': + type: file + description: A k‑mer count profile of each sequence in the input data set. + pattern: "*.prof" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@mahesh-panchal" +maintainers: + - "@mahesh-panchal" + - "@gallvp" diff --git a/modules/nf-core/fastk/fastk/tests/main.nf.test b/modules/nf-core/fastk/fastk/tests/main.nf.test new file mode 100644 index 00000000..856c36a8 --- /dev/null +++ b/modules/nf-core/fastk/fastk/tests/main.nf.test @@ -0,0 +1,110 @@ +nextflow_process { + + name "Test Process FASTK_FASTK" + script "../main.nf" + config './nextflow.config' + process "FASTK_FASTK" + + tag "modules" + tag "modules_nfcore" + tag "fastk" + tag "fastk/fastk" + + test("test_fastk_fastk_single_end") { + when { + process { + """ + input[0] = [ + [ id:'test' , single_end: true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_fastk_fastk_paired_end") { + + when { + process { + """ + input[0] = [ + [ id:'test' , single_end: false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_fastk_fastk_single_end_stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' , single_end: true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_fastk_fastk_paired_end_stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' , single_end: false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/fastk/fastk/tests/main.nf.test.snap b/modules/nf-core/fastk/fastk/tests/main.nf.test.snap new file mode 100644 index 00000000..1e3fc4da --- /dev/null +++ b/modules/nf-core/fastk/fastk/tests/main.nf.test.snap @@ -0,0 +1,346 @@ +{ + "test_fastk_fastk_single_end_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_fk.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + [ + ".test_fk.ktab.1:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_fk.ktab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + [ + ".test_fk.pidx.1:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_fk.prof:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "3": [ + "versions.yml:md5,c216a1608924d1662d2086e1de1d5abd" + ], + "hist": [ + [ + { + "id": "test", + "single_end": true + }, + "test_fk.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ktab": [ + [ + { + "id": "test", + "single_end": true + }, + [ + ".test_fk.ktab.1:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_fk.ktab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "prof": [ + [ + { + "id": "test", + "single_end": true + }, + [ + ".test_fk.pidx.1:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_fk.prof:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,c216a1608924d1662d2086e1de1d5abd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-18T19:39:28.510263" + }, + "test_fastk_fastk_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_fk.hist:md5,c80e12f7321e62dba4b437d7bff36ec0" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + [ + ".test_fk.ktab.1:md5,ceeacd0cb3aa69bf9b2a402830b40e26", + ".test_fk.ktab.2:md5,f2629fd15b285aed3dc2d5fe546edf3f", + "test_fk.ktab:md5,a605a58931a4b5029469e1c2575c8cee" + ] + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + [ + ".test_fk.pidx.1:md5,90bc384f61d2ecdb4586ab52ab04fddf", + ".test_fk.prof.1:md5,ebd48923a724cf79934f0b2ed42ba73d", + "test_fk.prof:md5,43d426c95d277b8148406624d513bd40" + ] + ] + ], + "3": [ + "versions.yml:md5,c216a1608924d1662d2086e1de1d5abd" + ], + "hist": [ + [ + { + "id": "test", + "single_end": true + }, + "test_fk.hist:md5,c80e12f7321e62dba4b437d7bff36ec0" + ] + ], + "ktab": [ + [ + { + "id": "test", + "single_end": true + }, + [ + ".test_fk.ktab.1:md5,ceeacd0cb3aa69bf9b2a402830b40e26", + ".test_fk.ktab.2:md5,f2629fd15b285aed3dc2d5fe546edf3f", + "test_fk.ktab:md5,a605a58931a4b5029469e1c2575c8cee" + ] + ] + ], + "prof": [ + [ + { + "id": "test", + "single_end": true + }, + [ + ".test_fk.pidx.1:md5,90bc384f61d2ecdb4586ab52ab04fddf", + ".test_fk.prof.1:md5,ebd48923a724cf79934f0b2ed42ba73d", + "test_fk.prof:md5,43d426c95d277b8148406624d513bd40" + ] + ] + ], + "versions": [ + "versions.yml:md5,c216a1608924d1662d2086e1de1d5abd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-18T19:31:31.128388" + }, + "test_fastk_fastk_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_fk.hist:md5,4f75b550d87ed4f26a2b10a05ac7e98c" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + [ + ".test_fk.ktab.1:md5,7f28fb44940fda799797e3069f5d7263", + ".test_fk.ktab.2:md5,c14a85c128926ace78372f09029977b1", + "test_fk.ktab:md5,fddd5be0c36ad1d2131b8d8774f7657a" + ] + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + [ + ".test_fk.pidx.1:md5,e7e760f714070a4afefb38ffff559684", + ".test_fk.pidx.2:md5,a549612bbdba2506eb3311237638c4b0", + ".test_fk.prof.1:md5,46a5fd9e297262b058f8c1fd062fcf56", + ".test_fk.prof.2:md5,80326a7406f41ccf2e51e341fc804132", + "test_fk.prof:md5,d3c7d8decd4ea6e298291b8be0e2de85" + ] + ] + ], + "3": [ + "versions.yml:md5,c216a1608924d1662d2086e1de1d5abd" + ], + "hist": [ + [ + { + "id": "test", + "single_end": false + }, + "test_fk.hist:md5,4f75b550d87ed4f26a2b10a05ac7e98c" + ] + ], + "ktab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + ".test_fk.ktab.1:md5,7f28fb44940fda799797e3069f5d7263", + ".test_fk.ktab.2:md5,c14a85c128926ace78372f09029977b1", + "test_fk.ktab:md5,fddd5be0c36ad1d2131b8d8774f7657a" + ] + ] + ], + "prof": [ + [ + { + "id": "test", + "single_end": false + }, + [ + ".test_fk.pidx.1:md5,e7e760f714070a4afefb38ffff559684", + ".test_fk.pidx.2:md5,a549612bbdba2506eb3311237638c4b0", + ".test_fk.prof.1:md5,46a5fd9e297262b058f8c1fd062fcf56", + ".test_fk.prof.2:md5,80326a7406f41ccf2e51e341fc804132", + "test_fk.prof:md5,d3c7d8decd4ea6e298291b8be0e2de85" + ] + ] + ], + "versions": [ + "versions.yml:md5,c216a1608924d1662d2086e1de1d5abd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-18T19:31:35.565502" + }, + "test_fastk_fastk_paired_end_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_fk.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + [ + ".test_fk.ktab.1:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_fk.ktab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + [ + ".test_fk.pidx.1:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_fk.prof:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "3": [ + "versions.yml:md5,c216a1608924d1662d2086e1de1d5abd" + ], + "hist": [ + [ + { + "id": "test", + "single_end": false + }, + "test_fk.hist:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "ktab": [ + [ + { + "id": "test", + "single_end": false + }, + [ + ".test_fk.ktab.1:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_fk.ktab:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "prof": [ + [ + { + "id": "test", + "single_end": false + }, + [ + ".test_fk.pidx.1:md5,d41d8cd98f00b204e9800998ecf8427e", + "test_fk.prof:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,c216a1608924d1662d2086e1de1d5abd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-18T19:39:32.570957" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastk/fastk/tests/nextflow.config b/modules/nf-core/fastk/fastk/tests/nextflow.config new file mode 100644 index 00000000..c89ce9d5 --- /dev/null +++ b/modules/nf-core/fastk/fastk/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '-t -p' +} diff --git a/modules/nf-core/fastk/fastk/tests/tags.yml b/modules/nf-core/fastk/fastk/tests/tags.yml new file mode 100644 index 00000000..82f9df82 --- /dev/null +++ b/modules/nf-core/fastk/fastk/tests/tags.yml @@ -0,0 +1,2 @@ +fastk/fastk: + - "modules/nf-core/fastk/fastk/**" diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml new file mode 100644 index 00000000..ee239841 --- /dev/null +++ b/modules/nf-core/gffread/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::gffread=0.12.7 diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf new file mode 100644 index 00000000..da55cbab --- /dev/null +++ b/modules/nf-core/gffread/main.nf @@ -0,0 +1,60 @@ +process GFFREAD { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gffread:0.12.7--hdcf5f25_4' : + 'biocontainers/gffread:0.12.7--hdcf5f25_4' }" + + input: + tuple val(meta), path(gff) + path fasta + + output: + tuple val(meta), path("*.gtf") , emit: gtf , optional: true + tuple val(meta), path("*.gff3") , emit: gffread_gff , optional: true + tuple val(meta), path("*.fasta"), emit: gffread_fasta , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) + def fasta_arg = fasta ? "-g $fasta" : '' + def output_name = "${prefix}.${extension}" + def output = extension == "fasta" ? "$output_name" : "-o $output_name" + def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim() + // args_sorted = Move '-w', '-x', and '-y' to the end of the args string as gffread expects the file name after these parameters + if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + gffread \\ + $gff \\ + $fasta_arg \\ + $args_sorted \\ + $output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) + def output_name = "${prefix}.${extension}" + if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch $output_name + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml new file mode 100644 index 00000000..bebe7f57 --- /dev/null +++ b/modules/nf-core/gffread/meta.yml @@ -0,0 +1,75 @@ +name: gffread +description: Validate, filter, convert and perform various other operations on GFF + files +keywords: + - gff + - conversion + - validation +tools: + - gffread: + description: GFF/GTF utility providing format conversions, region filtering, FASTA + sequence extraction and more. + homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + tool_dev_url: https://github.com/gpertea/gffread + doi: 10.12688/f1000research.23297.1 + licence: ["MIT"] + identifier: biotools:gffread +input: + - - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - gff: + type: file + description: A reference file in either the GFF3, GFF2 or GTF format. + pattern: "*.{gff, gtf}" + - - fasta: + type: file + description: A multi-fasta file with the genomic sequences + pattern: "*.{fasta,fa,faa,fas,fsa}" +output: + - gtf: + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - "*.gtf": + type: file + description: GTF file resulting from the conversion of the GFF input file if + '-T' argument is present + pattern: "*.{gtf}" + - gffread_gff: + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - "*.gff3": + type: file + description: GFF3 file resulting from the conversion of the GFF input file if + '-T' argument is absent + pattern: "*.gff3" + - gffread_fasta: + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - "*.fasta": + type: file + description: Fasta file produced when either of '-w', '-x', '-y' parameters + is present + pattern: "*.fasta" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" +maintainers: + - "@edmundmiller" + - "@gallvp" diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test new file mode 100644 index 00000000..4cd13dcd --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -0,0 +1,223 @@ +nextflow_process { + + name "Test Process GFFREAD" + script "../main.nf" + process "GFFREAD" + + tag "gffread" + tag "modules_nfcore" + tag "modules" + + test("sarscov2-gff3-gtf") { + + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gffread_gff == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gtf-stub") { + + options '-stub' + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gffread_gff == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gff3") { + + config "./nextflow-gff3.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gff3-stub") { + + options '-stub' + config "./nextflow-gff3.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-fasta") { + + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + test("sarscov2-gff3-fasta-stub") { + + options '-stub' + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + test("sarscov2-gff3-fasta-fail-catch") { + + options '-stub' + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'genome'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert ! process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + +} diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap new file mode 100644 index 00000000..15262320 --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test.snap @@ -0,0 +1,272 @@ +{ + "sarscov2-gff3-gtf": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428" + ] + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:48:56.496187" + }, + "sarscov2-gff3-gff3": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" + ] + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:49:00.892782" + }, + "sarscov2-gff3-gtf-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:26.975666" + }, + "sarscov2-gff3-fasta-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gffread_gff": [ + + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:44.34792" + }, + "sarscov2-gff3-gff3-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:35.221671" + }, + "sarscov2-gff3-fasta": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" + ] + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" + ] + ], + "gffread_gff": [ + + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:54:02.88143" + } +} \ No newline at end of file diff --git a/modules/nf-core/gffread/tests/nextflow-fasta.config b/modules/nf-core/gffread/tests/nextflow-fasta.config new file mode 100644 index 00000000..ac6cb148 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow-fasta.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-w -S' + } +} diff --git a/modules/nf-core/gffread/tests/nextflow-gff3.config b/modules/nf-core/gffread/tests/nextflow-gff3.config new file mode 100644 index 00000000..afe0830e --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow-gff3.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '' + } +} diff --git a/modules/nf-core/gffread/tests/nextflow.config b/modules/nf-core/gffread/tests/nextflow.config new file mode 100644 index 00000000..74b25094 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-T' + } +} diff --git a/modules/nf-core/gffread/tests/tags.yml b/modules/nf-core/gffread/tests/tags.yml new file mode 100644 index 00000000..05576065 --- /dev/null +++ b/modules/nf-core/gffread/tests/tags.yml @@ -0,0 +1,2 @@ +gffread: + - modules/nf-core/gffread/** diff --git a/modules/nf-core/gnu/sort/environment.yml b/modules/nf-core/gnu/sort/environment.yml new file mode 100644 index 00000000..babcfb55 --- /dev/null +++ b/modules/nf-core/gnu/sort/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::coreutils=9.3 diff --git a/modules/nf-core/gnu/sort/main.nf b/modules/nf-core/gnu/sort/main.nf index b0a57fbb..e1167666 100644 --- a/modules/nf-core/gnu/sort/main.nf +++ b/modules/nf-core/gnu/sort/main.nf @@ -1,11 +1,11 @@ process GNU_SORT { - tag "${meta.id}" + tag "$meta.id" label "process_low" - conda "bioconda::coreutils=8.25" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/coreutils:8.25--1' : - 'biocontainers/coreutils:8.25--1' }" + 'https://depot.galaxyproject.org/singularity/coreutils:9.3': + 'biocontainers/coreutils:9.3' }" input: tuple val(meta), path(input) @@ -22,7 +22,7 @@ process GNU_SORT { def prefix = task.ext.prefix ?: "${meta.id}" suffix = task.ext.suffix ?: "${input.extension}" output_file = "${prefix}.${suffix}" - def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = "9.3" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. if ("$input" == "$output_file") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ sort ${args} ${input} > ${output_file} @@ -34,15 +34,14 @@ process GNU_SORT { """ stub: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" suffix = task.ext.suffix ?: "${input.extension}" output_file = "${prefix}.${suffix}" - def VERSION = "9.1" + def VERSION = "9.3" if ("$input" == "$output_file") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ - sort ${args} ${input} > ${output_file} + touch ${output_file} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gnu/sort/meta.yml b/modules/nf-core/gnu/sort/meta.yml index e7fb0284..c555dbb5 100644 --- a/modules/nf-core/gnu/sort/meta.yml +++ b/modules/nf-core/gnu/sort/meta.yml @@ -1,4 +1,4 @@ -name: "GNU_SORT" +name: "gnu_sort" description: | Writes a sorted concatenation of file/s keywords: @@ -11,32 +11,30 @@ tools: homepage: "https://github.com/vgl-hub/gfastats" documentation: "https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html" licence: ["GPL"] - + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: Draft assembly file - pattern: "*.{txt,bed,interval,genome,bins}" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Draft assembly file + pattern: "*.{txt,bed,interval,genome,bins}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - sorted: - type: file - description: The sorted txt file generated by sort - pattern: "*.{txt,bed,interval,genome,bins}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@DLBPointon" +maintainers: + - "@DLBPointon" diff --git a/modules/nf-core/gnu/sort/tests/main.nf.test b/modules/nf-core/gnu/sort/tests/main.nf.test new file mode 100644 index 00000000..e4030187 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/main.nf.test @@ -0,0 +1,120 @@ +nextflow_process { + + name "Test Process GNU_SORT" + script "modules/nf-core/gnu/sort/main.nf" + process "GNU_SORT" + + tag "modules" + tag "modules_nfcore" + tag "gnu" + tag "gnu/sort" + + test("unsorted_genome_sort") { + config "./sort_simple_bed.config" + + when { + process { + """ + input[0] = [ + [id:'genome_test'], + file(params.test_data['generic']['unsorted_data']['unsorted_text']['genome_file'], + checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.sorted[0][1]).name + ).match("genome_sort") + } + ) + } + + } + + test("unsorted_intervals_sort") { + config "./sort_simple_bed.config" + when { + process { + """ + input[0] = [ + [id:'test'], + file(params.test_data['generic']['unsorted_data']['unsorted_text']['intervals'], + checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.sorted[0][1]).name + ).match("interval_sort") + } + ) + } + + } + + test("unsorted_csv_sort") { + config "./sort_complex.config" + + when { + process { + """ + input[0] = [ + [id:'test'], + file(params.test_data['generic']['unsorted_data']['unsorted_text']['numbers_csv'], + checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.sorted[0][1]).name + ).match("csv_sort") + } + ) + } + + } + + test("unsorted_csv_sort_stub") { + config "./sort_complex.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [id:'test'], + file(params.test_data['generic']['unsorted_data']['unsorted_text']['numbers_csv'], + checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + +} diff --git a/modules/nf-core/gnu/sort/tests/main.nf.test.snap b/modules/nf-core/gnu/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..63891bc4 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/main.nf.test.snap @@ -0,0 +1,164 @@ +{ + "unsorted_csv_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.csv.sorted:md5,0b52d1b4c4a0c6e972c6f94aafd75a1d" + ] + ], + "1": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test.csv.sorted:md5,0b52d1b4c4a0c6e972c6f94aafd75a1d" + ] + ], + "versions": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:44.714632791" + }, + "interval_sort": { + "content": [ + "test.bed.sorted" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:37.962807086" + }, + "unsorted_csv_sort_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.csv.sorted:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test.csv.sorted:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:51.456258705" + }, + "csv_sort": { + "content": [ + "test.csv.sorted" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:44.725431761" + }, + "unsorted_genome_sort": { + "content": [ + { + "0": [ + [ + { + "id": "genome_test" + }, + "genome_test.bed.sorted:md5,fd97f7efafdbbfa71d9b560f10b4b048" + ] + ], + "1": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ], + "sorted": [ + [ + { + "id": "genome_test" + }, + "genome_test.bed.sorted:md5,fd97f7efafdbbfa71d9b560f10b4b048" + ] + ], + "versions": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:31.041778719" + }, + "genome_sort": { + "content": [ + "genome_test.bed.sorted" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:31.060201722" + }, + "unsorted_intervals_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed.sorted:md5,abbce903ef263d38b2f71856387799ab" + ] + ], + "1": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test.bed.sorted:md5,abbce903ef263d38b2f71856387799ab" + ] + ], + "versions": [ + "versions.yml:md5,dd412503ec9dd665203e083ea44326cb" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-14T11:13:37.951397547" + } +} \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/sort_complex.config b/modules/nf-core/gnu/sort/tests/sort_complex.config new file mode 100644 index 00000000..103eaaf6 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/sort_complex.config @@ -0,0 +1,6 @@ +process { + withName: GNU_SORT { + ext.args = { "-t ';' -g -k 1,1 -k 2,2" } + ext.suffix = { "csv.sorted" } + } +} \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/sort_simple_bed.config b/modules/nf-core/gnu/sort/tests/sort_simple_bed.config new file mode 100644 index 00000000..d7d52e0f --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/sort_simple_bed.config @@ -0,0 +1,6 @@ +process { + withName: GNU_SORT { + ext.args = { "-k1,1 -k2,2n" } + ext.suffix = { "bed.sorted" } + } +} \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/sort_simple_genome.config b/modules/nf-core/gnu/sort/tests/sort_simple_genome.config new file mode 100644 index 00000000..4dcec385 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/sort_simple_genome.config @@ -0,0 +1,6 @@ +process { + withName: GNU_SORT { + ext.args = { "-k1,1 -k2,2n" } + ext.suffix = { "genome.sorted" } + } +} \ No newline at end of file diff --git a/modules/nf-core/gnu/sort/tests/tags.yml b/modules/nf-core/gnu/sort/tests/tags.yml new file mode 100644 index 00000000..ac40e376 --- /dev/null +++ b/modules/nf-core/gnu/sort/tests/tags.yml @@ -0,0 +1,2 @@ +gnu/sort: + - "modules/nf-core/gnu/sort/**" diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 00000000..c7794856 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index e7189d2f..5e67e3b9 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -2,10 +2,10 @@ process GUNZIP { tag "$archive" label 'process_single' - conda "conda-forge::sed=4.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" input: tuple val(meta), path(archive) @@ -18,13 +18,20 @@ process GUNZIP { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ - gunzip \\ - -f \\ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ $args \\ - $archive + $archive \\ + > $gunzip cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -33,7 +40,11 @@ process GUNZIP { """ stub: - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ touch $gunzip cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 4cdcdf4c..9066c035 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -10,26 +10,38 @@ tools: gzip is a file format and a software application used for file compression and decompression. documentation: https://www.gnu.org/software/gzip/manual/gzip.html licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. [ id:'test', single_end:false ] - - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" output: - gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" + - meta: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - $gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 00000000..776211ad --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,121 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 00000000..069967e7 --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 00000000..dec77642 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 00000000..fd3f6915 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/merquryfk/merquryfk/main.nf b/modules/nf-core/merquryfk/merquryfk/main.nf deleted file mode 100644 index 43bfe468..00000000 --- a/modules/nf-core/merquryfk/merquryfk/main.nf +++ /dev/null @@ -1,59 +0,0 @@ -process MERQURYFK_MERQURYFK { - tag "$meta.id" - label 'process_medium' - - // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - container 'quay.io/sanger-tol/fastk:1.0.1-c1' - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "MERQURYFK_MERQURYFK module does not support Conda. Please use Docker / Singularity / Podman instead." - } - - input: - tuple val(meta), path(fastk_hist), path(fastk_ktab), path(assembly), path(haplotigs) - - output: - tuple val(meta), path("${prefix}.completeness.stats") , emit: stats - tuple val(meta), path("${prefix}.*_only.bed") , emit: bed - tuple val(meta), path("${prefix}.*.qv") , emit: assembly_qv - tuple val(meta), path("${prefix}.*.spectra-cn.fl.png"), emit: spectra_cn_fl_png, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.fl.pdf"), emit: spectra_cn_fl_pdf, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.ln.png"), emit: spectra_cn_ln_png, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.ln.pdf"), emit: spectra_cn_ln_pdf, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.st.png"), emit: spectra_cn_st_png, optional: true - tuple val(meta), path("${prefix}.*.spectra-cn.st.pdf"), emit: spectra_cn_st_pdf, optional: true - tuple val(meta), path("${prefix}.qv") , emit: qv - tuple val(meta), path("${prefix}.spectra-asm.fl.png") , emit: spectra_asm_fl_png, optional: true - tuple val(meta), path("${prefix}.spectra-asm.fl.pdf") , emit: spectra_asm_fl_pdf, optional: true - tuple val(meta), path("${prefix}.spectra-asm.ln.png") , emit: spectra_asm_ln_png, optional: true - tuple val(meta), path("${prefix}.spectra-asm.ln.pdf") , emit: spectra_asm_ln_pdf, optional: true - tuple val(meta), path("${prefix}.spectra-asm.st.png") , emit: spectra_asm_st_png, optional: true - tuple val(meta), path("${prefix}.spectra-asm.st.pdf") , emit: spectra_asm_st_pdf, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def FASTK_VERSION = '427104ea91c78c3b8b8b49f1a7d6bbeaa869ba1c' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - def MERQURY_VERSION = 'd00d98157618f4e8d1a9190026b19b471055b22e' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ - MerquryFK \\ - $args \\ - -T$task.cpus \\ - ${fastk_ktab.find{ it.toString().endsWith(".ktab") }} \\ - $assembly \\ - $haplotigs \\ - $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastk: $FASTK_VERSION - merquryfk: $MERQURY_VERSION - r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff b/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff deleted file mode 100644 index c98c6057..00000000 --- a/modules/nf-core/merquryfk/merquryfk/merquryfk-merquryfk.diff +++ /dev/null @@ -1,25 +0,0 @@ -Changes in module 'nf-core/merquryfk/merquryfk' ---- modules/nf-core/merquryfk/merquryfk/main.nf -+++ modules/nf-core/merquryfk/merquryfk/main.nf -@@ -3,7 +3,7 @@ - label 'process_medium' - - // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. -- container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.2' -+ container 'quay.io/sanger-tol/fastk:1.0.1-c1' - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { -@@ -38,8 +38,8 @@ - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" -- def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. -- def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. -+ def FASTK_VERSION = '427104ea91c78c3b8b8b49f1a7d6bbeaa869ba1c' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. -+ def MERQURY_VERSION = 'd00d98157618f4e8d1a9190026b19b471055b22e' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ - MerquryFK \\ - $args \\ - -************************************************************ diff --git a/modules/nf-core/merquryfk/merquryfk/meta.yml b/modules/nf-core/merquryfk/merquryfk/meta.yml deleted file mode 100644 index ad89f8ba..00000000 --- a/modules/nf-core/merquryfk/merquryfk/meta.yml +++ /dev/null @@ -1,113 +0,0 @@ -name: "merquryfk_merquryfk" -description: FastK based version of Merqury -keywords: - - sort -tools: - - "merquryfk": - description: "FastK based version of Merqury" - homepage: "https://github.com/thegenemyers/MERQURY.FK" - - tool_dev_url: "https://github.com/thegenemyers/MERQURY.FK" - - licence: "https://github.com/thegenemyers/MERQURY.FK/blob/main/LICENSE" - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fastk_hist: - type: file - description: A histogram files from the program FastK - pattern: "*.hist" - - fastk_ktab: - type: file - description: Histogram ktab files from the program FastK (option -t) - pattern: "*.ktab*" - - assembly: - type: file - description: Genome (primary) assembly files (fasta format) - pattern: ".fasta" - - haplotigs: - type: file - description: Assembly haplotigs (fasta format) - pattern: ".fasta" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - stats: - type: file - description: Assembly statistics file - pattern: "*.completeness.stats" - - bed: - type: file - description: Assembly only kmer positions not supported by reads in bed format - pattern: "*_only.bed" - - spectra_cn_fl_png: - type: file - description: "Unstacked copy number spectra filled plot in PNG format" - pattern: "*.spectra-cn.fl.png" - - spectra_cn_ln_png: - type: file - description: "Unstacked copy number spectra line plot in PNG format" - pattern: "*.spectra-cn.ln.png" - - spectra_cn_st_png: - type: file - description: "Stacked copy number spectra line plot in PNG format" - pattern: "*.spectra-cn.st.png" - - spectra_asm_fl_png: - type: file - description: "Unstacked assembly spectra filled plot in PNG format" - pattern: "*.spectra-asm.fl.png" - - spectra_asm_ln_png: - type: file - description: "Unstacked assembly spectra line plot in PNG format" - pattern: "*.spectra-asm.ln.png" - - spectra_asm_st_png: - type: file - description: "Stacked assembly spectra line plot in PNG format" - pattern: "*.spectra-asm.st.png" - - spectra_cn_fl_pdf: - type: file - description: "Unstacked copy number spectra filled plot in PDF format" - pattern: "*.spectra-cn.fl.pdf" - - spectra_cn_ln_pdf: - type: file - description: "Unstacked copy number spectra line plot in PDF format" - pattern: "*.spectra-cn.ln.pdf" - - spectra_cn_st_pdf: - type: file - description: "Stacked copy number spectra line plot in PDF format" - pattern: "*.spectra-cn.st.pdf" - - spectra_asm_fl_pdf: - type: file - description: "Unstacked assembly spectra filled plot in PDF format" - pattern: "*.spectra-asm.fl.pdf" - - spectra_asm_ln_pdf: - type: file - description: "Unstacked assembly spectra line plot in PDF format" - pattern: "*.spectra-asm.ln.pdf" - - spectra_asm_st_pdf: - type: file - description: "Stacked assembly spectra line plot in PDF format" - pattern: "*.spectra-asm.st.pdf" - - assembly_qv: - type: file - description: "error and qv table for each scaffold of the assembly" - pattern: "*.qv" - - qv: - type: file - description: "error and qv of each assembly as a whole" - pattern: "*.qv" - -authors: - - "@mahesh-panchal" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..6f5b867b --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::multiqc=1.25.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387be..cc0643e1 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,16 +1,18 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" path(multiqc_config) path(extra_multiqc_config) path(multiqc_logo) + path(replace_names) + path(sample_names) output: path "*multiqc_report.html", emit: report @@ -23,14 +25,22 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ $args \\ $config \\ + $prefix \\ $extra_config \\ + $logo \\ + $replace \\ + $samples \\ . cat <<-END_VERSIONS > versions.yml @@ -41,8 +51,8 @@ process MULTIQC { stub: """ - touch multiqc_data - touch multiqc_plots + mkdir multiqc_data + mkdir multiqc_plots touch multiqc_report.html cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee5..b16c1879 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,6 +1,6 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC -description: Aggregate results from bioinformatics analyses across many samples into a single report +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples into + a single report keywords: - QC - bioinformatics tools @@ -13,44 +13,66 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - + identifier: biotools:multiqc input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. - pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" - + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. + pattern: "*.{tsv}" output: - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" - data: - type: directory - description: MultiQC data dir - pattern: "multiqc_data" + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_data" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@abhi18av" - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..33316a7d --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + config "./nextflow.config" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..2fcbb5ff --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:51:46.317523" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:52:20.680978" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-02T17:52:09.185842" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 00000000..c537a6a3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = null + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 00000000..02cda6e6 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index b87369e5..a6941e63 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -2,24 +2,26 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9e/9edc2564215d5cd137a8b25ca8a311600987186d406b092022444adf3c4447f7/data' : + 'community.wave.seqera.io/library/htslib_samtools:1.21--6cb89bfd40cbaabf' }" input: tuple val(meta), path(input), path(index) - path fasta + tuple val(meta2), path(fasta) path qname output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.sam"), emit: sam, optional: true - tuple val(meta), path("*.bai"), emit: bai, optional: true - tuple val(meta), path("*.csi"), emit: csi, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - path "versions.yml", emit: versions + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{bai,csi,crsi}"), emit: unselected_index, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -27,13 +29,13 @@ process SAMTOOLS_VIEW { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" - def readnames = qname ? "--qname-file ${qname}": "" - def file_type = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt bam") ? "bam" : - args.contains("--output-fmt cram") ? "cram" : - input.getExtension() + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ samtools \\ @@ -53,10 +55,19 @@ process SAMTOOLS_VIEW { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + index = args.contains("--write-index") ? "touch ${prefix}.${file_type}.csi" : "" + """ - touch ${prefix}.bam - touch ${prefix}.cram + touch ${prefix}.${file_type} + ${index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 76916033..caa7b015 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -15,65 +15,127 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - index: - type: optional file - description: BAM.BAI/BAM.CSI/CRAM.CRAI file - pattern: "*.{.bai,.csi,.crai}" - - fasta: - type: optional file - description: Reference file the CRAM was created with - pattern: "*.{fasta,fa}" - - qname: - type: file - description: Optional file with read names to output only select alignments - pattern: "*.{txt,list}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{.bai,.csi,.crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bam: - type: file - description: optional filtered/converted BAM file - pattern: "*.{bam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" - cram: - type: file - description: optional filtered/converted CRAM file - pattern: "*.{cram}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" - sam: - type: file - description: optional filtered/converted SAM file - pattern: "*.{sam}" - # bai, csi, and crai are created with `--write-index` + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" - bai: - type: file - description: optional BAM file index - pattern: "*.{bai}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" - csi: - type: file - description: optional tabix BAM file index - pattern: "*.{csi}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" - crai: - type: file - description: optional CRAM file index - pattern: "*.{crai}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - unselected: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + - unselected_index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}.{bai,csi,crsi}: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{bai,csi,crai}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@joseespinosa" - "@FriederikeHanssen" - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/view/tests/bam.config b/modules/nf-core/samtools/view/tests/bam.config new file mode 100644 index 00000000..c10d1081 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/bam_index.config b/modules/nf-core/samtools/view/tests/bam_index.config new file mode 100644 index 00000000..771ae033 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam --write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test new file mode 100644 index 00000000..37b81a91 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -0,0 +1,214 @@ +nextflow_process { + + name "Test Process SAMTOOLS_VIEW" + script "../main.nf" + process "SAMTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/view" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.bai).match("bam_bai") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.sam).match("bam_sam") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, + { assert snapshot(process.out.bai).match("cram_bai") }, + { assert snapshot(process.out.bam).match("cram_bam") }, + { assert snapshot(process.out.crai).match("cram_crai") }, + { assert snapshot(process.out.csi).match("cram_csi") }, + { assert snapshot(process.out.sam).match("cram_sam") }, + { assert snapshot(process.out.versions).match("cram_versions") } + ) + } + } + + test("cram_to_bam") { + + config "./bam.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, + { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, + { assert snapshot(process.out.csi).match("cram_to_bam_csi") }, + { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("cram_to_bam_index") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_versions") } + ) + } + } + + test("cram_to_bam_index_qname") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") }, + { assert snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } + ) + } + } + + test("bam_stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, + { assert snapshot(process.out.bai).match("bam_stub_bai") }, + { assert snapshot(process.out.crai).match("bam_stub_crai") }, + { assert snapshot(process.out.cram).match("bam_stub_cram") }, + { assert snapshot(process.out.sam).match("bam_stub_sam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap new file mode 100644 index 00000000..63849b03 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -0,0 +1,528 @@ +{ + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.256068" + }, + "cram_to_bam_index_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.958617" + }, + "bam_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.065301" + }, + "bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.258578" + }, + "bam_stub_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.071284" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:26:24.461775464" + }, + "cram_to_bam_index_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.972288" + }, + "cram_to_bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.999247" + }, + "cram_to_bam_index_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.976457" + }, + "cram_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.497581" + }, + "cram_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.50038" + }, + "cram_to_bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.992239" + }, + "cram_to_bam_index_qname_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.325496" + }, + "bam_stub_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.079529" + }, + "cram_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.490286" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.262882" + }, + "cram_to_bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.989247" + }, + "cram_to_bam_index_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.967681" + }, + "cram_to_bam_index_qname_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:25:51.953436682" + }, + "cram_to_bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.982361" + }, + "cram_to_bam_index_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.95456" + }, + "cram_to_bam_index_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:25:14.475388399" + }, + "cram_to_bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.98601" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:24:49.673441798" + }, + "cram_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.495512" + }, + "bam_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.076908" + }, + "cram_to_bam_index_qname_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "cram_to_bam_index_qname_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.330789" + }, + "cram_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.493129" + }, + "bam_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.074313" + }, + "cram_to_bam_index_qname_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:23:27.151650338" + }, + "cram_to_bam_index_qname_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.333248" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.259774" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.261287" + }, + "cram_to_bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.995454" + }, + "cram_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.502625" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:24:12.95416913" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.264651" + }, + "cram_to_bam_index_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.962863" + }, + "cram_to_bam_index_qname_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.337634" + }, + "bam_stub_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.068596" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/tags.yml b/modules/nf-core/samtools/view/tests/tags.yml new file mode 100644 index 00000000..4fdf1dd1 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/view: + - "modules/nf-core/samtools/view/**" diff --git a/nextflow.config b/nextflow.config index 0785357c..636d52e5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,7 @@ params { input = null binsize = 1000 kmer_size = 31 + annotation_set = null // Metadata assembly = null @@ -242,7 +243,7 @@ manifest { description = """Creating standarised genome assembly publications""" mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '2.0.0' + version = '2.1.0' doi = '10.5281/zenodo.7949384' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 881561d4..24b4e33d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -136,6 +136,15 @@ "type": "string", "description": "The name for the namespace used in the Kubernetes cluster running the HiGlass server.", "hidden": true + }, + "annotation_set": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^(?:[\\w/]+/)?\\S+\\.gff3?(?:\\.gz)?$", + "description": "Path to annotation file :gff.", + "help_text": "You will need to specify the path to the annotation file as eith gff or gff.gz.", + "fa_icon": "fas fa-file-code" } } }, diff --git a/subworkflows/local/annotation_statistics.nf b/subworkflows/local/annotation_statistics.nf new file mode 100644 index 00000000..1cc5d64a --- /dev/null +++ b/subworkflows/local/annotation_statistics.nf @@ -0,0 +1,87 @@ +// include modules from nf-core + +include { AGAT_SPSTATISTICS } from '../../modules/nf-core/agat/spstatistics/main' +include { AGAT_SQSTATBASIC } from '../../modules/nf-core/agat/sqstatbasic/main' +include { GUNZIP } from '../../modules/nf-core/gunzip/main' +include { EXTRACT_ANNOTATION_STATISTICS_INFO } from '../../modules/local/extract_annotation_statistics_info.nf' +include { BUSCO_BUSCO as BUSCOPROTEINS } from '../../modules/nf-core/busco/busco/main' +include { NCBIDATASETS_SUMMARYGENOME as SUMMARYGENOME } from '../../modules/local/ncbidatasets/summarygenome' +include { GFFREAD } from '../../modules/nf-core/gffread/main' +include { NCBI_GET_ODB } from '../../modules/local/ncbidatasets/get_odb' + +workflow ANNOTATION_STATISTICS { + + take: + gff // channel: /path/to/annotation file + genome // channel: [ meta, fasta ] + lineage_tax_ids // channel: /path/to/lineage_tax_ids + lineage_db // channel: /path/to/buscoDB + + main: + ch_versions = Channel.empty() + + // Map the GFF channel to create a tuple with metadata and the file + gff + | map { file -> + [ [ 'id': params.assembly + '_annotation', 'ext': "gff", 'filename': file.baseName ], file ] + } + | set { ch_gff_tupple } + + // Uncompress the gff files if needed + if (params.annotation_set.endsWith('.gz')) { + ch_gff_unzipped = GUNZIP(ch_gff_tupple).gunzip + ch_versions = ch_versions.mix ( GUNZIP.out.versions.first() ) + } else { + ch_gff_unzipped = ch_gff_tupple + } + + // Basic Annotation summary statistics + AGAT_SQSTATBASIC(ch_gff_unzipped) + ch_versions = ch_versions.mix ( AGAT_SQSTATBASIC.out.versions.first() ) + + // Other annotation statistics + AGAT_SPSTATISTICS(ch_gff_unzipped) + ch_versions = ch_versions.mix ( AGAT_SPSTATISTICS.out.versions.first() ) + + genome + | map { meta, fasta -> fasta } + | set { ch_fasta } + + // Obtaining the protein fasta file from the gff3 + GFFREAD(ch_gff_unzipped, ch_fasta) + ch_versions = ch_versions.mix ( GFFREAD.out.versions.first() ) + + // Genome summary statistics + SUMMARYGENOME ( genome ) + ch_versions = ch_versions.mix ( SUMMARYGENOME.out.versions.first() ) + + // Get ODB lineage value + NCBI_GET_ODB ( SUMMARYGENOME.out.summary, lineage_tax_ids ) + ch_versions = ch_versions.mix ( NCBI_GET_ODB.out.versions.first() ) + + // BUSCO + NCBI_GET_ODB.out.csv + | map { meta, csv -> csv } + | splitCsv() + | map { row -> row[1] } + | set { ch_lineage} + + // Running BUSCO in protein mode + BUSCOPROTEINS(GFFREAD.out.gffread_fasta, 'proteins', ch_lineage, lineage_db.ifEmpty([]), [] ) + ch_versions = ch_versions.mix ( BUSCOPROTEINS.out.versions.first() ) + + // Parsing the stats_txt files as input channels + EXTRACT_ANNOTATION_STATISTICS_INFO( + AGAT_SQSTATBASIC.out.stats_txt, + AGAT_SPSTATISTICS.out.stats_txt, + BUSCOPROTEINS.out.short_summaries_json + ) + + ch_versions = ch_versions.mix( EXTRACT_ANNOTATION_STATISTICS_INFO.out.versions.first() ) + + emit: + summary = EXTRACT_ANNOTATION_STATISTICS_INFO.out.csv // channel: [ csv ] + versions = ch_versions // channel: [ versions.yml ] + +} + diff --git a/subworkflows/local/combine_note_data.nf b/subworkflows/local/combine_note_data.nf index b9a34611..b6e405c1 100644 --- a/subworkflows/local/combine_note_data.nf +++ b/subworkflows/local/combine_note_data.nf @@ -13,6 +13,7 @@ workflow COMBINE_NOTE_DATA { ch_params_consistent // channel: /path/to/csv/file/consistent_parameters from GENOME_METADATA subworkflow ch_params_inconsistent // channel: /path/to/csv/file/consistent_parameters from GENOME_METADATA subworkflow ch_summary // channel: /path/to/csv/summary/file from GENOME_STATISTICS subworkflow + ch_annotation_summary // channel: /path/to/csv/summary/file from ANNOTATION_STATISTICS subworkflow ch_higlass // channel: /path/to/csv/higlass_link from CONTACT_MAPS subworkflow ch_note_template // channel: /path/to/genome_note_doc_template @@ -30,11 +31,11 @@ workflow COMBINE_NOTE_DATA { } | set { ch_summary_meta } + PARSE_METADATA(ch_summary_meta) ch_versions = ch_versions.mix( PARSE_METADATA.out.versions.first() ) - - COMBINE_STATISTICS_AND_METADATA(ch_params_consistent, ch_params_inconsistent, PARSE_METADATA.out.file_path) + COMBINE_STATISTICS_AND_METADATA(ch_params_consistent, ch_params_inconsistent, PARSE_METADATA.out.file_path, ch_annotation_summary) ch_versions = ch_versions.mix( COMBINE_STATISTICS_AND_METADATA.out.versions.first() ) ch_higlass diff --git a/subworkflows/local/contact_maps.nf b/subworkflows/local/contact_maps.nf index 5d06d680..6c720f59 100644 --- a/subworkflows/local/contact_maps.nf +++ b/subworkflows/local/contact_maps.nf @@ -33,12 +33,7 @@ workflow CONTACT_MAPS { // CRAM to BAM - genome - | map { meta, fasta -> fasta } - | first - | set { ch_fasta } - - SAMTOOLS_VIEW ( reads, ch_fasta, [] ) + SAMTOOLS_VIEW ( reads, genome.first(), [] ) ch_versions = ch_versions.mix ( SAMTOOLS_VIEW.out.versions.first() ) @@ -72,7 +67,7 @@ workflow CONTACT_MAPS { GET_CHROMLIST.out.list | map { meta, list -> list } | first - | set { ch_chromsizes } + | set { ch_chromsizes } COOLER_CLOAD ( ch_cooler, ch_chromsizes ) ch_versions = ch_versions.mix ( COOLER_CLOAD.out.versions.first() ) @@ -90,7 +85,7 @@ workflow CONTACT_MAPS { // Create the `.genome` file COOLER_CLOAD.out.cool | map { meta, cool, bin -> [ meta, cool, [] ] } - | set { ch_dump } + | set { ch_dump } COOLER_DUMP ( ch_dump ) ch_versions = ch_versions.mix ( COOLER_DUMP.out.versions.first() ) @@ -101,7 +96,7 @@ workflow CONTACT_MAPS { if ( params.upload_higlass_data ) { UPLOAD_HIGLASS_DATA (COOLER_ZOOMIFY.out.mcool, COOLER_DUMP.out.bedpe, params.higlass_data_project_dir, params.higlass_upload_directory ) ch_versions = ch_versions.mix ( UPLOAD_HIGLASS_DATA.out.versions.first() ) - + GENERATE_HIGLASS_LINK (UPLOAD_HIGLASS_DATA.out.file_name, UPLOAD_HIGLASS_DATA.out.map_uuid, UPLOAD_HIGLASS_DATA.out.grid_uuid, params.higlass_url, UPLOAD_HIGLASS_DATA.out.genome_file) ch_versions = ch_versions.mix ( GENERATE_HIGLASS_LINK.out.versions.first() ) ch_higlass_link = ch_higlass_link.mix ( GENERATE_HIGLASS_LINK.out.higlass_link.first() ) diff --git a/subworkflows/local/genome_metadata.nf b/subworkflows/local/genome_metadata.nf index 22088de9..b08f24cb 100755 --- a/subworkflows/local/genome_metadata.nf +++ b/subworkflows/local/genome_metadata.nf @@ -8,6 +8,7 @@ include { RUN_WGET } from '../../modules/local/run_wget' include { PARSE_METADATA } from '../../modules/local/parse_metadata' include { COMBINE_METADATA } from '../../modules/local/combine_metadata' include { FETCH_GBIF_METADATA } from '../../modules/local/fetch_gbif_metadata' +include { FETCH_ENSEMBL_METADATA } from '../../modules/local/fetch_ensembl_metadata' workflow GENOME_METADATA { @@ -92,11 +93,24 @@ workflow GENOME_METADATA { | map { it -> tuple( it )} | set { ch_gbif } + + ch_file_list + | map { meta, it -> + def assembly = meta.id + def taxon_id = meta.taxon_id + [assembly, taxon_id] + } + | set { ch_ensembl_params} + + // Query Ensembl Metadata API to see if this species has been annotated + FETCH_ENSEMBL_METADATA ( ch_ensembl_params ) + ch_versions = ch_versions.mix( FETCH_ENSEMBL_METADATA.out.versions.first() ) + PARSE_METADATA.out.file_path | map { it -> tuple( it[1] )} | set { ch_parsed } - ch_parsed.mix(ch_gbif) + ch_parsed.mix(ch_gbif, FETCH_ENSEMBL_METADATA.out.file_path) | collect | map { it -> [ it ] diff --git a/subworkflows/local/genome_statistics.nf b/subworkflows/local/genome_statistics.nf index 73a7e2ba..758e5920 100644 --- a/subworkflows/local/genome_statistics.nf +++ b/subworkflows/local/genome_statistics.nf @@ -5,12 +5,13 @@ include { NCBIDATASETS_SUMMARYGENOME as SUMMARYGENOME } from '../../modules/local/ncbidatasets/summarygenome' include { NCBIDATASETS_SUMMARYGENOME as SUMMARYSEQUENCE } from '../../modules/local/ncbidatasets/summarygenome' include { NCBI_GET_ODB } from '../../modules/local/ncbidatasets/get_odb' -include { BUSCO } from '../../modules/nf-core/busco/main' +include { BUSCO_BUSCO as BUSCO } from '../../modules/nf-core/busco/busco/main' include { RESTRUCTUREBUSCODIR } from '../../modules/local/restructurebuscodir' include { FASTK_FASTK } from '../../modules/nf-core/fastk/fastk/main' -include { MERQURYFK_MERQURYFK } from '../../modules/nf-core/merquryfk/merquryfk/main' include { CREATETABLE } from '../../modules/local/createtable' +// This is only temporarily removed so I'm leaving it here for now +//include { MERQURYFK_MERQURYFK } from '../../modules/nf-core/merquryfk/merquryfk/main' workflow GENOME_STATISTICS { take: @@ -46,7 +47,7 @@ workflow GENOME_STATISTICS { | splitCsv() | map { row -> row[1] } | set { ch_lineage } - + BUSCO ( genome, "genome", ch_lineage, lineage_db.ifEmpty([]), [] ) ch_versions = ch_versions.mix ( BUSCO.out.versions.first() ) @@ -87,7 +88,7 @@ workflow GENOME_STATISTICS { FASTK_FASTK.out.hist | join ( FASTK_FASTK.out.ktab ) | set { ch_combo } - + ch_pacbio.dir | map { meta, dir -> [ meta, @@ -95,7 +96,7 @@ workflow GENOME_STATISTICS { dir.listFiles().findAll { it.toString().contains(".ktab") } .collect(), ] } | set { ch_grab } - + ch_combo | mix ( ch_grab ) | combine ( genome ) @@ -103,26 +104,28 @@ workflow GENOME_STATISTICS { | set { ch_merq } - // MerquryFK - MERQURYFK_MERQURYFK ( ch_merq ) - ch_versions = ch_versions.mix ( MERQURYFK_MERQURYFK.out.versions.first() ) + // This is only temporarily removed so I'm leaving it here for now + // // MerquryFK + // MERQURYFK_MERQURYFK ( ch_merq, [], [] ) + // ch_versions = ch_versions.mix ( MERQURYFK_MERQURYFK.out.versions.first() ) // Combined table SUMMARYGENOME.out.summary | join ( SUMMARYSEQUENCE.out.summary ) | set { ch_summary } - + BUSCO.out.short_summaries_json | ifEmpty ( [ [], [] ] ) | set { ch_busco } - - MERQURYFK_MERQURYFK.out.qv - | join ( MERQURYFK_MERQURYFK.out.stats ) - | map { meta, qv, comp -> [ meta + [ id: "merq" ], qv, comp ] } - | groupTuple () - | ifEmpty ( [ [], [], [] ] ) - | set { ch_merqury } + + // This is only temporarily removed so I'm leaving it here for now + // MERQURYFK_MERQURYFK.out.qv + // | join ( MERQURYFK_MERQURYFK.out.stats ) + // | map { meta, qv, comp -> [ meta + [ id: "merq" ], qv, comp ] } + // | groupTuple () + // | ifEmpty ( [ [], [], [] ] ) + // | set { ch_merqury } flagstat // Queue channel of tuple(meta, file) @@ -135,7 +138,8 @@ workflow GENOME_STATISTICS { // Now channel of tuple(list(meta), list(file)) | set { ch_flagstat } - CREATETABLE ( ch_summary, ch_busco, ch_merqury, ch_flagstat ) + //CREATETABLE ( ch_summary, ch_busco, ch_merqury, ch_flagstat ) + CREATETABLE ( ch_summary, ch_busco, [[], [], []], ch_flagstat ) ch_versions = ch_versions.mix ( CREATETABLE.out.versions.first() ) diff --git a/workflows/genomenote.nf b/workflows/genomenote.nf index 412718af..6ed474aa 100644 --- a/workflows/genomenote.nf +++ b/workflows/genomenote.nf @@ -21,14 +21,16 @@ if (params.fasta) { ch_fasta = Channel.fromPath(params.fasta) } else { exit if (params.binsize) { ch_bin = Channel.of(params.binsize) } else { exit 1, 'Bin size for cooler/cload not specified!' } if (params.kmer_size) { ch_kmer = Channel.of(params.kmer_size) } else { exit 1, 'Kmer library size for fastk not specified' } -if (params.lineage_tax_ids) { ch_lineage_tax_ids = Channel.fromPath(params.lineage_tax_ids) } else { exit 1, 'Mapping BUSCO lineage <-> taxon_ids not specified' } +if (params.lineage_tax_ids) { ch_lineage_tax_ids = Channel.fromPath(params.lineage_tax_ids) } else { exit 1, 'Mapping BUSCO lineage equivalent taxon_ids not specified' } // Check optional parameters if (params.lineage_db) { ch_lineage_db = Channel.fromPath(params.lineage_db) } else { ch_lineage_db = Channel.empty() } -if (params.note_template) { ch_note_template = Channel.fromPath(params.note_template) } else { ch_note_template = Channel.empty() } +if (params.note_template) { ch_note_template = Channel.fromPath(params.note_template) } else { ch_note_template = Channel.empty() } if (params.cool_order) { ch_cool_order = Channel.fromPath(params.cool_order) } else { ch_cool_order = Channel.empty() } if (params.biosample_hic) metadata_inputs.add(params.biosample_hic) else metadata_inputs.add(null) if (params.biosample_rna) metadata_inputs.add(params.biosample_rna) else metadata_inputs.add(null) +if (params.annotation_set) { ch_gff = Channel.fromPath(params.annotation_set) } else { ch_gff = Channel.empty()} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -52,11 +54,12 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { GENOME_METADATA } from '../subworkflows/local/genome_metadata' -include { CONTACT_MAPS } from '../subworkflows/local/contact_maps' -include { GENOME_STATISTICS } from '../subworkflows/local/genome_statistics' -include { COMBINE_NOTE_DATA } from '../subworkflows/local/combine_note_data' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { GENOME_METADATA } from '../subworkflows/local/genome_metadata' +include { CONTACT_MAPS } from '../subworkflows/local/contact_maps' +include { GENOME_STATISTICS } from '../subworkflows/local/genome_statistics' +include { COMBINE_NOTE_DATA } from '../subworkflows/local/combine_note_data' +include { ANNOTATION_STATISTICS } from '../subworkflows/local/annotation_statistics' /* @@ -85,7 +88,7 @@ def multiqc_report = [] workflow GENOMENOTE { ch_versions = Channel.empty() - + ch_annotation_stats = Channel.empty() // // SUBWORKFLOW: Read in samplesheet, validate and stage input files // @@ -150,19 +153,25 @@ workflow GENOMENOTE { ch_versions = ch_versions.mix ( CONTACT_MAPS.out.versions ) // - // SUBWORKFLOW: Combine data from previous steps to create formatted genome note + // SUBWORKFLOW : Obtain feature statistics from the annotation file : GFF // + if ( params.annotation_set ) { + ANNOTATION_STATISTICS (ch_gff, ch_fasta, ch_lineage_tax_ids, ch_lineage_db) + ch_versions = ch_versions.mix ( ANNOTATION_STATISTICS.out.versions ) + ch_annotation_stats = ch_annotation_stats.mix (ANNOTATION_STATISTICS.out.summary) + } - COMBINE_NOTE_DATA (GENOME_METADATA.out.consistent, GENOME_METADATA.out.inconsistent, GENOME_STATISTICS.out.summary, CONTACT_MAPS.out.link, ch_note_template) + // + // SUBWORKFLOW: Combine data from previous steps to create formatted genome note + // + COMBINE_NOTE_DATA (GENOME_METADATA.out.consistent, GENOME_METADATA.out.inconsistent, GENOME_STATISTICS.out.summary, ch_annotation_stats.ifEmpty([[],[]]), CONTACT_MAPS.out.link, ch_note_template) ch_versions = ch_versions.mix ( COMBINE_NOTE_DATA.out.versions ) - // // MODULE: Combine different versions.yml // CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) - // // MODULE: MultiQC // @@ -183,7 +192,9 @@ workflow GENOMENOTE { ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() + ch_multiqc_logo.toList(), + [], + [] ) multiqc_report = MULTIQC.out.report.toList()
Process Name \\", + " \\ Software Version
CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
yaml5.4.1
TOOL1tool10.11.9
TOOL2tool21.9
WorkflowNextflow