diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c605ae0..bdce0f5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,4 +94,4 @@ jobs: # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --steps ALL diff --git a/assets/github_testing/test.yaml b/assets/github_testing/test.yaml index 63fb8da..fde2e2e 100755 --- a/assets/github_testing/test.yaml +++ b/assets/github_testing/test.yaml @@ -1,13 +1,13 @@ -assembly_path: /home/runner/work/ascc/ascc/asccTinyTest/assembly/Pyoeliiyoelii17XNL_assembly.fa -assembly_title: asccTinyTest -reads_path: /home/runner/work/ascc/ascc/asccTinyTest/pacbio +assembly_path: /home/runner/work/ascc/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa +assembly_title: asccTinyTest_V2 +reads_path: /home/runner/work/ascc/ascc/asccTinyTest_V2/pacbio/ reads_type: "hifi" pacbio_barcodes: /home/runner/work/ascc/ascc/pacbio_barcode/pacbio_adaptors.fa pacbio_multiplexing_barcode_names: "bc2001,bc2009" sci_name: "Plasmodium yoelii yoelii 17XNL" taxid: 352914 -mito_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa -plastid_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa +mito_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest_V2/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa +plastid_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest_V2/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa kmer_len: 7 dimensionality_reduction_methods: "pca,random_trees" # all available methods diff --git a/assets/test.yaml b/assets/test.yaml index 5f6c6cd..122db32 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,13 +1,13 @@ -assembly_path: 
/lustre/scratch124/tol/projects/tol/data/insects/Polyommatus_atlantica/assembly/draft/treeval/ilPolAtla1_merged/raw/ref.fa -assembly_title: asccTinyTest -reads_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/pacbio/ +assembly_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa +assembly_title: asccTinyTest_V2 +reads_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/pacbio/ reads_type: "hifi" pacbio_barcodes: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa pacbio_multiplexing_barcode_names: "bc2008,bc2009" sci_name: "Plasmodium yoelii yoelii 17XNL" taxid: 352914 -mito_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa -plastid_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa +mito_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa +plastid_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa kmer_len: 7 dimensionality_reduction_methods: "pca,random_trees" # all available methods @@ -20,7 +20,7 @@ ncbi_taxonomy_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdu ncbi_rankedlineage_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdump/rankedlineage.dmp busco_lineages_folder: /lustre/scratch123/tol/resources/busco/data/v5/2021-08-27/lineages fcs_gx_database_path: /lustre/scratch124/tol/projects/asg/sub_projects/ncbi_decon/0.4.0/gxdb -vecscreen_database_path: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/vecscreen_database/ +vecscreen_database_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/vecscreen/ diamond_uniprot_database_path: 
/lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/uniprot/uniprot_reference_proteomes_with_taxonnames.dmnd diamond_nr_database_path: /lustre/scratch123/tol/resources/nr/latest/nr.dmnd seqkit: diff --git a/subworkflows/local/se_mapping.nf b/subworkflows/local/se_mapping.nf index 1c3f69c..7ce3657 100644 --- a/subworkflows/local/se_mapping.nf +++ b/subworkflows/local/se_mapping.nf @@ -72,7 +72,6 @@ workflow SE_MAPPING { se_input.bool_cigar_bam ) ch_bams = MINIMAP2_ALIGN_SE.out.bam - ch_bams .map { meta, file -> @@ -110,7 +109,7 @@ process GrabFiles { tuple val(meta), path("in") output: - tuple val(meta), path("in/*.{fa,fasta}.{gz}") + tuple val(meta), path("in/*.{fa,fasta,fna}.{gz}") "true" } \ No newline at end of file diff --git a/workflows/ascc.nf b/workflows/ascc.nf index 66747c4..a80033d 100644 --- a/workflows/ascc.nf +++ b/workflows/ascc.nf @@ -64,6 +64,9 @@ workflow ASCC { main: ch_versions = Channel.empty() + ch_out_merge = Channel.empty() + + workflow_steps = params.steps.split(",") input_ch = Channel.fromPath(params.input, checkIfExists: true) @@ -81,6 +84,7 @@ workflow ASCC { GC_CONTENT ( YAML_INPUT.out.reference_tuple ) + ch_out_merge = ch_out_merge.mix(GC_CONTENT.out.txt) ch_versions = ch_versions.mix(GC_CONTENT.out.versions) // @@ -96,32 +100,37 @@ workflow ASCC { // SUBWORKFLOW: COUNT KMERS, THEN REDUCE DIMENSIONS USING SELECTED METHODS // - GENERATE_GENOME.out.reference_tuple - .map { meta, file -> - tuple ( - meta, - file, - file.countFasta() * 3 - ) - } - .set {autoencoder_epochs_count} - - GET_KMERS_PROFILE ( - GENERATE_GENOME.out.reference_tuple, - YAML_INPUT.out.kmer_len, - YAML_INPUT.out.dimensionality_reduction_methods, - YAML_INPUT.out.n_neighbours, - autoencoder_epochs_count.map{it -> it[2]} - ) - ch_versions = ch_versions.mix(GET_KMERS_PROFILE.out.versions) + if ( workflow_steps.contains('kmers') || workflow_steps.contains('ALL')) { + + GENERATE_GENOME.out.reference_tuple + .map { meta, file -> + tuple ( + meta, + file, + 
file.countFasta() * 3 + ) + } + .set {autoencoder_epochs_count} + + GET_KMERS_PROFILE ( + GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.kmer_len, + YAML_INPUT.out.dimensionality_reduction_methods, + YAML_INPUT.out.n_neighbours, + autoencoder_epochs_count.map{it -> it[2]} + ) + ch_versions = ch_versions.mix(GET_KMERS_PROFILE.out.versions) + } // // SUBWORKFLOW: EXTRACT RESULTS HITS FROM TIARA // - EXTRACT_TIARA_HITS ( - GENERATE_GENOME.out.reference_tuple - ) - ch_versions = ch_versions.mix(EXTRACT_TIARA_HITS.out.versions) + if ( workflow_steps.contains('tiara') || workflow_steps.contains('ALL') ) { + EXTRACT_TIARA_HITS ( + GENERATE_GENOME.out.reference_tuple + ) + ch_versions = ch_versions.mix(EXTRACT_TIARA_HITS.out.versions) + } // // LOGIC: INJECT SLIDING WINDOW VALUES INTO REFERENCE @@ -141,105 +150,136 @@ workflow ASCC { // // SUBWORKFLOW: EXTRACT RESULTS HITS FROM NT-BLAST // - EXTRACT_NT_BLAST ( - modified_input, - YAML_INPUT.out.nt_database, - YAML_INPUT.out.ncbi_accessions, - YAML_INPUT.out.ncbi_rankedlineage_path - ) - ch_versions = ch_versions.mix(EXTRACT_NT_BLAST.out.versions) - - // - // LOGIC: CHECK WHETHER THERE IS A MITO AND BRANCH - // - YAML_INPUT.out.mito_tuple - .branch { meta, check -> - valid: check != "NO MITO" - invalid: check == "NO MITO" - } - .set { mito_check } + if ( workflow_steps.contains('nt_blast') || workflow_steps.contains('ALL') ) { + EXTRACT_NT_BLAST ( + modified_input, + YAML_INPUT.out.nt_database, + YAML_INPUT.out.ncbi_accessions, + YAML_INPUT.out.ncbi_rankedlineage_path + ) + ch_versions = ch_versions.mix(EXTRACT_NT_BLAST.out.versions) + } + if ( workflow_steps.contains('mito') || workflow_steps.contains('ALL') ) { + // + // LOGIC: CHECK WHETHER THERE IS A MITO AND BRANCH + // + YAML_INPUT.out.mito_tuple + .branch { meta, check -> + valid: check != "NO MITO" + invalid: check == "NO MITO" + } + .set { mito_check } + + + // + // SUBWORKFLOW: BLASTING FOR MITO ASSEMBLIES IN GENOME + // + MITO_ORGANELLAR_BLAST ( + YAML_INPUT.out.reference_tuple, + 
YAML_INPUT.out.mito_var, + mito_check.valid + ) + ch_versions = ch_versions.mix(MITO_ORGANELLAR_BLAST.out.versions) + } - // - // SUBWORKFLOW: BLASTING FOR MITO ASSEMBLIES IN GENOME - // - MITO_ORGANELLAR_BLAST ( - YAML_INPUT.out.reference_tuple, - YAML_INPUT.out.mito_var, - mito_check.valid - ) - ch_versions = ch_versions.mix(MITO_ORGANELLAR_BLAST.out.versions) + if ( workflow_steps.contains('chloro') || workflow_steps.contains('ALL') ) { + + // + // LOGIC: CHECK WHETHER THERE IS A PLASTID AND BRANCH + // + YAML_INPUT.out.plastid_tuple + .branch { meta, check -> + valid: check != "NO PLASTID" + invalid: check == "NO PLASTID" + } + .set { plastid_check } + + // + // SUBWORKFLOW: BLASTING FOR PLASTID ASSEMBLIES IN GENOME + // + PLASTID_ORGANELLAR_BLAST ( + YAML_INPUT.out.reference_tuple, + YAML_INPUT.out.plastid_var, + plastid_check.valid + ) + ch_versions = ch_versions.mix(PLASTID_ORGANELLAR_BLAST.out.versions) + } - // - // LOGIC: CHECK WHETHER THERE IS A PLASTID AND BRANCH - // - YAML_INPUT.out.plastid_tuple - .branch { meta, check -> - valid: check != "NO PLASTID" - invalid: check == "NO PLASTID" - } - .set { plastid_check } - - // - // SUBWORKFLOW: BLASTING FOR PLASTID ASSEMBLIES IN GENOME - // - PLASTID_ORGANELLAR_BLAST ( - YAML_INPUT.out.reference_tuple, - YAML_INPUT.out.plastid_var, - plastid_check.valid - ) - ch_versions = ch_versions.mix(PLASTID_ORGANELLAR_BLAST.out.versions) // // SUBWORKFLOW: // - RUN_FCSADAPTOR ( - YAML_INPUT.out.reference_tuple - ) - ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions) - + if ( workflow_steps.contains('fcs_adapt') || workflow_steps.contains('ALL') ) { + RUN_FCSADAPTOR ( + YAML_INPUT.out.reference_tuple + ) + ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions) + } // // SUBWORKFLOW: // - RUN_FCSGX ( - YAML_INPUT.out.reference_tuple, - YAML_INPUT.out.fcs_gx_database_path, - YAML_INPUT.out.taxid, - YAML_INPUT.out.ncbi_rankedlineage_path - ) - ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions) + if ( 
workflow_steps.contains('fcsgx') || workflow_steps.contains('ALL') ) { + RUN_FCSGX ( + YAML_INPUT.out.reference_tuple, + YAML_INPUT.out.fcs_gx_database_path, + YAML_INPUT.out.taxid, + YAML_INPUT.out.ncbi_rankedlineage_path + ) + ch_versions = ch_versions.mix(RUN_FCSGX.out.versions) + } // // SUBWORKFLOW: IDENTITY PACBIO BARCODES IN INPUT DATA // - PACBIO_BARCODE_CHECK ( - YAML_INPUT.out.reference_tuple, - YAML_INPUT.out.pacbio_tuple, - YAML_INPUT.out.pacbio_barcodes, - YAML_INPUT.out.pacbio_multiplex_codes - ) - ch_versions = ch_versions.mix(PACBIO_BARCODE_CHECK.out.versions) + if ( workflow_steps.contains('barcodes') || workflow_steps.contains('ALL') ) { + PACBIO_BARCODE_CHECK ( + YAML_INPUT.out.reference_tuple, + YAML_INPUT.out.pacbio_tuple, + YAML_INPUT.out.pacbio_barcodes, + YAML_INPUT.out.pacbio_multiplex_codes + ) + ch_versions = ch_versions.mix(PACBIO_BARCODE_CHECK.out.versions) + } // // SUBWORKFLOW: CALCULATE AVERAGE READ COVERAGE // - RUN_READ_COVERAGE ( - YAML_INPUT.out.reference_tuple, - YAML_INPUT.out.assembly_path, - YAML_INPUT.out.pacbio_tuple, - YAML_INPUT.out.reads_type - ) - ch_versions = ch_versions.mix(RUN_READ_COVERAGE.out.versions) - + if ( workflow_steps.contains('coverage') || workflow_steps.contains('ALL') ) { + RUN_READ_COVERAGE ( + YAML_INPUT.out.reference_tuple, + YAML_INPUT.out.assembly_path, + YAML_INPUT.out.pacbio_tuple, + YAML_INPUT.out.reads_type + ) + ch_versions = ch_versions.mix(RUN_READ_COVERAGE.out.versions) + } // // SUBWORKFLOW: COLLECT SOFTWARE VERSIONS // - RUN_VECSCREEN ( - GENERATE_GENOME.out.reference_tuple, - YAML_INPUT.out.vecscreen_database_path - ) - ch_versions = ch_versions.mix(RUN_VECSCREEN.out.versions) + if ( workflow_steps.contains('vecscreen') || workflow_steps.contains('ALL') ) { + RUN_VECSCREEN ( + GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.vecscreen_database_path + ) + ch_versions = ch_versions.mix(RUN_VECSCREEN.out.versions) + } + + // + // SUBWORKFLOW: Run the kraken classifier + // + if ( 
workflow_steps.contains('kraken') || workflow_steps.contains('ALL') ) { + RUN_NT_KRAKEN( + GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.nt_kraken_db_path, + YAML_INPUT.out.ncbi_rankedlineage_path + ) + } + + // mix the outputs of the outputting process so that we can + // insert them into the one process to create the btk and the merged report + // much like the versions channel // // SUBWORKFLOW: Collates version data from prior subworflows @@ -249,8 +289,6 @@ workflow ASCC { ) emit: - - software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml versions_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions }