Skip to content

Commit

Permalink
Updating test files, test data, formatting and logic change
Browse files Browse the repository at this point in the history
  • Loading branch information
DLBPointon committed Apr 4, 2024
1 parent e6550bf commit 4e8d53d
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 115 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,4 @@ jobs:
# For example: adding multiple test runs with different parameters
# Remember that you can parallelise this by using strategy.matrix
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results --steps ALL
10 changes: 5 additions & 5 deletions assets/github_testing/test.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
assembly_path: /home/runner/work/ascc/ascc/asccTinyTest/assembly/Pyoeliiyoelii17XNL_assembly.fa
assembly_title: asccTinyTest
reads_path: /home/runner/work/ascc/ascc/asccTinyTest/pacbio
assembly_path: /home/runner/work/ascc/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa
assembly_title: asccTinyTest_V2
reads_path: /home/runner/work/ascc/ascc/asccTinyTest_V2/pacbio/
reads_type: "hifi"
pacbio_barcodes: /home/runner/work/ascc/ascc/pacbio_barcode/pacbio_adaptors.fa
pacbio_multiplexing_barcode_names: "bc2001,bc2009"
sci_name: "Plasmodium yoelii yoelii 17XNL"
taxid: 352914
mito_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa
plastid_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa
mito_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest_V2/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa
plastid_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest_V2/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa
kmer_len: 7
dimensionality_reduction_methods: "pca,random_trees"
# all available methods
Expand Down
12 changes: 6 additions & 6 deletions assets/test.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
assembly_path: /lustre/scratch124/tol/projects/tol/data/insects/Polyommatus_atlantica/assembly/draft/treeval/ilPolAtla1_merged/raw/ref.fa
assembly_title: asccTinyTest
reads_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/pacbio/
assembly_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa
assembly_title: asccTinyTest_V2
reads_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/pacbio/
reads_type: "hifi"
pacbio_barcodes: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa
pacbio_multiplexing_barcode_names: "bc2008,bc2009"
sci_name: "Plasmodium yoelii yoelii 17XNL"
taxid: 352914
mito_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa
plastid_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa
mito_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa
plastid_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa
kmer_len: 7
dimensionality_reduction_methods: "pca,random_trees"
# all available methods
Expand All @@ -20,7 +20,7 @@ ncbi_taxonomy_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdu
ncbi_rankedlineage_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdump/rankedlineage.dmp
busco_lineages_folder: /lustre/scratch123/tol/resources/busco/data/v5/2021-08-27/lineages
fcs_gx_database_path: /lustre/scratch124/tol/projects/asg/sub_projects/ncbi_decon/0.4.0/gxdb
vecscreen_database_path: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/vecscreen_database/
vecscreen_database_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/vecscreen/
diamond_uniprot_database_path: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/uniprot/uniprot_reference_proteomes_with_taxonnames.dmnd
diamond_nr_database_path: /lustre/scratch123/tol/resources/nr/latest/nr.dmnd
seqkit:
Expand Down
3 changes: 1 addition & 2 deletions subworkflows/local/se_mapping.nf
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ workflow SE_MAPPING {
se_input.bool_cigar_bam
)
ch_bams = MINIMAP2_ALIGN_SE.out.bam


ch_bams
.map { meta, file ->
Expand Down Expand Up @@ -110,7 +109,7 @@ process GrabFiles {
tuple val(meta), path("in")

output:
tuple val(meta), path("in/*.{fa,fasta}.{gz}")
tuple val(meta), path("in/*.{fa,fasta,fna}.{gz}")

"true"
}
240 changes: 139 additions & 101 deletions workflows/ascc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ workflow ASCC {

main:
ch_versions = Channel.empty()
ch_out_merge = Channel.empty()

workflow_steps = params.steps.split(",")

input_ch = Channel.fromPath(params.input, checkIfExists: true)

Expand All @@ -81,6 +84,7 @@ workflow ASCC {
GC_CONTENT (
YAML_INPUT.out.reference_tuple
)
ch_out_merge = ch_out_merge.mix(GC_CONTENT.out.txt)
ch_versions = ch_versions.mix(GC_CONTENT.out.versions)

//
Expand All @@ -96,32 +100,37 @@ workflow ASCC {
// SUBWORKFLOW: COUNT KMERS, THEN REDUCE DIMENSIONS USING SELECTED METHODS
//

GENERATE_GENOME.out.reference_tuple
.map { meta, file ->
tuple (
meta,
file,
file.countFasta() * 3
)
}
.set {autoencoder_epochs_count}

GET_KMERS_PROFILE (
GENERATE_GENOME.out.reference_tuple,
YAML_INPUT.out.kmer_len,
YAML_INPUT.out.dimensionality_reduction_methods,
YAML_INPUT.out.n_neighbours,
autoencoder_epochs_count.map{it -> it[2]}
)
ch_versions = ch_versions.mix(GET_KMERS_PROFILE.out.versions)
if ( workflow_steps.contains('kmers') || workflow_steps.contains('ALL')) {

GENERATE_GENOME.out.reference_tuple
.map { meta, file ->
tuple (
meta,
file,
file.countFasta() * 3
)
}
.set {autoencoder_epochs_count}

GET_KMERS_PROFILE (
GENERATE_GENOME.out.reference_tuple,
YAML_INPUT.out.kmer_len,
YAML_INPUT.out.dimensionality_reduction_methods,
YAML_INPUT.out.n_neighbours,
autoencoder_epochs_count.map{it -> it[2]}
)
ch_versions = ch_versions.mix(GET_KMERS_PROFILE.out.versions)
}

//
// SUBWORKFLOW: EXTRACT RESULTS HITS FROM TIARA
//
EXTRACT_TIARA_HITS (
GENERATE_GENOME.out.reference_tuple
)
ch_versions = ch_versions.mix(EXTRACT_TIARA_HITS.out.versions)
if ( workflow_steps.contains('tiara') ) {
EXTRACT_TIARA_HITS (
GENERATE_GENOME.out.reference_tuple
)
ch_versions = ch_versions.mix(EXTRACT_TIARA_HITS.out.versions)
}

//
// LOGIC: INJECT SLIDING WINDOW VALUES INTO REFERENCE
Expand All @@ -141,105 +150,136 @@ workflow ASCC {
//
// SUBWORKFLOW: EXTRACT RESULTS HITS FROM NT-BLAST
//
EXTRACT_NT_BLAST (
modified_input,
YAML_INPUT.out.nt_database,
YAML_INPUT.out.ncbi_accessions,
YAML_INPUT.out.ncbi_rankedlineage_path
)
ch_versions = ch_versions.mix(EXTRACT_NT_BLAST.out.versions)

//
// LOGIC: CHECK WHETHER THERE IS A MITO AND BRANCH
//
YAML_INPUT.out.mito_tuple
.branch { meta, check ->
valid: check != "NO MITO"
invalid: check == "NO MITO"
}
.set { mito_check }
if ( workflow_steps.contains('nt_blast') || workflow_steps.contains('ALL') ) {
EXTRACT_NT_BLAST (
modified_input,
YAML_INPUT.out.nt_database,
YAML_INPUT.out.ncbi_accessions,
YAML_INPUT.out.ncbi_rankedlineage_path
)
ch_versions = ch_versions.mix(EXTRACT_NT_BLAST.out.versions)
}

if ( workflow_steps.contains('mito') || workflow_steps.contains('ALL') ) {
//
// LOGIC: CHECK WHETHER THERE IS A MITO AND BRANCH
//
YAML_INPUT.out.mito_tuple
.branch { meta, check ->
valid: check != "NO MITO"
invalid: check == "NO MITO"
}
.set { mito_check }


//
// SUBWORKFLOW: BLASTING FOR MITO ASSEMBLIES IN GENOME
//
MITO_ORGANELLAR_BLAST (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.mito_var,
mito_check.valid
)
ch_versions = ch_versions.mix(MITO_ORGANELLAR_BLAST.out.versions)
}

//
// SUBWORKFLOW: BLASTING FOR MITO ASSEMBLIES IN GENOME
//
MITO_ORGANELLAR_BLAST (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.mito_var,
mito_check.valid
)
ch_versions = ch_versions.mix(MITO_ORGANELLAR_BLAST.out.versions)
if ( workflow_steps.contains('chloro') || workflow_steps.contains('ALL') ) {

//
// LOGIC: CHECK WHETHER THERE IS A PLASTID AND BRANCH
//
YAML_INPUT.out.plastid_tuple
.branch { meta, check ->
valid: check != "NO PLASTID"
invalid: check == "NO PLASTID"
}
.set { plastid_check }

//
// SUBWORKFLOW: BLASTING FOR PLASTID ASSEMBLIES IN GENOME
//
PLASTID_ORGANELLAR_BLAST (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.plastid_var,
plastid_check.valid
)
ch_versions = ch_versions.mix(PLASTID_ORGANELLAR_BLAST.out.versions)
}

//
// LOGIC: CHECK WHETHER THERE IS A PLASTID AND BRANCH
//
YAML_INPUT.out.plastid_tuple
.branch { meta, check ->
valid: check != "NO PLASTID"
invalid: check == "NO PLASTID"
}
.set { plastid_check }

//
// SUBWORKFLOW: BLASTING FOR PLASTID ASSEMBLIES IN GENOME
//
PLASTID_ORGANELLAR_BLAST (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.plastid_var,
plastid_check.valid
)
ch_versions = ch_versions.mix(PLASTID_ORGANELLAR_BLAST.out.versions)

//
// SUBWORKFLOW: RUN FCS-ADAPTOR ON THE REFERENCE
//
RUN_FCSADAPTOR (
YAML_INPUT.out.reference_tuple
)
ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions)

if ( workflow_steps.contains('fcs_adapt') || workflow_steps.contains('ALL') ) {
RUN_FCSADAPTOR (
YAML_INPUT.out.reference_tuple
)
ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions)
}
//
// SUBWORKFLOW: RUN FCS-GX ON THE REFERENCE
//
RUN_FCSGX (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.fcs_gx_database_path,
YAML_INPUT.out.taxid,
YAML_INPUT.out.ncbi_rankedlineage_path
)
ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions)
if ( workflow_steps.contains('fcsgx') || workflow_steps.contains('ALL') ) {
RUN_FCSGX (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.fcs_gx_database_path,
YAML_INPUT.out.taxid,
YAML_INPUT.out.ncbi_rankedlineage_path
)
ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions)
}

//
// SUBWORKFLOW: IDENTIFY PACBIO BARCODES IN INPUT DATA
//
PACBIO_BARCODE_CHECK (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.pacbio_tuple,
YAML_INPUT.out.pacbio_barcodes,
YAML_INPUT.out.pacbio_multiplex_codes
)
ch_versions = ch_versions.mix(PACBIO_BARCODE_CHECK.out.versions)
if ( workflow_steps.contains('barcodes') || workflow_steps.contains('ALL') ) {
PACBIO_BARCODE_CHECK (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.pacbio_tuple,
YAML_INPUT.out.pacbio_barcodes,
YAML_INPUT.out.pacbio_multiplex_codes
)
ch_versions = ch_versions.mix(PACBIO_BARCODE_CHECK.out.versions)
}

//
// SUBWORKFLOW: CALCULATE AVERAGE READ COVERAGE
//
RUN_READ_COVERAGE (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.assembly_path,
YAML_INPUT.out.pacbio_tuple,
YAML_INPUT.out.reads_type
)
ch_versions = ch_versions.mix(RUN_READ_COVERAGE.out.versions)

if ( workflow_steps.contains('coverage') || workflow_steps.contains('ALL') ) {
RUN_READ_COVERAGE (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.assembly_path,
YAML_INPUT.out.pacbio_tuple,
YAML_INPUT.out.reads_type
)
ch_versions = ch_versions.mix(RUN_READ_COVERAGE.out.versions)
}

//
// SUBWORKFLOW: COLLECT SOFTWARE VERSIONS
//
RUN_VECSCREEN (
GENERATE_GENOME.out.reference_tuple,
YAML_INPUT.out.vecscreen_database_path
)
ch_versions = ch_versions.mix(RUN_VECSCREEN.out.versions)
if ( workflow_steps.contains('vecscreen') || workflow_steps.contains('ALL') ) {
RUN_VECSCREEN (
GENERATE_GENOME.out.reference_tuple,
YAML_INPUT.out.vecscreen_database_path
)
ch_versions = ch_versions.mix(RUN_VECSCREEN.out.versions)
}

//
// SUBWORKFLOW: Run the kraken classifier
//
if ( workflow_steps.contains('kraken') || workflow_steps.contains('ALL') ) {
RUN_NT_KRAKEN(
GENERATE_GENOME.out.reference_tuple,
YAML_INPUT.out.nt_kraken_db_path,
YAML_INPUT.out.ncbi_rankedlineage_path
)
}

// mix the outputs of the outputting processes so that we can
// insert them into the one process to create the btk and the merged report
// much like the versions channel

//
// SUBWORKFLOW: Collates version data from prior subworkflows
Expand All @@ -249,8 +289,6 @@ workflow ASCC {
)

emit:


software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml
versions_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions
}
Expand Down

0 comments on commit 4e8d53d

Please sign in to comment.