Skip to content

Commit

Permalink
Merge pull request #35 from sanger-tol/dp24_organellar
Browse files Browse the repository at this point in the history
Dp24 organellar
  • Loading branch information
yumisims authored Nov 6, 2023
2 parents 35cda05 + 1f3d386 commit 5196d4d
Show file tree
Hide file tree
Showing 98 changed files with 1,394 additions and 206 deletions.
2 changes: 2 additions & 0 deletions assets/github_testing/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ plastid_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest/organellar/Pyoeliiy
kmer_len: 7
## Below this point will need updating as more subworkflows are built
nt_database: /home/runner/work/ascc/ascc/NT_database/
nt_database_prefix: 18S_fungal_sequences
nt_kraken_db_path: /home/runner/work/ascc/ascc/kraken2/kraken2
ncbi_accessionids_folder: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/ncbi_taxonomy/20230509_accession2taxid/
ncbi_taxonomy_path: /home/runner/work/ascc/ascc/ncbi_taxdump/
ncbi_rankedlineage_path: /home/runner/work/ascc/ascc/ncbi_taxdump/rankedlineage.dmp
busco_lineages_folder: /home/runner/work/ascc/ascc/busco_database/lineages
Expand Down
20 changes: 11 additions & 9 deletions assets/test.yaml
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
assembly_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/assembly/Pyoeliiyoelii17XNL_assembly.fa
assembly_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/ncbi_dataset/data/reheadered.fna
assembly_title: asccTinyTest
pacbio_barcodes: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa
pacbio_multiplexing_barcode_names: "bc1008,bc1009"
pacbio_reads_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/pacbio/
pacbio_reads_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest/pacbio/
sci_name: "Plasmodium yoelii yoelii 17XNL"
taxid: 352914
mito_fasta_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa
plastid_fasta_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa
mito_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa
plastid_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa
kmer_len: 7
nt_database: /data/blastdb/Supported/NT/current/
nt_database: /data/blastdb/Supported/NT/202308/dbv4/
nt_database_prefix: nt
nt_kraken_db_path: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/nt/nt
ncbi_accessionids_folder: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/ncbi_taxonomy/20230509_accession2taxid/
ncbi_taxonomy_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdump/
ncbi_rankedlineage_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdump/rankedlineage.dmp
busco_lineages_folder: /lustre/scratch123/tol/resources/busco/data/v5/2021-03-14/lineages
busco_lineages_folder: /lustre/scratch123/tol/resources/busco/data/v5/2021-08-27/lineages
fcs_gx_database_path: /lustre/scratch124/tol/projects/asg/sub_projects/ncbi_decon/0.4.0/gxdb
vecscreen_database_path: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/vecscreen_database/adaptors_for_screening_euks.fa
diamond_uniprot_database_path: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/uniprot/uniprot_reference_proteomes_with_taxonnames.dmnd
diamond_nr_database_path: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/proteins2/nr.dmnd
diamond_nr_database_path: /lustre/scratch123/tol/resources/nr/latest/nr.dmnd
seqkit:
sliding: 6000
window: 100000
sliding: 100000
window: 6000
1 change: 1 addition & 0 deletions bin/filter_barcode_blast_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def main():
n_regions_for_record[record.id].append([n_instance.start(0) + 1, n_instance.end(0)])

fasta_input_handle.close()

with open(args.blast, "r") as blast_input_handle:
for line in blast_input_handle:
if not re.search("^#", line):
Expand Down
13 changes: 11 additions & 2 deletions bin/organelle_contamination_recommendation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,27 @@

import csv
import re
import sys
import argparse


def main():
parser = argparse.ArgumentParser(description="Generate text for ENA submission")
parser.add_argument("--input", type=str, help="Input BED", default=None)
parser.add_argument("--input", type=str, help="List of input files (space seperated list)", default=None)
parser.add_argument("--output", type=str, help="Output recommendation", default=None)
parser.add_argument("-v", action="version", version="1.0")
args = parser.parse_args()

# Filters through the list of files that nextflow passes in and ID's the one we want
files_list = args.input
file_for_use = next(
(file for file in files_list.split() if "assembly.ALL.unfiltered_scaffold_coverage.bed" in file), None
)
if file_for_use is None:
sys.exit(1)

recommendation_handle = open(args.output, "w")
with open(args.input) as bed_handle:
with open(file_for_use) as bed_handle:
bed_csv_reader = csv.reader(bed_handle, delimiter="\t")
for field_set in bed_csv_reader:
scaffold = field_set[0]
Expand Down
1 change: 1 addition & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ process {
time = { check_max( 4.h * task.attempt, 'time' ) }

withName: BLAST_BLASTN {
memory = { check_max( 50.GB * task.attempt, 'memory' ) }
time = { check_max( 12.h * task.attempt, 'time' ) }
}

Expand Down
27 changes: 21 additions & 6 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,31 +19,41 @@ process {
]

withName: SEQKIT_SLIDING {
ext.args = {"-s ${meta.sliding} -W ${meta.window} "}
ext.args = {"-s ${meta.sliding} -W ${meta.window} "}
}

withName: BLAST_CHUNK_TO_FULL {
ext.args = 'nucleotide'
ext.args = 'nucleotide'
}

withName: BLAST_MAKEBLASTDB {
ext.args = { "-dbtype nucl" }
}

withName: BLAST_BLASTN {
ext.args = { "-outfmt '6 qseqid staxids bitscore std' -max_target_seqs 10 -max_hsps 1 -evalue 1e-25 -dust yes -lcase_masking" }
withName: '.*:.*:BLAST_BLASTN' {
ext.args = { '-outfmt "6 qseqid staxids bitscore std" -max_target_seqs 10 -max_hsps 1 -evalue 1e-25 -dust yes -lcase_masking' }
ext.dbprefix = '*'
}

withName: '.*:EXTRACT_NT_BLAST:BLAST_BLASTN_MOD' {
ext.args = { '-outfmt "6 qseqid staxids bitscore std" -max_target_seqs 10 -max_hsps 1 -evalue 1e-25 -dust yes -lcase_masking' }
ext.dbprefix = { "${meta2.id}" }
}

withName: '.*:.*:(PLASTID_ORGANELLAR_BLAST|MITO_ORGANELLAR_BLAST):BLAST_BLASTN' {
ext.args = { "-task megablast -word_size 28 -best_hit_overhang 0.1 -best_hit_score_edge 0.1 -dust yes -evalue 0.0001 -perc_identity 80 -soft_masking true -outfmt 7" }
}

withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
publishDir = [
path: { "${params.outdir}/pipeline_info" },
mode: params.publish_dir_mode,
pattern: '*_versions.yml'
]
}

withName: KRAKEN2_KRAKEN2 {
ext.args = { "--report-zero-counts --use-names --memory-mapping" }
ext.args = { "--report-zero-counts --use-names --memory-mapping" }
}

withName: FCS_FCSADAPTOR_PROK {
Expand All @@ -56,6 +66,11 @@ process {
ext.prefix = { "${meta.id}_euk" }
}

withName: SED_SED {
ext.prefix = { "${meta.id}_fixed" }
ext.args = " -e '/>/s/ //g' "
}

withName: '.*:.*:GENERATE_GENOME:GNU_SORT' {
ext.prefix = { "${meta.id}_sorted"}
ext.args = { '-k2,2 -nr' }
Expand Down
45 changes: 23 additions & 22 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,99 +7,100 @@
"nf-core": {
"blast/blastn": {
"branch": "master",
"git_sha": "1728df69ccbaf4ccb1027c6fd4e9191f48c22194",
"installed_by": ["modules"]
"git_sha": "acacb4075ef46fa74630aa3f4b0684f1021d5930",
"installed_by": ["modules"],
"patch": "modules/nf-core/blast/blastn/blast-blastn.diff"
},
"blast/makeblastdb": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"custom/dumpsoftwareversions": {
"branch": "master",
"git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"custom/getchromsizes": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"],
"patch": "modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff"
},
"diamond/blastx": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"fastqc": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"fcs/fcsadaptor": {
"branch": "master",
"git_sha": "5a35af8b60d45425c4b9193e567d16b614d93dbe",
"git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
"installed_by": ["modules"]
},
"gnu/sort": {
"fcs/fcsgx": {
"branch": "master",
"git_sha": "88f6e982fb8bd40488d837b3b08a65008e602840",
"git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc",
"installed_by": ["modules"]
},
"fcs/fcsgx": {
"gnu/sort": {
"branch": "master",
"git_sha": "8c4542e5d421c4690cf1fa6ec729e9304763fdaf",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"kraken2/kraken2": {
"branch": "master",
"git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"],
"patch": "modules/nf-core/kraken2/kraken2/kraken2-kraken2.diff"
},
"minimap2/align": {
"branch": "master",
"git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"minimap2/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"multiqc": {
"branch": "master",
"git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"samtools/depth": {
"branch": "master",
"git_sha": "a1ffbc1fd87bd5a829e956cc26ec9cc53af3e817",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"samtools/faidx": {
"branch": "master",
"git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"samtools/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"samtools/sort": {
"branch": "master",
"git_sha": "a0f7be95788366c1923171e358da7d049eb440f9",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"seqkit/sliding": {
"branch": "master",
"git_sha": "0aa251c4ce7318c86b9868d8cb8dd6dd5d7da849",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
},
"tiara/tiara": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a",
"installed_by": ["modules"]
}
}
Expand Down
6 changes: 3 additions & 3 deletions modules/local/blast_get_top_hits.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process BLAST_GET_TOP_HITS {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9"
conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"

input:
tuple val(meta), path(outfmt6)
Expand Down
50 changes: 50 additions & 0 deletions modules/local/blast_v5_database.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Runs `blastn` for a query FASTA against a BLAST database addressed as
// `<db directory>/<db_prefix>` and writes the result to `<prefix>.txt`.
process BLAST_V5_DATABASE {
    tag "$meta.id"
    label 'process_medium'

    conda "bioconda::blast=2.14.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/blast:2.14.1--pl5321h6f7f691_0':
        'biocontainers/blast:2.14.1--pl5321h6f7f691_0' }"

    input:
    // meta map plus the query sequences handed to `-query`
    tuple val(meta), path(fasta)
    // staged database path; joined with db_prefix to form the `-db` argument
    path db
    // basename of the database files inside `db`
    val db_prefix

    output:
    tuple val(meta), path('*.txt'), emit: txt
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra blastn options and the output basename come from task.ext,
    // falling back to no extra options and the sample id respectively.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    blastn \\
        -num_threads $task.cpus \\
        -db ${db}/${db_prefix} \\
        -query $fasta \\
        $args \\
        -out ${prefix}.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    // Stub run: create an empty result file and still record the blast version.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//')
    END_VERSIONS
    """
}
6 changes: 3 additions & 3 deletions modules/local/extract_contaminants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process EXTRACT_CONTAMINANTS {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9"
conda "conda-forge::python=3.9 conda-forge::biopython=1.78"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"
'https://depot.galaxyproject.org/singularity/biopython:1.78' :
'biocontainers/biopython:1.78' }"

input:
tuple val(meta), path(fasta)
Expand Down
43 changes: 43 additions & 0 deletions modules/local/filter_comments.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Filters a whitespace-delimited table: keeps rows whose 4th field is >= 200
// and drops every line containing a '#' character. The result is written to
// `<prefix>_filtered_busco.txt`.
process FILTER_COMMENTS {
    tag "${meta.id}"
    label 'process_low'

    conda "conda-forge::coreutils=9.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
        'docker.io/ubuntu:20.04' }"

    input:
    // meta map plus the table to filter
    tuple val(meta), path(busco)

    output:
    tuple val(meta), path( "*txt" ) , emit: txt
    path "versions.yml"             , emit: versions

    // Same opt-out guard used by the other local modules (e.g. BLAST_V5_DATABASE).
    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    // NOTE(review): `ubuntu --version` does not look like a command shipped in
    // the ubuntu:20.04 image; if it is missing, the `ubuntu:` entry below will
    // be empty. Confirm, and consider reading /etc/os-release instead.
    // NOTE(review): `grep -v` exits non-zero when no line survives the filter,
    // which may fail the task on an empty result - confirm this is intended.
    """
    cat ${busco} | awk '\$4>=200' | grep -v '#' > ${prefix}_filtered_busco.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        ubuntu: \$(ubuntu --version | sed 's/Ubuntu //g')
        coreutils: $VERSION
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    // The stub must create exactly the declared outputs: an empty filtered
    // table matching "*txt" and a versions.yml. The versions heredoc
    // previously went into the filtered table, leaving versions.yml missing.
    """
    touch ${prefix}_filtered_busco.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        ubuntu: \$(ubuntu --version | sed 's/Ubuntu //g')
        coreutils: $VERSION
    END_VERSIONS
    """
}
Loading

0 comments on commit 5196d4d

Please sign in to comment.