Skip to content

Commit

Permalink
Merge pull request #28 from sanger-tol/dp24_gc_content
Browse files Browse the repository at this point in the history
Dp24 gc content
  • Loading branch information
DLBPointon authored Oct 10, 2023
2 parents 447c12f + 5e01a76 commit 5d5f200
Show file tree
Hide file tree
Showing 34 changed files with 661 additions and 92 deletions.
Empty file modified bin/BedTools.py
100644 → 100755
Empty file.
Empty file modified bin/extract_contaminants_by_type.py
100644 → 100755
Empty file.
Empty file modified bin/filter_barcode_blast_results.py
100644 → 100755
Empty file.
33 changes: 33 additions & 0 deletions bin/gc_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
"""
Script for finding the GC content of each sequence in a multiFASTA file
Written by Eerik Aunin @eeaunin
Adapted by Damon-Lee Pointon @DLBPointon
"""

import argparse
import general_purpose_functions as gpf


def main(fasta_path):
    """Print the GC fraction of every sequence in a (multi)FASTA file.

    One line is written to stdout per record, in the form
    ``<sequence id>\\t<GC fraction>``. The sequence id is the first
    whitespace-delimited token of the FASTA header. The GC fraction is the
    number of G and C characters (case-insensitive) divided by the total
    sequence length, formatted with six decimal places.

    Records with a zero-length sequence have no defined GC fraction; they are
    reported as ``NA`` instead of aborting the run.

    Args:
        fasta_path: Path to the input FASTA file, passed unchanged to
            ``gpf.read_fasta_in_chunks``.
    """
    for header, seq in gpf.read_fasta_in_chunks(fasta_path):
        seq_id = header.split()[0]
        seq_len = len(seq)
        if seq_len == 0:
            # Previously None was fed to "{:.6f}".format(), raising TypeError
            # and killing the run on the first empty record.
            print("{}\t{}".format(seq_id, "NA"))
            continue
        seq = seq.upper()
        gc_count = seq.count("G") + seq.count("C")
        print("{}\t{:.6f}".format(seq_id, gc_count / seq_len))


if __name__ == "__main__":
    # Command-line entry point: one positional FASTA path, plus -v to print
    # the script version and exit.
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("fasta_path", type=str, help="Path to input FASTA file")
    cli.add_argument("-v", action="version", version="1.0")
    cli_args = cli.parse_args()
    main(cli_args.fasta_path)
Empty file modified bin/organelle_contamination_recommendation.py
100644 → 100755
Empty file.
Empty file modified bin/pacbio_barcode_check.py
100644 → 100755
Empty file.
20 changes: 3 additions & 17 deletions bin/reformat_blast_outfmt6.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,6 @@
in_data = gpf.ll(in_path)

for line in in_data:
split_line = line.split()
assert len(split_line) == 14
output_line = "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
split_line[0],
split_line[4],
split_line[5],
split_line[6],
split_line[7],
split_line[8],
split_line[9],
split_line[10],
split_line[11],
split_line[12],
split_line[13],
split_line[2],
)
print(output_line)
s = line.split()
assert len(s) == 14
print("\t".join(s[0:1] + s[4:] + s[2:3]))
Empty file modified bin/reformat_diamond_outfmt6.py
100644 → 100755
Empty file.
5 changes: 5 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,9 @@ process {
ext.prefix = { "${meta.id}_euk" }
}

// Settings applied to the GNU_SORT process when it runs under GENERATE_GENOME.
withName: '.*:.*:GENERATE_GENOME:GNU_SORT' {
// NOTE(review): presumably the module uses this prefix to name its output file — confirm in gnu/sort.
ext.prefix = { "${meta.id}_sorted"}
// sort flags: key on field 2 only (-k2,2), compare numerically (-n), reverse order (-r),
// i.e. rows with the largest value in column 2 come first.
ext.args = { '-k2,2 -nr' }
}

}
7 changes: 7 additions & 0 deletions conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@
----------------------------------------------------------------------------------------
*/
// Allow at most one parallel instance of each process in this profile.
// NOTE(review): presumably added to cap resource usage of the full test — confirm intent.
process {
maxForks = 1
}

// Let the executor handle a single task at a time (queue size of one).
executor {
queueSize=1
}

params {
config_profile_name = 'Full test profile'
Expand Down
16 changes: 16 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
"git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543",
"installed_by": ["modules"]
},
"custom/getchromsizes": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"],
"patch": "modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff"
},
"diamond/blastx": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
Expand All @@ -35,6 +41,11 @@
"git_sha": "5a35af8b60d45425c4b9193e567d16b614d93dbe",
"installed_by": ["modules"]
},
"gnu/sort": {
"branch": "master",
"git_sha": "88f6e982fb8bd40488d837b3b08a65008e602840",
"installed_by": ["modules"]
},
"fcs/fcsgx": {
"branch": "master",
"git_sha": "8c4542e5d421c4690cf1fa6ec729e9304763fdaf",
Expand Down Expand Up @@ -66,6 +77,11 @@
"git_sha": "a1ffbc1fd87bd5a829e956cc26ec9cc53af3e817",
"installed_by": ["modules"]
},
"samtools/faidx": {
"branch": "master",
"git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe",
"installed_by": ["modules"]
},
"samtools/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
Expand Down
7 changes: 4 additions & 3 deletions modules/local/blast_chunk_to_full.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process BLAST_CHUNK_TO_FULL {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(chunked)
Expand All @@ -22,6 +22,7 @@ process BLAST_CHUNK_TO_FULL {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
blast_hit_chunk_coords_to_full_coords: \$(blast_hit_chunk_coords_to_full_coords.py -v)
END_VERSIONS
"""
Expand Down
8 changes: 5 additions & 3 deletions modules/local/blast_get_top_hits.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process BLAST_GET_TOP_HITS {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(outfmt6)
Expand All @@ -21,6 +21,7 @@ process BLAST_GET_TOP_HITS {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
blast_get_top_hits: \$(blast_get_top_hits.py -v)
END_VERSIONS
"""
Expand All @@ -31,6 +32,7 @@ process BLAST_GET_TOP_HITS {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
reformat_blast_outfmt6: \$(blast_get_top_hits.py -v)
END_VERSIONS
"""
Expand Down
8 changes: 5 additions & 3 deletions modules/local/check_barcode.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process CHECK_BARCODE {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta) , path(barcodes)
Expand All @@ -27,6 +27,7 @@ process CHECK_BARCODE {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
pacbio_barcode_check: \$(pacbio_barcode_check.py -v)
END_VERSIONS
"""
Expand All @@ -37,6 +38,7 @@ process CHECK_BARCODE {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
pacbio_barcode_check: \$(pacbio_barcode_check.py -v)
END_VERSIONS
"""
Expand Down
8 changes: 5 additions & 3 deletions modules/local/extract_contaminants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process EXTRACT_CONTAMINANTS {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(fasta)
Expand All @@ -24,6 +24,7 @@ process EXTRACT_CONTAMINANTS {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
extract_contaminants_by_type: \$(extract_contaminants_by_type.py -v)
END_VERSIONS
"""
Expand All @@ -35,6 +36,7 @@ process EXTRACT_CONTAMINANTS {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
extract_contaminants_by_type: \$(extract_contaminants_by_type.py -v)
END_VERSIONS
"""
Expand Down
8 changes: 5 additions & 3 deletions modules/local/filter_barcode.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process FILTER_BARCODE {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(fasta)
Expand All @@ -28,6 +28,7 @@ process FILTER_BARCODE {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
filter_barcode_blast_results: \$(filter_barcode_blast_results.py -v)
END_VERSIONS
"""
Expand All @@ -41,6 +42,7 @@ process FILTER_BARCODE {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
filter_barcode_blast_results: \$(filter_barcode_blast_results.py -v)
END_VERSIONS
"""
Expand Down
8 changes: 5 additions & 3 deletions modules/local/format_diamond_outfmt6.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process REFORMAT_FULL_OUTFMT6 {
tag "${meta.id}"
label 'process_low'

conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
conda "conda-forge::python=3.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"

input:
tuple val(meta), path(diamond_blast)
Expand All @@ -21,6 +21,7 @@ process REFORMAT_FULL_OUTFMT6 {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
reformat_diamond_outfmt6: \$(reformat_diamond_outfmt6.py -v)
END_VERSIONS
"""
Expand All @@ -32,6 +33,7 @@ process REFORMAT_FULL_OUTFMT6 {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
reformat_diamond_outfmt6: \$(reformat_diamond_outfmt6.py -v)
END_VERSIONS
"""
Expand Down
39 changes: 39 additions & 0 deletions modules/local/gc_content.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Computes the per-sequence GC content of a FASTA file with bin/gc_content.py.
//
// Input : tuple(meta, fasta)
// Output: txt      - tuple(meta, "<prefix>-gc.txt"), the stdout of gc_content.py
//         versions - versions.yml with the python and gc_content.py versions
process GC_CONTENT {
    tag "${meta.id}"
    label 'process_low'

    conda "conda-forge::python=3.9"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/python:3.9' :
        'biocontainers/python:3.9' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path( "*-gc.txt" ) , emit: txt
    path "versions.yml"                 , emit: versions

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    gc_content.py ${fasta} > ${prefix}-gc.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        python: \$(python --version | sed 's/Python //g')
        gc_content: \$(gc_content.py -v)
    END_VERSIONS
    """

    stub:
    // The stub must create a file matching the declared "*-gc.txt" output;
    // it previously touched full_coords.tsv, so stub runs failed on a missing output.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}-gc.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        python: \$(python --version | sed 's/Python //g')
        gc_content: \$(gc_content.py -v)
    END_VERSIONS
    """
}
40 changes: 40 additions & 0 deletions modules/local/get_largest_scaff.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Extracts a single value from a tab-separated file: column 2 of its first line.
//
// Input : tuple(meta, file)
// Output: scaff_size - value of the `largest_scaff` environment variable set by the script
//         versions   - versions.yml with the (hard-coded) coreutils version
//
// NOTE(review): the name implies the input is already ordered with the largest
// scaffold on the first line — confirm against the upstream sort step.
process GET_LARGEST_SCAFF {

    tag "$meta.id"
    label 'process_low'

    conda "conda-forge::coreutils=9.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
        'docker.io/ubuntu:20.04' }"

    input:
    tuple val( meta ), path( file )

    output:
    env largest_scaff   , emit: scaff_size
    path "versions.yml" , emit: versions

    shell:
    def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    // Dollar-slashy string: backslashes are not Groovy escapes here, so $'\t' reaches
    // bash unchanged and is used as the tab delimiter for cut.
    $/
    largest_scaff=`head -n 1 "${file}" | cut -d$'\t' -f2`

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        coreutils: $VERSION
    END_VERSIONS
    /$

    stub:
    def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    // Fixed placeholder value so downstream steps receive a scaff_size in stub runs.
    """
    largest_scaff=1000000

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        coreutils: $VERSION
    END_VERSIONS
    """
}
1 change: 1 addition & 0 deletions modules/local/get_lineage_for_kraken.nf
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ process GET_LINEAGE_FOR_KRAKEN {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | sed 's/Python //g')
pandas: \$(pip list | grep "pandas" | sed 's/[[:blank:]]//g' | sed 's/pandas//g')
general_purpose_functions.py: \$(general_purpose_functions.py --version | cut -d' ' -f2)
get_lineage_for_kraken_results.py: \$(get_lineage_for_kraken_results.py --version | cut -d' ' -f2)
END_VERSIONS
Expand Down
Loading

0 comments on commit 5d5f200

Please sign in to comment.