Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dp24 gc content #28

Merged
merged 20 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file modified bin/BedTools.py
100644 → 100755
Empty file.
Empty file modified bin/extract_contaminants_by_type.py
100644 → 100755
Empty file.
Empty file modified bin/filter_barcode_blast_results.py
100644 → 100755
Empty file.
33 changes: 33 additions & 0 deletions bin/gc_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
"""
Script for finding the GC content of each sequence in a multiFASTA file

Written by Eerik Aunin @eeaunin

Adapted by Damon-Lee Pointon @DLBPointon
"""

import argparse
import general_purpose_functions as gpf


def main(fasta_path):
    """Print the GC fraction of every sequence in a (multi)FASTA file.

    One tab-separated line per record is written to stdout: the first
    whitespace-delimited token of the FASTA header, followed by
    (G + C) / sequence length formatted to six decimal places. Counting is
    case-insensitive because the sequence is upper-cased first.

    A zero-length sequence has no defined GC fraction; it is reported as
    "NA" rather than aborting the whole run with a formatting error.

    Args:
        fasta_path: Path to the input FASTA file; it is handed unchanged to
            gpf.read_fasta_in_chunks, which yields (header, sequence) pairs.
    """
    for header, seq in gpf.read_fasta_in_chunks(fasta_path):
        # Keep only the sequence identifier; tolerate a blank header line
        # instead of raising IndexError.
        header_fields = header.split()
        seq_id = header_fields[0] if header_fields else ""

        seq = seq.upper()
        seq_len = len(seq)
        if seq_len > 0:
            gc_count = seq.count("G") + seq.count("C")
            gc_content_string = "{:.6f}".format(gc_count / seq_len)
        else:
            # Division by zero is undefined here; emit a placeholder so the
            # record still appears in the output table.
            gc_content_string = "NA"
        print("{}\t{}".format(seq_id, gc_content_string))


if __name__ == "__main__":
    # Command-line entry point: one positional FASTA path plus a -v flag that
    # prints the script version and exits.
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("fasta_path", type=str, help="Path to input FASTA file")
    cli.add_argument("-v", action="version", version="1.0")
    main(cli.parse_args().fasta_path)
Empty file modified bin/organelle_contamination_recommendation.py
100644 → 100755
Empty file.
Empty file modified bin/pacbio_barcode_check.py
100644 → 100755
Empty file.
Empty file modified bin/reformat_diamond_outfmt6.py
100644 → 100755
DLBPointon marked this conversation as resolved.
Show resolved Hide resolved
Empty file.
5 changes: 5 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,9 @@ process {
ext.prefix = { "${meta.id}_euk" }
}

// Settings for the GNU_SORT step of GENERATE_GENOME: output files are prefixed
// "<meta.id>_sorted" and sort is invoked with column 2 as the key, compared
// numerically in descending order (-k2,2 -nr).
withName: '.*:.*:GENERATE_GENOME:GNU_SORT' {
ext.prefix = { "${meta.id}_sorted"}
ext.args = { '-k2,2 -nr' }
}

}
7 changes: 7 additions & 0 deletions conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@

----------------------------------------------------------------------------------------
*/
// Allow at most one instance of each process to run in parallel for this
// profile (maxForks = 1).
process {
maxForks = 1
}

// Let the executor handle only one task at a time (queueSize = 1).
executor {
queueSize=1
}

params {
config_profile_name = 'Full test profile'
Expand Down
16 changes: 16 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
"git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543",
"installed_by": ["modules"]
},
"custom/getchromsizes": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"],
"patch": "modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff"
},
"diamond/blastx": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
Expand All @@ -35,6 +41,11 @@
"git_sha": "5a35af8b60d45425c4b9193e567d16b614d93dbe",
"installed_by": ["modules"]
},
"gnu/sort": {
"branch": "master",
"git_sha": "88f6e982fb8bd40488d837b3b08a65008e602840",
"installed_by": ["modules"]
},
"kraken2/kraken2": {
"branch": "master",
"git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
Expand All @@ -61,6 +72,11 @@
"git_sha": "a1ffbc1fd87bd5a829e956cc26ec9cc53af3e817",
"installed_by": ["modules"]
},
"samtools/faidx": {
"branch": "master",
"git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe",
"installed_by": ["modules"]
},
"samtools/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
Expand Down
37 changes: 37 additions & 0 deletions modules/local/gc_content.nf
DLBPointon marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Computes the per-sequence GC fraction of a FASTA file by running the
// pipeline's gc_content.py script and capturing its stdout in
// "<prefix>-gc.txt" (prefix defaults to meta.id unless task.ext.prefix is set).
process GC_CONTENT {
    tag "${meta.id}"
    label 'process_low'

    conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
        'quay.io/biocontainers/pandas:1.5.2' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path( "*-gc.txt" ) , emit: txt
    path "versions.yml"                 , emit: versions

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    gc_content.py ${fasta} > ${prefix}-gc.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gc_content: \$(gc_content.py -v)
    END_VERSIONS
    """

    stub:
    // The stub must create a file matching the declared "*-gc.txt" output,
    // otherwise a stub run fails with a missing-output error.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}-gc.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gc_content: \$(gc_content.py -v)
    END_VERSIONS
    """
}
40 changes: 40 additions & 0 deletions modules/local/get_largest_scaff.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Exposes the second tab-separated field of the first line of the input file
// as the environment output `largest_scaff`.
// NOTE(review): this presumably expects a "<name>\t<length>" table already
// sorted by length in descending order, so that line 1 is the largest
// scaffold - confirm against the caller.
process GET_LARGEST_SCAFF {

    tag "$meta.id"
    label 'process_low'

    conda "conda-forge::coreutils=9.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
        'docker.io/ubuntu:20.04' }"

    input:
    tuple val( meta ), path( file )

    output:
    env largest_scaff       , emit: scaff_size
    path "versions.yml"     , emit: versions

    // A `script:` section is used because the command relies on Groovy
    // interpolation (${file}, $VERSION) and contains no `!{}` placeholders.
    // The dollar-slashy string keeps the bash $'\t' literal intact without
    // extra escaping.
    script:
    def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    $/
    largest_scaff=`head -n 1 "${file}" | cut -d$'\t' -f2`

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        coreutils: $VERSION
    END_VERSIONS
    /$

    stub:
    def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    """
    largest_scaff=1000000

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        coreutils: $VERSION
    END_VERSIONS
    """
}
63 changes: 63 additions & 0 deletions modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 53 additions & 0 deletions modules/nf-core/custom/getchromsizes/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 53 additions & 0 deletions modules/nf-core/custom/getchromsizes/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 52 additions & 0 deletions modules/nf-core/gnu/sort/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading