Skip to content

Commit

Permalink
Merge pull request #74 from sanger-bentley-group/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
blue-moon22 authored Aug 17, 2022
2 parents 7248ad8 + a6aa4b6 commit b30d7f3
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 11 deletions.
9 changes: 9 additions & 0 deletions modules/combine.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@ process combine_results {
-x "${surface_protein_incidence}" \
-n "${version}" \
-o ${pair_id}
unlink ${sero_results}
unlink ${res_incidence}
unlink ${res_alleles}
unlink ${surface_protein_incidence}
unlink ${surface_protein_variants}
unlink ${mlst_allelic_frequency}
unlink ${version}
unlink ${config}
"""
}

Expand Down
35 changes: 30 additions & 5 deletions modules/mlst.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,24 @@ process srst2_for_mlst {
mlst_name="Streptococcus_agalactiae"

"""
set +e
getmlst.py --species 'Streptococcus agalactiae'
srst2 --samtools_args '\\-A' --input_pe ${reads[0]} ${reads[1]} --output ${pair_id} --save_scores --mlst_db ${mlst_db} --mlst_definitions profiles_csv --mlst_delimiter '_' --min_coverage ${min_coverage}
touch ${pair_id}__mlst__${mlst_name}__results.txt
# Clean directory
mkdir output
mv ${pair_id}*.bam output
mv ${pair_id}__mlst__${mlst_name}__results.txt output
mv ${mlst_db} output
find . -maxdepth 1 -type f -delete
unlink ${reads[0]}
unlink ${reads[1]}
mv output/${pair_id}*.bam .
mv output/${pair_id}__mlst__${mlst_name}__results.txt .
mv output/${mlst_db} .
rm -d output
"""
}

Expand All @@ -41,7 +55,7 @@ process get_mlst_allele_and_pileup {
output_new_mlst_alleles_log="${pair_id}_new_mlst_alleles.log"

"""
set +e
# Get alleles from mismatches in SRST2 MLST results file
samtools index ${bam_file}
get_alleles_from_srst2_mlst.py --mlst_results_file ${results_file} --min_read_depth ${min_read_depth} --output_prefix ${pair_id}
Expand Down Expand Up @@ -81,11 +95,22 @@ process get_mlst_allele_and_pileup {
echo "${pair_id}: No new MLST alleles found." > tmp.log
fi
if [ -f tmp.fasta ]
then
mv tmp.fasta ${output_new_mlst_alleles_fasta}
fi
if [ -f tmp_pileup.txt ]
then
mv tmp_pileup.txt ${output_new_mlst_pileup}
fi
touch ${pair_id}_new_mlst_alleles.log
mv tmp.fasta ${output_new_mlst_alleles_fasta}
mv tmp_pileup.txt ${output_new_mlst_pileup}
mv tmp.log ${output_new_mlst_alleles_log}
# Clean
unlink ${bam_file}
unlink ${results_file}
unlink ${mlst_alleles}
"""

}
15 changes: 15 additions & 0 deletions modules/pbp_typer.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ process get_pbp_genes {
# Get BED file of PBP fragments
get_pbp_genes_from_contigs.py --blast_out_file ${pair_id}_blast_blactam.out --query_fasta ${blactam_ref} --frac_align_len_threshold ${frac_align_len_threshold} --frac_identity_threshold ${frac_identity_len_threshold} --output_prefix ${pair_id}_
# Clean directory
mkdir output
mv ${pair_id}_*bed output
mv ${contigs} output
find . -maxdepth 1 -type f -delete
unlink ${blactam_ref}
mv output/${pair_id}_*bed .
mv output/${contigs} .
rm -d output
"""
}

Expand Down Expand Up @@ -45,6 +55,11 @@ process get_pbp_alleles {
# Get identical or imperfect hits
get_pbp_alleles.py --blast_out_file ${pair_id}_blast_${pbp_type}.out --query_fasta ${pair_id}_${pbp_type}.faa --output_prefix ${pair_id}_${pbp_type}_PBP
unlink ${pair_id}_${pbp_type}.bed
fi
unlink ${contigs}
unlink ${gbs_blactam_db}
"""
}
45 changes: 42 additions & 3 deletions modules/res_alignments.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,22 @@ process srst2_for_res_typing {
script:
db_name=db.getSimpleName()
"""
set +e
srst2 --samtools_args '\\-A' --input_pe ${reads[0]} ${reads[1]} --output ${pair_id} --log --save_scores --min_coverage ${min_coverage} --max_divergence ${max_divergence} --gene_db ${db}
touch ${pair_id}__fullgenes__${db_name}__results.txt
# Clean directory
mkdir output
mv ${pair_id}*.bam output
mv ${pair_id}__fullgenes__${db_name}__results.txt output
find . -maxdepth 1 -type f -delete
unlink ${reads[0]}
unlink ${reads[1]}
unlink ${db}
mv output/${pair_id}*.bam .
mv output/${pair_id}__fullgenes__${db_name}__results.txt .
rm -d output
"""
}

Expand All @@ -30,6 +43,10 @@ process split_target_RES_sequences {

"""
get_targets_from_db.py -f ${fasta_file} -t ${targets_file} -o CHECK_
# Clean
unlink ${fasta_file}
unlink ${targets_file}
"""
}

Expand All @@ -45,15 +62,20 @@ process split_target_RES_seq_from_sam_file {
file("*_*_${pair_id}*.bai")

"""
set +e
samtools view -h ${bam_file} > \$(basename ${bam_file} .bam).sam
get_targets_from_samfile.py -s \$(basename ${bam_file} .bam).sam -t ${targets_file} -i ${pair_id} -o CHECK_
for check_sam_file in CHECK_*_${pair_id}*.sam; do
samtools view -bS \${check_sam_file} > \$(basename \${check_sam_file} .sam).bam
samtools index \$(basename \${check_sam_file} .sam).bam \$(basename \${check_sam_file} .sam).bai
done
touch dummy_dummy_${pair_id}_dummy.bam
touch dummy_dummy_${pair_id}_dummy.bai
# Clean directory
unlink ${bam_file}
unlink ${targets_file}
"""
}

Expand All @@ -69,14 +91,31 @@ process freebayes {
tuple val(pair_id), file("${pair_id}_consensus_seq.fna"), emit: consensus

"""
set +e
for check_bam_file in CHECK_*_${pair_id}*.bam; do
target=\$(echo \${check_bam_file} | sed 's/CHECK_//g' | sed 's/_${pair_id}.*//g')
freebayes -q 20 -p 1 -f CHECK_\${target}_ref.fna \${check_bam_file} -v CHECK_\${target}_${pair_id}_seq.vcf
bgzip CHECK_\${target}_${pair_id}_seq.vcf
tabix -p vcf CHECK_\${target}_${pair_id}_seq.vcf.gz
cat CHECK_\${target}_ref.fna | vcf-consensus CHECK_\${target}_${pair_id}_seq.vcf.gz >> ${pair_id}_consensus_seq.fna
rm CHECK_\${target}_${pair_id}_seq.vcf.gz
rm CHECK_\${target}_${pair_id}_seq.vcf.gz.tbi
rm CHECK_\${target}_ref.fna.fai
done
touch ${pair_id}_consensus_seq.fna
# Clean directory
for check_bam_file in CHECK_*_${pair_id}*.bam; do
target=\$(echo \${check_bam_file} | sed 's/CHECK_//g' | sed 's/_${pair_id}.*//g')
unlink \${check_bam_file}
unlink CHECK_\${target}_${pair_id}_seq.bai
unlink CHECK_\${target}_ref.fna
done
mkdir output
mv ${pair_id}_consensus_seq.fna output
find . -maxdepth 1 -type f -delete
mv output/${pair_id}_consensus_seq.fna .
rm -d output
"""
}
2 changes: 1 addition & 1 deletion modules/res_typer.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ process res_typer {
variants_output_file="${pair_id}_res_gbs_variants.txt"
alleles_accessions_file="${pair_id}_res_alleles_accessions.txt"
"""
set +e
process_res_typer_results.py \
--srst2_gbs_fullgenes ${gbs_fullgenes} \
--srst2_gbs_consensus ${gbs_consensus} \
Expand Down
10 changes: 9 additions & 1 deletion modules/serotyping.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ process serotyping {
sero_gene_db="GBS-SBG.fasta"

"""
set +e
# Get latest version of GBS Serotype Database
git clone https://github.com/swainechen/GBS-SBG
Expand All @@ -22,5 +21,14 @@ process serotyping {
process_serotyper_results.py --srst2_output SERO_${pair_id} --sero_db ${sero_gene_db} --output ${pair_id}_SeroType_Results.txt --min_read_depth ${min_read_depth}
touch ${output_file}
# Clean directory
mkdir output
mv ${output_file} output
find . -maxdepth 1 -type f -delete
unlink ${reads[0]}
unlink ${reads[1]}
mv output/${output_file} .
rm -d output
"""
}
12 changes: 11 additions & 1 deletion modules/surface_typer.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,21 @@ process surface_typer {
inc_output_file="${pair_id}_surface_protein_incidence_sample.txt"
variants_output_file="${pair_id}_surface_protein_variants_sample.txt"
"""
set +e
srst2 --samtools_args '\\-A' --input_pe ${reads[0]} ${reads[1]} --output ${pair_id}_SURFACE --log --save_scores --min_coverage ${min_coverage} --max_divergence ${max_divergence} --gene_db ${surface_protein_db}
process_surface_typer_results.py --srst2_gbs_fullgenes ${pair_id}_SURFACE --surface_db ${surface_protein_db} --output_prefix ${pair_id} --min_read_depth ${min_read_depth}
touch ${inc_output_file}
touch ${variants_output_file}
# Clean directory
mkdir output
mv ${inc_output_file} output
mv ${variants_output_file} output
find . -maxdepth 1 -type f -delete
unlink ${surface_protein_db}
mv output/${inc_output_file} .
mv output/${variants_output_file} .
rm -d output
"""
}

0 comments on commit b30d7f3

Please sign in to comment.