Skip to content

Commit

Permalink
[fix] fname list name
Browse files Browse the repository at this point in the history
  • Loading branch information
LaraFuhrmann committed Nov 30, 2023
1 parent 37c7fc0 commit 79c9071
Showing 1 changed file with 46 additions and 0 deletions.
46 changes: 46 additions & 0 deletions workflow/scripts/haplotypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env python3

from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import glob
import subprocess

def unzip(fname):
subprocess.run(
[
"gunzip",
str(fname),
],
)


def main(dname_haplotypes, sample_name, fname_out_haplos):

for fname in glob.glob(f"{dname_haplotypes}*.reads-support.fas.gz"):
unzip(fname)

all_haplotype_files = [file for file in glob.glob(f"{dname_haplotypes}*.reads-support.fas")]

# define output file
with open(fname_out_haplos, 'w') as out_f:
my_seqs = []

# iterate over all haplotype files
for fname_haplotype_region in all_haplotype_files:
region = fname_haplotype_region.split(".reads-support.fas")[0].split("w-")[1]
for record in SeqIO.parse(fname_haplotype_region, 'fasta'):
new_id = sample_name+'-'+region + '-' +str(record.id)
my_seqs.append(SeqRecord(Seq(record.seq), id = new_id))

# write all sequences to file
SeqIO.write(my_seqs, out_f, "fasta")



if __name__ == "__main__":
main(
snakemake.input.dname_haplotypes,
snakemake.params.sample,
snakemake.output.fname_haplotypes,
)

0 comments on commit 79c9071

Please sign in to comment.