Skip to content

Commit

Permalink
Merge branch 'release/1.1.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
Ryan Kelley committed Jan 12, 2017
2 parents 758535f + acee4c2 commit fa77ec2
Show file tree
Hide file tree
Showing 5 changed files with 272 additions and 61 deletions.
5 changes: 5 additions & 0 deletions change_log.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1.1.0
-Added ability to parse customer and reference strand from manifest
-Added ability to generate forward and plus strand nucleotide calls from genotypes

1.0.0
83 changes: 44 additions & 39 deletions examples/gtc_final_report.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,53 @@
from IlluminaBeadArrayFiles import GenotypeCalls, BeadPoolManifest, code2genotype
import sys
import argparse
import os
from datetime import datetime
from IlluminaBeadArrayFiles import GenotypeCalls, BeadPoolManifest, code2genotype

delim = "\t"

if len(sys.argv) < 4:
sys.stderr.write("Generate a final report from a directory of GTC files\n")
sys.stderr.write("usage: python gtc_final_report.py <BPM manifest file> <GTC directory> <output file>\n")
sys.exit(-1)
parser = argparse.ArgumentParser("Generate a final report from a directory of GTC files")
parser.add_argument("manifest", help="BPM manifest file")
parser.add_argument("gtc_directory", help="Directory containing GTC files")
parser.add_argument("output_file", help="Location to write report")

args = parser.parse_args()

try:
names = BeadPoolManifest(sys.argv[1]).names
manifest = BeadPoolManifest(args.manifest)
except:
sys.stderr.write("Failed to read loci names from manifest\n")
sys.exit(-1)

output_file = sys.argv[3]

if os.path.isfile( output_file ):
sys.stderr.write("Output file already exists, please delete and re-run\n")
sys.exit(-1)

with open(output_file, "w") as output_handle:
output_handle.write("[Header]\n")
output_handle.write(delim.join(["Processing Date", datetime.now().strftime("%m/%d/%Y %I:%M %p") ] )+ "\n")
output_handle.write(delim.join(["Content", os.path.basename( sys.argv[1]) ]) + "\n")
output_handle.write(delim.join(["Num SNPs", str( len(names) )]) + "\n")
output_handle.write(delim.join(["Total SNPs", str( len(names))]) + "\n")

samples = []
for file in os.listdir(sys.argv[2]):
if file.lower().endswith(".gtc"):
samples.append( file )

output_handle.write(delim.join(["Num Samples", str(len(samples))]) + "\n")
output_handle.write(delim.join(["Total Samples", str(len(samples))]) + "\n")

output_handle.write("[Data]\n")
output_handle.write(delim.join([ "SNP Name", "Sample ID", "Alleles - AB"]) + "\n")
for file in samples:
sys.stderr.write("Processing " + file + "\n")
gtc_file = os.path.join( sys.argv[2], file )
genotypes = GenotypeCalls(gtc_file).get_genotypes()
assert len(genotypes) == len(names)
for (name, genotype) in zip( names, genotypes):
output_handle.write(delim.join( [name, file[:-4],code2genotype[genotype], ] ) + "\n")
sys.stderr.write("Failed to read data from manifest\n")
sys.exit(-1)

if os.path.isfile(args.output_file):
sys.stderr.write("Output file already exists, please delete and re-run\n")
sys.exit(-1)

with open(args.output_file, "w") as output_handle:
output_handle.write("[Header]\n")
output_handle.write(delim.join(["Processing Date", datetime.now().strftime("%m/%d/%Y %I:%M %p")])+ "\n")
output_handle.write(delim.join(["Content", os.path.basename(args.manifest)]) + "\n")
output_handle.write(delim.join(["Num SNPs", str(len(manifest.names))]) + "\n")
output_handle.write(delim.join(["Total SNPs", str(len(manifest.names))]) + "\n")

samples = []
for gtc_file in os.listdir(args.gtc_directory):
if gtc_file.lower().endswith(".gtc"):
samples.append(gtc_file)

output_handle.write(delim.join(["Num Samples", str(len(samples))]) + "\n")
output_handle.write(delim.join(["Total Samples", str(len(samples))]) + "\n")

output_handle.write("[Data]\n")
output_handle.write(delim.join(["SNP Name", "Sample ID", "Chr", "MapInfo", "Alleles - AB", "Alleles - Plus", "Alleles - Forward"]) + "\n")
for gtc_file in samples:
sys.stderr.write("Processing " + gtc_file + "\n")
gtc_file = os.path.join(args.gtc_directory, gtc_file)
gtc = GenotypeCalls(gtc_file)
genotypes = gtc.get_genotypes()
plus_strand_genotypes = gtc.get_base_calls_plus_strand(manifest.snps, manifest.ref_strands)
forward_strand_genotypes = gtc.get_base_calls_forward_strand(manifest.snps, manifest.source_strands)

assert len(genotypes) == len(manifest.names)
for (name, chrom, map_info, genotype, ref_strand_genotype, source_strand_genotype) in zip(manifest.names, manifest.chroms, manifest.map_infos, genotypes, plus_strand_genotypes, forward_strand_genotypes):
output_handle.write(delim.join([name, os.path.basename(gtc_file)[:-4], chrom, str(map_info), code2genotype[genotype], ref_strand_genotype, source_strand_genotype]) + "\n")
Loading

0 comments on commit fa77ec2

Please sign in to comment.