-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
272 additions
and
61 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
1.1.0 | ||
-Added ability to parse customer and reference strand from manifest | ||
-Added ability to generate forward and plus strand nucleotide calls from genotypes | ||
|
||
1.0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,53 @@ | ||
from IlluminaBeadArrayFiles import GenotypeCalls, BeadPoolManifest, code2genotype | ||
import sys | ||
import argparse | ||
import os | ||
from datetime import datetime | ||
from IlluminaBeadArrayFiles import GenotypeCalls, BeadPoolManifest, code2genotype | ||
|
||
delim = "\t" | ||
|
||
if len(sys.argv) < 4: | ||
sys.stderr.write("Generate a final report from a directory of GTC files\n") | ||
sys.stderr.write("usage: python gtc_final_report.py <BPM manifest file> <GTC directory> <output file>\n") | ||
sys.exit(-1) | ||
parser = argparse.ArgumentParser("Generate a final report from a directory of GTC files") | ||
parser.add_argument("manifest", help="BPM manifest file") | ||
parser.add_argument("gtc_directory", help="Directory containing GTC files") | ||
parser.add_argument("output_file", help="Location to write report") | ||
|
||
args = parser.parse_args() | ||
|
||
try: | ||
names = BeadPoolManifest(sys.argv[1]).names | ||
manifest = BeadPoolManifest(args.manifest) | ||
except: | ||
sys.stderr.write("Failed to read loci names from manifest\n") | ||
sys.exit(-1) | ||
|
||
output_file = sys.argv[3] | ||
|
||
if os.path.isfile( output_file ): | ||
sys.stderr.write("Output file already exists, please delete and re-run\n") | ||
sys.exit(-1) | ||
|
||
with open(output_file, "w") as output_handle: | ||
output_handle.write("[Header]\n") | ||
output_handle.write(delim.join(["Processing Date", datetime.now().strftime("%m/%d/%Y %I:%M %p") ] )+ "\n") | ||
output_handle.write(delim.join(["Content", os.path.basename( sys.argv[1]) ]) + "\n") | ||
output_handle.write(delim.join(["Num SNPs", str( len(names) )]) + "\n") | ||
output_handle.write(delim.join(["Total SNPs", str( len(names))]) + "\n") | ||
|
||
samples = [] | ||
for file in os.listdir(sys.argv[2]): | ||
if file.lower().endswith(".gtc"): | ||
samples.append( file ) | ||
|
||
output_handle.write(delim.join(["Num Samples", str(len(samples))]) + "\n") | ||
output_handle.write(delim.join(["Total Samples", str(len(samples))]) + "\n") | ||
|
||
output_handle.write("[Data]\n") | ||
output_handle.write(delim.join([ "SNP Name", "Sample ID", "Alleles - AB"]) + "\n") | ||
for file in samples: | ||
sys.stderr.write("Processing " + file + "\n") | ||
gtc_file = os.path.join( sys.argv[2], file ) | ||
genotypes = GenotypeCalls(gtc_file).get_genotypes() | ||
assert len(genotypes) == len(names) | ||
for (name, genotype) in zip( names, genotypes): | ||
output_handle.write(delim.join( [name, file[:-4],code2genotype[genotype], ] ) + "\n") | ||
sys.stderr.write("Failed to read data from manifest\n") | ||
sys.exit(-1) | ||
|
||
if os.path.isfile(args.output_file): | ||
sys.stderr.write("Output file already exists, please delete and re-run\n") | ||
sys.exit(-1) | ||
|
||
with open(args.output_file, "w") as output_handle: | ||
output_handle.write("[Header]\n") | ||
output_handle.write(delim.join(["Processing Date", datetime.now().strftime("%m/%d/%Y %I:%M %p")])+ "\n") | ||
output_handle.write(delim.join(["Content", os.path.basename(args.manifest)]) + "\n") | ||
output_handle.write(delim.join(["Num SNPs", str(len(manifest.names))]) + "\n") | ||
output_handle.write(delim.join(["Total SNPs", str(len(manifest.names))]) + "\n") | ||
|
||
samples = [] | ||
for gtc_file in os.listdir(args.gtc_directory): | ||
if gtc_file.lower().endswith(".gtc"): | ||
samples.append(gtc_file) | ||
|
||
output_handle.write(delim.join(["Num Samples", str(len(samples))]) + "\n") | ||
output_handle.write(delim.join(["Total Samples", str(len(samples))]) + "\n") | ||
|
||
output_handle.write("[Data]\n") | ||
output_handle.write(delim.join(["SNP Name", "Sample ID", "Chr", "MapInfo", "Alleles - AB", "Alleles - Plus", "Alleles - Forward"]) + "\n") | ||
for gtc_file in samples: | ||
sys.stderr.write("Processing " + gtc_file + "\n") | ||
gtc_file = os.path.join(args.gtc_directory, gtc_file) | ||
gtc = GenotypeCalls(gtc_file) | ||
genotypes = gtc.get_genotypes() | ||
plus_strand_genotypes = gtc.get_base_calls_plus_strand(manifest.snps, manifest.ref_strands) | ||
forward_strand_genotypes = gtc.get_base_calls_forward_strand(manifest.snps, manifest.source_strands) | ||
|
||
assert len(genotypes) == len(manifest.names) | ||
for (name, chrom, map_info, genotype, ref_strand_genotype, source_strand_genotype) in zip(manifest.names, manifest.chroms, manifest.map_infos, genotypes, plus_strand_genotypes, forward_strand_genotypes): | ||
output_handle.write(delim.join([name, os.path.basename(gtc_file)[:-4], chrom, str(map_info), code2genotype[genotype], ref_strand_genotype, source_strand_genotype]) + "\n") |
Oops, something went wrong.