forked from jthmiller/killifish-RADseq-4popQTL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
BarcodeSplitListBestRadPairedEnd.pl
98 lines (72 loc) · 3.09 KB
/
BarcodeSplitListBestRadPairedEnd.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/perl
### Author: Mike Miller
#
# Split gzipped paired-end FASTQ reads into per-barcode files.
#
# Usage:
#   BarcodeSplitListBestRadPairedEnd.pl <read1.fastq.gz> <read2.fastq.gz> \
#       <barcode1,barcode2,...> <output_prefix>
#
# For every barcode two plain-text FASTQ files are created:
#   <output_prefix>_RA_<barcode>.fastq   the mate that carried the barcode
#                                        (barcode removed from sequence and quality)
#   <output_prefix>_RB_<barcode>.fastq   the other mate, unmodified
#
# The barcode may be at the start of either mate, so a pair is re-oriented
# when the barcode is found on the second read.  Pairs with no barcode are
# dropped.  Pairs with a listed barcode on BOTH mates are not written to any
# FASTQ file; they are reported on STDOUT as "Double Barcode!".
use strict;
use warnings;

my $USAGE = "Usage: $0 <read1.fastq.gz> <read2.fastq.gz> "
          . "<barcode1,barcode2,...> <output_prefix>\n";

die $USAGE unless @ARGV == 4;
my ($file1, $file2, $barcode, $prefix) = @ARGV;

# Empty fields (e.g. from "AAA,,CCC") can never be a meaningful barcode.
my @barcodes = grep { length } split /,/, $barcode;
die "No barcodes given.\n$USAGE" unless @barcodes;

# Every read prefix is compared using the length of the FIRST barcode, so a
# barcode of any other length can never match; tell the user instead of
# silently producing an empty file for it.
my $barcode_length = length $barcodes[0];
for my $bc (@barcodes) {
    warn "Barcode '$bc' is not $barcode_length bases long and will never match.\n"
        if length($bc) != $barcode_length;
}

# Fail before creating any output if an input is missing.
for my $input ($file1, $file2) {
    die "Input file '$input' does not exist.\n" unless -e $input;
}

# barcode => { name => output file name, fh => open write handle }
my (%ra_for, %rb_for);
for my $bc (@barcodes) {
    next if exists $ra_for{$bc};    # barcode listed more than once
    $ra_for{$bc} = open_output($prefix . "_RA_" . $bc . ".fastq");
    $rb_for{$bc} = open_output($prefix . "_RB_" . $bc . ".fastq");
}

my $in1 = open_gunzip($file1);
my $in2 = open_gunzip($file2);

while (my @r1 = read_fastq_record($in1, $file1)) {
    # The two files must stay in lock-step; a missing mate means the pairing
    # of everything that follows would be wrong.
    my @r2 = read_fastq_record($in2, $file2)
        or die "'$file2' contains fewer reads than '$file1'.\n";

    # Element 1 of a record is the sequence line; the barcode is its prefix.
    my $bc1 = substr $r1[1], 0, $barcode_length;
    my $bc2 = substr $r2[1], 0, $barcode_length;
    my $on_first  = exists $ra_for{$bc1};
    my $on_second = exists $ra_for{$bc2};

    if ($on_first && !$on_second) {
        write_pair($ra_for{$bc1}, $rb_for{$bc1}, $barcode_length, \@r1, \@r2);
    }
    elsif (!$on_first && $on_second) {
        # Barcode is on the second read: swap the mates so that the
        # barcoded read always ends up in the RA file.
        write_pair($ra_for{$bc2}, $rb_for{$bc2}, $barcode_length, \@r2, \@r1);
    }
    elsif ($on_first && $on_second) {
        print "Double Barcode!\t$bc1\t$bc2\n";
    }
}

close_gunzip($in1, $file1);

# Reads left over in the second file are ignored, but that is worth a warning.
if (defined(my $leftover = <$in2>)) {
    warn "'$file2' contains more reads than '$file1'; the extra reads were ignored.\n";
    close $in2;    # gunzip is cut off early here, so its status is meaningless
}
else {
    close_gunzip($in2, $file2);
}

# Buffered write errors (e.g. disk full) only surface at close.
for my $out (values %ra_for, values %rb_for) {
    close $out->{fh} or die "Error closing '$out->{name}': $!\n";
}

# Create (or truncate) an output file.
# Returns a hash ref holding the file name and the open write handle.
sub open_output {
    my ($name) = @_;
    open my $fh, '>', $name or die "Cannot write '$name': $!\n";
    return { name => $name, fh => $fh };
}

# Start "gunzip -dc" on $path and return a handle on its output.
# The list form of open runs gunzip directly, so the file name is never
# interpreted by a shell.
sub open_gunzip {
    my ($path) = @_;
    open my $fh, '-|', 'gunzip', '-dc', '--', $path
        or die "Cannot start gunzip for '$path': $!\n";
    return $fh;
}

# Close a gunzip pipe and check how gunzip ended.
# gzip exits with status 2 for warnings only, so that case is reported
# without aborting; any other failure is fatal.
sub close_gunzip {
    my ($fh, $path) = @_;
    return if close $fh;
    my $why = $! ? "close failed: $!" : "wait status $?";
    my $msg = "gunzip reported a problem with '$path' ($why).\n";
    if (!$! && ($? & 127) == 0 && ($? >> 8) == 2) {
        warn $msg;
        return;
    }
    die $msg;
}

# Read one four-line FASTQ record (header, sequence, separator, quality).
# Returns the four lines, or an empty list at end of input.
# Dies if the input ends in the middle of a record.
sub read_fastq_record {
    my ($fh, $label) = @_;
    my @record;
    for (1 .. 4) {
        my $line = <$fh>;
        last unless defined $line;
        push @record, $line;
    }
    return if !@record;
    die "Truncated FASTQ record in '$label' (" . scalar(@record) . " of 4 lines).\n"
        if @record < 4;
    return @record;
}

# Write one read pair.
#   $ra, $rb   output descriptors as returned by open_output
#   $trim      number of leading barcode bases to remove from the tagged mate
#   $tagged    array ref: the record that carried the barcode (goes to $ra)
#   $mate      array ref: the other record, written unchanged (goes to $rb)
sub write_pair {
    my ($ra, $rb, $trim, $tagged, $mate) = @_;
    my ($header, $sequence, $separator, $quality) = @{$tagged};

    # Sequence and quality are trimmed together so they stay the same length.
    print { $ra->{fh} } $header, substr($sequence, $trim),
                        $separator, substr($quality, $trim)
        or die "Error writing '$ra->{name}': $!\n";
    print { $rb->{fh} } @{$mate}
        or die "Error writing '$rb->{name}': $!\n";
    return;
}