Skip to content

Commit

Permalink
Merge pull request #82 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
merge from devel to produce 1.18
  • Loading branch information
jenniferliddle committed Jan 20, 2016
2 parents f6eb77f + b9db15c commit 54d31c1
Show file tree
Hide file tree
Showing 9 changed files with 416 additions and 80 deletions.
4 changes: 4 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
release 1.18
- new command line option CHANGE_RG_NAME default false
- NB this is a change to previous behaviour, where it was always true

release 1.17
- New command line option ADD_CLUSTER_INDEX_TAG default false
- Added function to read experiment name and computer name from runParameters file
Expand Down
2 changes: 1 addition & 1 deletion nbproject/project.properties
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ file.reference.jackson-core-2.0.0.jar=lib/jackson/jackson-core-2.0.0.jar
file.reference.jackson-databind-2.0.0.jar=lib/jackson/jackson-databind-2.0.0.jar
file.reference.picard.jar=lib/picard/picard-1.96.jar
file.reference.sam.jar=lib/picard/sam-1.96.jar
illumina2bam.version=1.17
illumina2bam.version=1.18
includes=**
jar.compress=false
javac.classpath=\
Expand Down
10 changes: 8 additions & 2 deletions src/uk/ac/sanger/npg/picard/BamIndexDecoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ public class BamIndexDecoder extends PicardCommandLine {
@Option(doc="Convert low quality bases in barcode read to Ns .")
public boolean CONVERT_LOW_QUALITY_TO_NO_CALL = false;

@Option(doc="Change the read name by adding #<barcodename> suffix")
public boolean CHANGE_READ_NAME = false;

@Option(doc="Max low quality phred value to convert bases in barcode read to Ns .")
private int MAX_LOW_QUALITY_TO_CONVERT = 15;

Expand Down Expand Up @@ -226,7 +229,7 @@ protected int doWork() {
if (isPaired) {
this.markBarcode(pairedRecord, barcodeName, readGroupOnlyIdInHeader);
}

if( OUTPUT != null ){
out.addAlignment(record);
if(isPaired){
Expand Down Expand Up @@ -263,7 +266,10 @@ protected int doWork() {
private SAMRecord markBarcode(SAMRecord record, String barcodeName, String readGroupOnlyIdInHeader) {

String readName = record.getReadName();
record.setReadName(readName + "#" + barcodeName);

if (CHANGE_READ_NAME) {
record.setReadName(readName + "#" + barcodeName);
}

Object oldReadGroupId = record.getAttribute("RG");
if (oldReadGroupId == null && readGroupOnlyIdInHeader != null) {
Expand Down
219 changes: 149 additions & 70 deletions src/uk/ac/sanger/npg/picard/BamMerger.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Stack;
import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.StandardOptionDefinitions;
import net.sf.picard.cmdline.Usage;
Expand Down Expand Up @@ -88,6 +89,152 @@ public class BamMerger extends PicardCommandLine {
@Option(shortName= "REPLACE_QUAL", doc="Replace base qualities in aligned bam wtih the ones in unaligned bam if true.")
public Boolean REPLACE_ALIGNED_BASE_QUALITY = false;

private void mergealign_pairedsupp(SAMRecordIterator iteratorIn, SAMRecordIterator iteratorAlignments, SAMFileWriter out) {
// We are assuming the primary alignment record always comes first
SAMRecord alignment = null;
SAMRecord nextAlignment = null;
Stack<SAMRecord> secondary_supp = new Stack<SAMRecord>();
if(iteratorAlignments.hasNext()){
nextAlignment = iteratorAlignments.next();
}

while( iteratorIn.hasNext() ){
SAMRecord record = iteratorIn.next();
// Unaligned data shouldn't have these but meh
if (record.isSecondaryOrSupplementary())
continue;
// Check if we are out of alignments
if(alignment == null && nextAlignment != null) {
// Hunt for the primary
SAMRecord searchAlignment = nextAlignment;
while (alignment == null) {
if (nextAlignment.getReadName().equals(searchAlignment.getReadName()) &&
nextAlignment.getReadPairedFlag() == searchAlignment.getReadPairedFlag() &&
(
(
nextAlignment.getReadPairedFlag() &&
nextAlignment.getFirstOfPairFlag() == searchAlignment.getFirstOfPairFlag() &&
nextAlignment.getSecondOfPairFlag() == searchAlignment.getSecondOfPairFlag()
)
||
!searchAlignment.getReadPairedFlag()
)
)
{
if (nextAlignment.isSecondaryOrSupplementary()) {
secondary_supp.push(nextAlignment);
if (iteratorAlignments.hasNext()) {
nextAlignment = iteratorAlignments.next();
} else {
log.error( nextAlignment.getReadName() );
throw new RuntimeException("The mapped bam file has a read where there's a supplementary/secondary but no primary.");
}
} else {
alignment = nextAlignment;
nextAlignment = null;
}
} else {
log.error( searchAlignment.getReadName() );
throw new RuntimeException("The mapped bam file has a read where there's a supplementary/secondary but no primary.");
}
}
// Mop up any secondaries
while (iteratorAlignments.hasNext()) {
nextAlignment = iteratorAlignments.next();
// is it from the same mapping as the current alignment?
if (nextAlignment.getReadName().equals(alignment.getReadName()) &&
nextAlignment.isSecondaryOrSupplementary() &&
nextAlignment.getReadPairedFlag() == alignment.getReadPairedFlag() &&
(
(
nextAlignment.getReadPairedFlag() &&
nextAlignment.getFirstOfPairFlag() == alignment.getFirstOfPairFlag() &&
nextAlignment.getSecondOfPairFlag() == alignment.getSecondOfPairFlag()
)
||
!nextAlignment.getReadPairedFlag()
)
) {
// it is a secondary for current alignment
secondary_supp.push(nextAlignment);
nextAlignment = null;
} else {
// new record so we move on
break;
}
}

} else if ( this.KEEP_EXTRA_UNMAPPED_READS ) { // no more piggies
out.addAlignment(record);
continue;
} else {
break;
}
String readName1 = record.getReadName();
String readName2 = alignment.getReadName();

boolean pairedRead1 = record.getReadPairedFlag();
boolean pairedRead2 = alignment.getReadPairedFlag();

boolean firstOfPair1 = false;
if(pairedRead1){
firstOfPair1 = record.getFirstOfPairFlag();
}
boolean firstOfPair2 = false;
if(pairedRead2){
firstOfPair2= alignment.getFirstOfPairFlag();
}

// Try and read in from unaligned BAM until read name matches align'd bam's read
while( ( !readName1.equals(readName2)
|| pairedRead1 != pairedRead2
|| firstOfPair1 != firstOfPair2 )
){
// Output
if( this.KEEP_EXTRA_UNMAPPED_READS ){
out.addAlignment(record);
}

if (iteratorIn.hasNext()) {
record = iteratorIn.next();
readName1 = record.getReadName();
pairedRead1 = record.getReadPairedFlag();
firstOfPair1 = false;
if (pairedRead1) {
firstOfPair1 = record.getFirstOfPairFlag();
}
}else{
break; // No more unaligned input data
}

}
// If we've found a matching record merge them
if( readName1.equals(readName2)
&& pairedRead1 == pairedRead2
&& firstOfPair1 == firstOfPair2
){
this.mergeRecords(alignment, record);
out.addAlignment(alignment); // Write merged record
String parent_rg = (String) alignment.getAttribute("RG");
while ( !secondary_supp.empty() ) {
SAMRecord s = secondary_supp.pop();
s.setAttribute("RG", parent_rg);
out.addAlignment(s); // Write merged supp record
}
alignment = null;
} else if ( this.KEEP_EXTRA_UNMAPPED_READS ) {
out.addAlignment(record);
}
}
// If we have left over records in our aligned BAM something went wrong
if( alignment != null || nextAlignment != null || iteratorAlignments.hasNext() ){
SAMRecord firstRecordLeft = iteratorAlignments.next();
log.error( firstRecordLeft.getReadName() + " " + firstRecordLeft.getFlags() );
throw new RuntimeException("The mapped bam file has more reads than the unmapped"
+ " after reading their common reads in their begining.");
}
}

@Override
protected int doWork() {

Expand Down Expand Up @@ -139,76 +286,8 @@ protected int doWork() {
SAMRecordIterator iteratorAlignments = alignments.iterator();
SAMRecordIterator iteratorIn = in.iterator();

while( iteratorIn.hasNext() ){

SAMRecord record = iteratorIn.next();

SAMRecord alignment;
if(iteratorAlignments.hasNext()){
alignment = iteratorAlignments.next();
}else if ( this.KEEP_EXTRA_UNMAPPED_READS ) {
out.addAlignment(record);
continue;
}else {
break;
}

String readName1 = record.getReadName();
String readName2 = alignment.getReadName();

boolean pairedRead1 = record.getReadPairedFlag();
boolean pairedRead2 = alignment.getReadPairedFlag();

boolean firstOfPair1 = false;
if(pairedRead1){
firstOfPair1 = record.getFirstOfPairFlag();
}
boolean firstOfPair2 = false;
if(pairedRead2){
firstOfPair2= alignment.getFirstOfPairFlag();
}

while( ( !readName1.equals(readName2)
|| pairedRead1 != pairedRead2
|| firstOfPair1 != firstOfPair2 )
){

if( this.KEEP_EXTRA_UNMAPPED_READS ){
out.addAlignment(record);
}

if (iteratorIn.hasNext()) {
record = iteratorIn.next();
readName1 = record.getReadName();
pairedRead1 = record.getReadPairedFlag();
firstOfPair1 = false;
if (pairedRead1) {
firstOfPair1 = record.getFirstOfPairFlag();
}
}else{
break;
}

}

if( readName1.equals(readName2)
&& pairedRead1 == pairedRead2
&& firstOfPair1 == firstOfPair2
){
this.mergeRecords(alignment, record);
out.addAlignment(alignment);
}else if ( this.KEEP_EXTRA_UNMAPPED_READS ) {
out.addAlignment(record);
}

}

if( iteratorAlignments.hasNext() ){
SAMRecord firstRecordLeft = iteratorAlignments.next();
log.error( firstRecordLeft.getReadName() + " " + firstRecordLeft.getFlags() );
throw new RuntimeException("The mapped bam file has more reads than the unmapped"
+ " after reading their common reads in their begining.");
}
//mergealign_legacy(iteratorIn, iteratorAlignments, out);
mergealign_pairedsupp(iteratorIn, iteratorAlignments, out);

out.close();
in.close();
Expand Down
46 changes: 46 additions & 0 deletions test/uk/ac/sanger/npg/illumina/Illumina2bamTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,52 @@ public void testMain() {
assertEquals("e614c4c92250472f1f01ba87bb7673e5", CheckMd5.getBamMd5AfterRemovePGVersion(testData.tempBamFile, "Illumina2bam"));
}

/**
* Test of READ_GROUP_ID paramater.
*/
@Test
public void testRG() {

System.out.println("READ_GROUP_ID");
Data testData = new Data("testdata/test_6000_1.sam");
String[] args = {"INTENSITY_DIR=testdata/110323_HS13_06000_B_B039WABXX/Data/Intensities",
"READ_GROUP_ID=6000_1",
"LANE=1",
"OUTPUT=" + testData.tempBamFile.getPath(),
"VALIDATION_STRINGENCY=STRICT",
"CREATE_MD5_FILE=true",
"FIRST_TILE=1101",
"COMPRESSION_LEVEL=1",
"TILE_LIMIT=1",
"LB=Test library",
"SM=Test Sample",
"ST=testStudy",
"TMP_DIR=testdata/",
"RUN_START_DATE=2011-03-23T00:00:00+0000"
};

testData.commonAsserts(args);
assertEquals("uk.ac.sanger.npg.illumina.Illumina2bam"
+ " INTENSITY_DIR=testdata/110323_HS13_06000_B_B039WABXX/Data/Intensities"
+ " LANE=1 OUTPUT=" + testData.tempBamFile.getPath()
+ " READ_GROUP_ID=6000_1 SAMPLE_ALIAS=Test Sample LIBRARY_NAME=Test library"
+ " STUDY_NAME=testStudy RUN_START_DATE=2011-03-23T00:00:00+0000 FIRST_TILE=1101 TILE_LIMIT=1"
+ " TMP_DIR=[testdata] VALIDATION_STRINGENCY=STRICT COMPRESSION_LEVEL=1"
+ " CREATE_MD5_FILE=true GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true"
+ " SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT"
+ " ADD_CLUSTER_INDEX_TAG=false VERBOSITY=INFO QUIET=false MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false",
testData.illumina2bam.getCommandLine()
);

SAMFileReader samFileReader = new SAMFileReader(testData.tempBamFile);
List<SAMReadGroupRecord> rgl = samFileReader.getFileHeader().getReadGroups();
assertEquals(1, rgl.size());
assertEquals("6000_1",rgl.get(0).getReadGroupId());
samFileReader.close();

assertEquals("9d35354e308e6440d825a3f1a3c5a1db", CheckMd5.getBamMd5AfterRemovePGVersion(testData.tempBamFile, "Illumina2bam"));
}

/**
* Test of instanceMain method and program record if no FIRST_TILE specified.
*/
Expand Down
Loading

0 comments on commit 54d31c1

Please sign in to comment.