From f4970af6ce7e43312ec20f1fbce13d3da0918caa Mon Sep 17 00:00:00 2001 From: mozack Date: Tue, 8 Oct 2019 10:22:34 -0400 Subject: [PATCH] Added option to disallow complex indels at read edge to workaround Manta problem. See: https://github.com/mozack/abra2/issues/24 --- src/main/java/abra/CigarUtils.java | 48 ++++++++++++++++++++++++ src/main/java/abra/ReAligner.java | 6 ++- src/main/java/abra/ReAlignerOptions.java | 6 +++ src/test/java/abra/CigarUtilsTest.java | 32 +++++++++++++++- 4 files changed, 90 insertions(+), 2 deletions(-) diff --git a/src/main/java/abra/CigarUtils.java b/src/main/java/abra/CigarUtils.java index e02c169..5bc7c83 100644 --- a/src/main/java/abra/CigarUtils.java +++ b/src/main/java/abra/CigarUtils.java @@ -178,6 +178,50 @@ public static boolean hasNDN(String cigar) { return hasNDM; } + /** + * Returns true if the input cigar string begins or ends with 2 adjacent indels. + * Clipping is ignored. + */ + public static boolean startsOrEndsWithComplexIndel(String cigar) { + boolean ret = false; + + List blocks = getUnclippedCigarBlocks(cigar); + + if (blocks.size() > 1) { + if (blocks.get(0).isIndel() && blocks.get(1).isIndel()) { + ret = true; + } else if (blocks.get(blocks.size()-1).isIndel() && blocks.get(blocks.size()-2).isIndel()) { + ret = true; + } + } + + return ret; + } + + private static List getUnclippedCigarBlocks(String cigar) { + + List cigarBlocks = new ArrayList(); + try { + StringBuffer len = new StringBuffer(); + for (int i=0; i getCigarBlocks(String cigar) { List cigarBlocks = new ArrayList(); @@ -272,6 +316,10 @@ static class CigarBlock { boolean isGap() { return type == 'D' || type == 'N'; } + + boolean isIndel() { + return type == 'D' || type == 'I'; + } } } diff --git a/src/main/java/abra/ReAligner.java b/src/main/java/abra/ReAligner.java index 6d4dcb8..47503d0 100644 --- a/src/main/java/abra/ReAligner.java +++ b/src/main/java/abra/ReAligner.java @@ -157,6 +157,7 @@ public class ReAligner { private boolean shouldFilterNDN; private boolean isGappedContigsOnly; private boolean shouldUseJunctionsAsContigs; + private boolean disallowComplexIndelsAtReadEdge; public void reAlign(String[] inputFiles, String[] outputFiles) throws Exception { @@ -323,7 +324,7 @@ void processChromosomeChunk(int chromosomeChunkIdx) throws Exception { read1.setBaseQualityString(rc.reverse(read1.getBaseQualityString())); read1.setReadNegativeStrandFlag(false); record.setUnalignedRc(true); - } + } } List overlappingRegions = new ArrayList(); @@ -605,6 +606,8 @@ private void remapRead(ReadEvaluator readEvaluator, SAMRecord read, int origEdit Logger.trace("Not moving read: " + read.getReadName() + " from: " + read.getAlignmentStart() + " to: " + alignment.pos); } else if (shouldFilterNDN && CigarUtils.hasNDN(alignment.cigar)) { Logger.trace("Not remapping read: %s to NDM cigar: %s", read, alignment.cigar); + } else if (disallowComplexIndelsAtReadEdge && CigarUtils.startsOrEndsWithComplexIndel(alignment.cigar)) { + Logger.trace("Not remapping read: %s to edge complex indel cigar: %s", read, alignment.cigar); } else if (origEditDist > alignment.numMismatches) { SAMRecord orig = read.deepCopy(); @@ -1798,6 +1801,7 @@ public static void run(String[] args) throws Exception { realigner.shouldFilterNDN = options.isNoNDN(); realigner.isGappedContigsOnly = options.isGappedContigsOnly(); realigner.shouldUseJunctionsAsContigs = options.shouldUseJunctionsAsContigs(); + realigner.disallowComplexIndelsAtReadEdge = options.disallowComplexIndelsAtReadEdge(); MAX_REGION_LENGTH = options.getWindowSize(); MIN_REGION_REMAINDER = options.getWindowOverlap(); diff --git a/src/main/java/abra/ReAlignerOptions.java b/src/main/java/abra/ReAlignerOptions.java index 161ff38..bd499e2 100644 --- a/src/main/java/abra/ReAlignerOptions.java +++ b/src/main/java/abra/ReAlignerOptions.java @@ -59,6 +59,7 @@ public class ReAlignerOptions extends Options { private static final String NO_NDN = "no-ndn"; private static final String GAPPED_CONTIGS_ONLY = "gc"; private static final String USE_JUNCTIONS_AS_CONTIGS = "ujac"; + private static final String NO_COMPLEX_INDELS_AT_READ_EDGE = "no-edge-ci"; private OptionParser parser; private boolean isValid; @@ -116,6 +117,7 @@ protected OptionParser getOptionParser() { parser.accepts(NO_NDN, "If specified, do not allow adjacent N-D-N cigar elements"); parser.accepts(GAPPED_CONTIGS_ONLY, "If specified, only reprocess regions that contain at least one contig containing an indel or splice (experimental)"); parser.accepts(USE_JUNCTIONS_AS_CONTIGS, "If specified, use junction permuations as contigs (Experimental - may use excessive memory and compute times)"); + parser.accepts(NO_COMPLEX_INDELS_AT_READ_EDGE, "If specified, do not update alignments for reads that have a complex indel at the read edge. i.e. Do not allow alignments like: 90M10D10I"); } return parser; @@ -456,4 +458,8 @@ public int getCompressionLevel() { public boolean shouldUseJunctionsAsContigs() { return (Boolean) getOptions().has(USE_JUNCTIONS_AS_CONTIGS); } + + public boolean disallowComplexIndelsAtReadEdge() { + return (Boolean) getOptions().has(NO_COMPLEX_INDELS_AT_READ_EDGE); + } } diff --git a/src/test/java/abra/CigarUtilsTest.java b/src/test/java/abra/CigarUtilsTest.java index 2aff60d..3ed9ed1 100644 --- a/src/test/java/abra/CigarUtilsTest.java +++ b/src/test/java/abra/CigarUtilsTest.java @@ -2,7 +2,6 @@ import static org.testng.Assert.assertEquals; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -230,4 +229,35 @@ public void testHasNDM() { assertEquals(false, CigarUtils.hasNDN("100I")); assertEquals(false, CigarUtils.hasNDN("50M200N5M300N5M")); } + + @Test (groups = "unit") + public void testStartsOrEndsWithComplexIndel() { + assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("100M")); + assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("50M10D50M")); + assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("50M10I50M")); + assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("50M10I50M5S")); + assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("10S100M10S")); + assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("10H100M10H")); + assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("10H100M1D1M1I")); + assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("50M10D10I50M")); + assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("50M10I10D50M")); + + assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("50M10I10D")); + assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("50M10D10I")); + assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("50M10D10I5S")); + assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("50M10D10I5S5H")); + assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("10D10I50M")); + assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("5S10D10I50M")); + assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("5H10I10D50M")); + + + } + + /* + public static void main(String[] args) { + TestNG testSuite = new TestNG(); + testSuite.setTestClasses(new Class[] { CigarUtilsTest.class }); + testSuite.run(); + } + */ }