From 542ba003541f9a8f2d0bcb5ed21f2bfae99db046 Mon Sep 17 00:00:00 2001 From: MaryamZaheri Date: Mon, 20 Feb 2017 10:29:01 +0100 Subject: [PATCH] fixing a bug and cleaning up the readBamFile (#31) --- src/haploclique.cpp | 80 ++++++------------ .../reads_arabis_mosaic_virus_20nc.bam | Bin 0 -> 791 bytes test/unit/unit1.cpp | 33 ++++++-- 3 files changed, 51 insertions(+), 62 deletions(-) create mode 100644 test/data/simulation/unit_data/reads_arabis_mosaic_virus_20nc.bam diff --git a/src/haploclique.cpp b/src/haploclique.cpp index e4c88c8..e5fa82c 100644 --- a/src/haploclique.cpp +++ b/src/haploclique.cpp @@ -140,57 +140,43 @@ deque* readBamFile(string filename, vector& readNames, name_map_t names_to_reads; deque* reads = new deque; BamTools::BamReader bamreader; + if (not bamreader.Open(filename)) { - cerr << bamreader.GetFilename() << endl; - throw std::runtime_error("Couldn't open Bamfile"); + throw std::runtime_error("Couldn't open Bamfile."); } + BamTools::BamAlignment alignment; + // retrieve 'metadata' from input BAM files, these are required by BamWriter header = bamreader.GetHeader(); references = bamreader.GetReferenceData(); - //int readcounter = 0; - //int singleendcounter = 0; - //int pairedendcounter = 0; - //int overlap = 0; - while (bamreader.GetNextAlignment(alignment)) { - /*std::size_t found = alignment.QueryBases.find('N'); - if (found!=std::string::npos){ - cout << alignment.Name << endl; - cout << alignment.QueryBases << endl; - cout << alignment.Qualities << endl; - cout << endl; - }*/ bool valid = false; for (auto& i : alignment.CigarData){ if (i.Type == 'M' && i.Length > 0) valid = true; } if(alignment.CigarData.size() > 0 && valid){ if(names_to_reads.count(alignment.Name) > 0) { - //cout << alignment.Name << endl; names_to_reads[alignment.Name]->pairWith(alignment); - /*if (names_to_reads[alignment.Name]->isSingleEnd()){ - ++overlap; - ++readcounter; - } else { - pairedendcounter++; - ++readcounter; - }*/ reads->push_back(names_to_reads[alignment.Name]); names_to_reads.erase(alignment.Name); } else { names_to_reads[alignment.Name] = new AlignmentRecord(alignment, readNames.size(), &readNames); readNames.push_back(alignment.Name); - //++readcounter; } } } - // Push all single-end reads remaining in names_to_reads into the reads vector. Unmapped reads are filtered out in advance. + // push all single-end reads remaining in names_to_reads into the reads vector. Unmapped reads are filtered out in advance. for (const auto& i : names_to_reads) { reads->push_back(i.second); - //readcounter++; + } + + // if no reads could be retrieved from the BamFile + if (reads->empty()){ + cerr << bamreader.GetFilename() << endl; + throw std::runtime_error("No reads could be retrieved from the BamFile. "); } bamreader.Close(); @@ -200,40 +186,11 @@ deque* readBamFile(string filename, vector& readNames, sort(reads->begin(), reads->end(), comp); cout << "Read BamFile: done" << endl; - //DEBUGGING - //check which alignments have no cigar string -> no covered positions -> no edge possible - /*while(not reads->empty()){ - assert(reads->front() != nullptr); - unique_ptr al_ptr(reads->front()); - reads->pop_front(); - if (al_ptr->getCigar1().size() == 0){ - cout << al_ptr->getName() << endl; - } - } - //cout << "Number of Reads: " << readcounter << endl; - //cout << "Number of Pairs: " << pairedendcounter << endl; - //cout << "Number of Overlapping Pairs: " << overlap << endl; - //check number of single ends and paired ends after merging - int counter = 0; - while(not reads->empty()){ - assert(reads->front() != nullptr); - unique_ptr al_ptr(reads->front()); - reads->pop_front(); - if (al_ptr->isSingleEnd()){ - singleendcounter++; - } else if (al_ptr->isPairedEnd()){ - pairedendcounter++; - } - counter++; - } - cout << singleendcounter << " " << pairedendcounter << " " << counter << endl; - */ maxPosition = (*std::max_element(std::begin(*reads), std::end(*reads), [](const AlignmentRecord* lhs,const AlignmentRecord* rhs){ return lhs->getIntervalEnd() < rhs->getIntervalEnd(); }))->getIntervalEnd(); - return reads; } @@ -325,7 +282,20 @@ int main(int argc, char* argv[]) { unsigned int maxPosition1; BamTools::SamHeader header; BamTools::RefVector references; - deque* reads = readBamFile(bamfile, originalReadNames,maxPosition1,header,references); + + deque* reads; + try{ + reads = readBamFile(bamfile, originalReadNames,maxPosition1,header,references); + } + catch(const runtime_error& error){ + cerr << error.what() << endl; + return 1; + } + catch(...){ + cerr << "Unexpected error while reading BamFile." << endl; + return 1; + } + EdgeCalculator* edge_calculator = nullptr; EdgeCalculator* indel_edge_calculator = nullptr; unique_ptr > readgroup_params(nullptr); diff --git a/test/data/simulation/unit_data/reads_arabis_mosaic_virus_20nc.bam b/test/data/simulation/unit_data/reads_arabis_mosaic_virus_20nc.bam new file mode 100644 index 0000000000000000000000000000000000000000..f8354e8c10af8e614094bf264357b71e8b4149c3 GIT binary patch literal 791 zcmV+y1L*u8iwFb&00000{{{d;LjnN$0=1Y;Yui8&h9`#}WN5)52OoN{g%7#J`mp7% zQ{)JuLgJ*TExi;WOY2w-vK?8F#P|}UgD?Hfty0-bpk>$*_QGP2*q6t%Gy9HqU|t`+ zI|4YjE$Q2^uQd$m@}@tXO_}7}^!?c^4#KIIu+q8nS#pN`yKvQO>m6Nfu3|P`4KG|( z)r?lVp{=B=Vc$?o=eL*AC#yet^rYM2-=9oZZFCx%aTaS@OFs+3IC+*Xu3leOBNkPY zXzqs7fIU@}uL~A0l%+ReaTqBUTihqJx#ExRT$W6h$|8&w>_N#ED03ETZg;BRyU)vc zW9%)G2jX(w)t{D;{xj0@<)thR@G+anp9DR7?_~4$cD#qzaSbPLp1nSC!pLC{?j%uN z4J`R|lUj16zW(u_VLCp1&*s0fzi`=?b^Z9%tXC`)=x;SqSxU5Oj#d&+M;env(;=CT5 z+M9Y_%QRh_*Mt1bk;`S`zTvzh;Njit`7i>0ob8p@FYKE*uV2{baozxSsZgxdB?DGV|lOf9mB4QYs9u1<<$jkli026f-X;_Z36cSG|7mx zS#QS8f+ma7wtzc4Y86D<7Noo-p~<4OhM=(#X$?VNBht1D2LYvR7v3_Iwj+3m5otSu zXBd&TE7&p+X}f|Q1Ch2T*ys^ydxCu_k=8p5t#=sOk)W9oX-9|gGGFl1he+!Se)$k- z1HlUcB5fdee?g>Wz`MqM(lX$eL&Ww2r5zt86UKs>@CWa=rVnQp001A02m}BC00030 V1^_}s0stET0{{R300000004M)gS7wv literal 0 HcmV?d00001 diff --git a/test/unit/unit1.cpp b/test/unit/unit1.cpp index 4d56412..a974df3 100644 --- a/test/unit/unit1.cpp +++ b/test/unit/unit1.cpp @@ -13,25 +13,44 @@ #include "AlignmentRecord.h" #define main HaploMain #include "haploclique.cpp" - +#include using namespace std; using namespace boost; +// This test verifies if readBamFile function returns correct number of reads for a specific input file. TEST(readBamFileTest, readBamFileCountSeqs){ - // THIS IS NOT A UNIT TEST, BUT A REGRESSION CRAM TEST -#if 0 - string bamfile = "/home/ubuntu/haploclique/test/data/simulation/reads_arabis_mosaic_virus_25_01.bam"; + string bamfile = "test/data/simulation/reads_HIV-1_50_01.bam"; vector originalReadNames; unsigned int maxPosition1; BamTools::SamHeader header; BamTools::RefVector references; - int readsSize = 30; + int readsSize = 1836; std::deque* reads = readBamFile(bamfile, originalReadNames,maxPosition1,header,references); EXPECT_EQ(readsSize, reads->size()); -#endif - EXPECT_EQ(1, 1); + +} + +// This test verifies if readBamFile function can retrieve any reads from the input file. +TEST(readBamFileExpectTest, readBamFileExpectReadsRetrival){ + + string bamfile = "test/data/simulation/unit_data/reads_arabis_mosaic_virus_20nc.bam"; + vector originalReadNames; + unsigned int maxPosition1; + BamTools::SamHeader header; + BamTools::RefVector references; + + try{ + std::deque* reads = readBamFile(bamfile, originalReadNames,maxPosition1,header,references); + FAIL() << "Expected std::runtime_error" << endl; + } + catch(const std::runtime_error& error){ + EXPECT_EQ(error.what(), std::string("No reads could be retrieved from the BamFile. ")); + } + catch(...){ + FAIL() << "Expected std::runtime_error" << endl; + } }