diff --git a/src/haploclique.cpp b/src/haploclique.cpp index e4c88c8..e5fa82c 100644 --- a/src/haploclique.cpp +++ b/src/haploclique.cpp @@ -140,57 +140,43 @@ deque* readBamFile(string filename, vector& readNames, name_map_t names_to_reads; deque* reads = new deque; BamTools::BamReader bamreader; + if (not bamreader.Open(filename)) { - cerr << bamreader.GetFilename() << endl; - throw std::runtime_error("Couldn't open Bamfile"); + throw std::runtime_error("Couldn't open Bamfile."); } + BamTools::BamAlignment alignment; + // retrieve 'metadata' from input BAM files, these are required by BamWriter header = bamreader.GetHeader(); references = bamreader.GetReferenceData(); - //int readcounter = 0; - //int singleendcounter = 0; - //int pairedendcounter = 0; - //int overlap = 0; - while (bamreader.GetNextAlignment(alignment)) { - /*std::size_t found = alignment.QueryBases.find('N'); - if (found!=std::string::npos){ - cout << alignment.Name << endl; - cout << alignment.QueryBases << endl; - cout << alignment.Qualities << endl; - cout << endl; - }*/ bool valid = false; for (auto& i : alignment.CigarData){ if (i.Type == 'M' && i.Length > 0) valid = true; } if(alignment.CigarData.size() > 0 && valid){ if(names_to_reads.count(alignment.Name) > 0) { - //cout << alignment.Name << endl; names_to_reads[alignment.Name]->pairWith(alignment); - /*if (names_to_reads[alignment.Name]->isSingleEnd()){ - ++overlap; - ++readcounter; - } else { - pairedendcounter++; - ++readcounter; - }*/ reads->push_back(names_to_reads[alignment.Name]); names_to_reads.erase(alignment.Name); } else { names_to_reads[alignment.Name] = new AlignmentRecord(alignment, readNames.size(), &readNames); readNames.push_back(alignment.Name); - //++readcounter; } } } - // Push all single-end reads remaining in names_to_reads into the reads vector. Unmapped reads are filtered out in advance. + // push all single-end reads remaining in names_to_reads into the reads vector. Unmapped reads are filtered out in advance. for (const auto& i : names_to_reads) { reads->push_back(i.second); - //readcounter++; + } + + // if no reads could be retrieved from the BamFile + if (reads->empty()){ + cerr << bamreader.GetFilename() << endl; + throw std::runtime_error("No reads could be retrieved from the BamFile. "); } bamreader.Close(); @@ -200,40 +186,11 @@ deque* readBamFile(string filename, vector& readNames, sort(reads->begin(), reads->end(), comp); cout << "Read BamFile: done" << endl; - //DEBUGGING - //check which alignments have no cigar string -> no covered positions -> no edge possible - /*while(not reads->empty()){ - assert(reads->front() != nullptr); - unique_ptr al_ptr(reads->front()); - reads->pop_front(); - if (al_ptr->getCigar1().size() == 0){ - cout << al_ptr->getName() << endl; - } - } - //cout << "Number of Reads: " << readcounter << endl; - //cout << "Number of Pairs: " << pairedendcounter << endl; - //cout << "Number of Overlapping Pairs: " << overlap << endl; - //check number of single ends and paired ends after merging - int counter = 0; - while(not reads->empty()){ - assert(reads->front() != nullptr); - unique_ptr al_ptr(reads->front()); - reads->pop_front(); - if (al_ptr->isSingleEnd()){ - singleendcounter++; - } else if (al_ptr->isPairedEnd()){ - pairedendcounter++; - } - counter++; - } - cout << singleendcounter << " " << pairedendcounter << " " << counter << endl; - */ maxPosition = (*std::max_element(std::begin(*reads), std::end(*reads), [](const AlignmentRecord* lhs,const AlignmentRecord* rhs){ return lhs->getIntervalEnd() < rhs->getIntervalEnd(); }))->getIntervalEnd(); - return reads; } @@ -325,7 +282,20 @@ int main(int argc, char* argv[]) { unsigned int maxPosition1; BamTools::SamHeader header; BamTools::RefVector references; - deque* reads = readBamFile(bamfile, originalReadNames,maxPosition1,header,references); + + deque* reads; + try{ + reads = readBamFile(bamfile, originalReadNames,maxPosition1,header,references); + } + catch(const runtime_error& error){ + cerr << error.what() << endl; + return 1; + } + catch(...){ + cerr << "Unexpected error while reading BamFile." << endl; + return 1; + } + EdgeCalculator* edge_calculator = nullptr; EdgeCalculator* indel_edge_calculator = nullptr; unique_ptr > readgroup_params(nullptr); diff --git a/test/data/simulation/unit_data/reads_arabis_mosaic_virus_20nc.bam b/test/data/simulation/unit_data/reads_arabis_mosaic_virus_20nc.bam new file mode 100644 index 0000000..f8354e8 Binary files /dev/null and b/test/data/simulation/unit_data/reads_arabis_mosaic_virus_20nc.bam differ diff --git a/test/unit/unit1.cpp b/test/unit/unit1.cpp index 4d56412..a974df3 100644 --- a/test/unit/unit1.cpp +++ b/test/unit/unit1.cpp @@ -13,25 +13,44 @@ #include "AlignmentRecord.h" #define main HaploMain #include "haploclique.cpp" - +#include using namespace std; using namespace boost; +// This test verifies if readBamFile function returns correct number of reads for a specific input file. TEST(readBamFileTest, readBamFileCountSeqs){ - // THIS IS NOT A UNIT TEST, BUT A REGRESSION CRAM TEST -#if 0 - string bamfile = "/home/ubuntu/haploclique/test/data/simulation/reads_arabis_mosaic_virus_25_01.bam"; + string bamfile = "test/data/simulation/reads_HIV-1_50_01.bam"; vector originalReadNames; unsigned int maxPosition1; BamTools::SamHeader header; BamTools::RefVector references; - int readsSize = 30; + int readsSize = 1836; std::deque* reads = readBamFile(bamfile, originalReadNames,maxPosition1,header,references); EXPECT_EQ(readsSize, reads->size()); -#endif - EXPECT_EQ(1, 1); + +} + +// This test verifies if readBamFile function can retrieve any reads from the input file. +TEST(readBamFileExpectTest, readBamFileExpectReadsRetrival){ + + string bamfile = "test/data/simulation/unit_data/reads_arabis_mosaic_virus_20nc.bam"; + vector originalReadNames; + unsigned int maxPosition1; + BamTools::SamHeader header; + BamTools::RefVector references; + + try{ + std::deque* reads = readBamFile(bamfile, originalReadNames,maxPosition1,header,references); + FAIL() << "Expected std::runtime_error" << endl; + } + catch(const std::runtime_error& error){ + EXPECT_EQ(error.what(), std::string("No reads could be retrieved from the BamFile. ")); + } + catch(...){ + FAIL() << "Expected std::runtime_error" << endl; + } }