Skip to content

Commit

Permalink
Test for edgeBetween (#32)
Browse files Browse the repository at this point in the history
  • Loading branch information
MaryamZaheri authored and armintoepfer committed Mar 6, 2017
1 parent 542ba00 commit 46120ef
Show file tree
Hide file tree
Showing 7 changed files with 2,666 additions and 42 deletions.
74 changes: 74 additions & 0 deletions src/AlignmentRecord.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2219,3 +2219,77 @@ void printBAM(std::ostream& output, std::string filename, std::deque<AlignmentRe
writer.Close();
}

void AlignmentRecord::saveAlignmentRecord(const char * filename){
std::ofstream ofs;
ofs.exceptions(ofstream::failbit | ofstream::badbit);

try{
ofs.open(filename);
ofs << this->name << endl;
ofs << this->start1 << endl;
ofs << this->end1 << endl;
ofs << this->phred_sum1 << endl;
ofs << this->start2 << endl;
ofs << this->end2 << endl;
ofs << this->phred_sum2 << endl;
ofs << this->probability << endl;
ofs << this->single_end << endl;
ofs << this->cov_pos.size() << endl;

std::vector<AlignmentRecord::mapValue>::const_iterator it;

for(it = this->cov_pos.begin(); it != this->cov_pos.end(); it++){
ofs << it->ref << " "
<< it->base << " "
<< it->qual << " "
<< it->prob << " "
<< it->pir << " "
<< it->read
<< '\n';
}
}
catch (const ifstream::failure& e) {
cout << "Exception opening/writing "<< filename;
}

ofs.close();
}

void AlignmentRecord::restoreAlignmentRecord(const char * filename){
std::ifstream ifs;
ifs.exceptions(ifstream::failbit | ifstream::badbit);

try{
ifs.open(filename);
ifs >> this->name;
ifs >> this->start1;
ifs >> this->end1;
ifs >> this->phred_sum1;
ifs >> this->start2;
ifs >> this->end2;
ifs >> this->phred_sum2;
ifs >> this->probability;
ifs >> this->single_end;

int nob = 0;
ifs >> nob;

int ref; //pos in ref
char base;
char qual; //phred score of base (QUALiy+33)
double prob; //error probability for qual
int pir; //position in read
int read; //number of paired end read: 0 for first, 1 for second read

for(int i=0; i<nob; i++){
ifs >> ref >> base >> qual >> prob >> pir >> read;
this->cov_pos.push_back({ref,base,qual,prob,pir,read});
}
}
catch (const ifstream::failure& e) {
cout << "Exception opening/reading "<< filename;
}

ifs.close();
}

3 changes: 3 additions & 0 deletions src/AlignmentRecord.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class AlignmentRecord {
void mergeAlignmentRecordsPaired(const AlignmentRecord& ar);
void mergeAlignmentRecordsMixed(const AlignmentRecord& ar);
public:
AlignmentRecord(){}
AlignmentRecord(const BamTools::BamAlignment& alignment, int id, std::vector<std::string>* readNameMap);
AlignmentRecord(std::unique_ptr<std::vector<const AlignmentRecord*>>& alignments,unsigned int clique_id);
/** merges overlapping paired end reads while reading in bam files*/
Expand Down Expand Up @@ -154,6 +155,8 @@ class AlignmentRecord {
friend void printReads(std::ostream& output, std::deque<AlignmentRecord*>&, int doc_haplotypes);
friend void printGFF(std::ostream& output, std::deque<AlignmentRecord*>& reads);
friend void printBAM(std::ostream& output, std::string filename, std::deque<AlignmentRecord*>& reads, BamTools::SamHeader& header, BamTools::RefVector& references);
void restoreAlignmentRecord(const char * filename);
void saveAlignmentRecord(const char * filename);
};

#endif /* ALIGNMENTRECORD_H_ */
48 changes: 7 additions & 41 deletions src/haploclique.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,9 @@ deque<AlignmentRecord*>* readBamFile(string filename, vector<string>& readNames,
}

bamreader.Close();

auto comp = [](AlignmentRecord* r1, AlignmentRecord* r2) { return r1->getIntervalStart() < r2->getIntervalStart(); };

sort(reads->begin(), reads->end(), comp);
cout << "Read BamFile: done" << endl;

Expand All @@ -196,8 +196,6 @@ deque<AlignmentRecord*>* readBamFile(string filename, vector<string>& readNames,

int main(int argc, char* argv[]) {

//cout << "Test\n" << endl;

map<std::string, docopt::value> args
= docopt::docopt(USAGE,
{ argv + 1, argv + argc },
Expand Down Expand Up @@ -269,13 +267,10 @@ int main(int argc, char* argv[]) {
} else {
simpson_map[atoi(words[0].c_str())] = std::log10(pow(atof(words[1].c_str()),2)+pow(atof(words[2].c_str()),2)+pow(atof(words[3].c_str()),2)+pow(atof(words[4].c_str()),2));
maxPosition2=atoi(words[0].c_str());
//cerr << simpson_map[atoi(words[0].c_str())] << endl;
}
}
ia.close();
}
//cout << "PARSE PRIOR: done" << endl;


clock_t clock_start = clock();
vector<string> originalReadNames;
Expand All @@ -301,7 +296,7 @@ int main(int argc, char* argv[]) {
unique_ptr<vector<mean_and_stddev_t> > readgroup_params(nullptr);
maxPosition1 = (maxPosition1>maxPosition2) ? maxPosition1 : maxPosition2;
edge_calculator = new NewEdgeCalculator(Q, edge_quasi_cutoff_cliques, overlap_cliques, frameshift_merge, simpson_map, edge_quasi_cutoff_single, overlap_single, edge_quasi_cutoff_mixed, maxPosition1, noProb0);
//edge_calculator = new NoMeEdgeCalculator("HELLO");

if (call_indels) {
double insert_mean = -1.0;
double insert_stddev = -1.0;
Expand All @@ -312,10 +307,12 @@ int main(int argc, char* argv[]) {
cerr << "Null distribution: mean " << insert_mean << ", sd " << insert_stddev << endl;
indel_edge_calculator = new GaussianEdgeCalculator(indel_edge_sig_level,insert_mean,insert_stddev);
}

std::ofstream* indel_os = nullptr;
if (call_indels) {
indel_os = new ofstream(indel_output_file.c_str());
}

LogWriter* lw = nullptr;
unsigned int number_of_reads = originalReadNames.size();
std::vector<unsigned int> read_clique_counter (number_of_reads);
Expand All @@ -334,61 +331,35 @@ int main(int argc, char* argv[]) {
}
ofstream* reads_ofstream = 0;

//DEBUG
//std::vector<unsigned int> res = {0,0,2,3};
//int min_index = std::min_element(res.begin(),res.end()) - res.begin();


// Main loop
int ct = 0;
double stdev = 1.0;
auto filter_fn = [&](unique_ptr<AlignmentRecord>& read, int size) {
//if (ct == 8){
// cout << read->getName() << " " <<read->getProbability() << " " << 1.0 /(size) << " " << significance*stdev << endl;
// bool t = read->getProbability() < 1.0 /size - significance*stdev;
// cout << t << endl;
//}
return (ct == 1 and filter_singletons and read->getReadCount() <= 1) or (ct > 1 and significance != 0.0 and read->getProbability() < 1.0 / size - significance*stdev);
};

int edgecounter = 0;
cout << "start: " << number_of_reads;
while (ct != iterations) {
clique_finder->initialize();
//cout << "Clique_finder initialized " << ct << endl;
//if(ct >= 5 && reads->size()<100 ){
//if(ct== 5){
// edge_calculator->setOverlapCliques(0.1);
//}
//if(ct==8){
// int k = 0;
//}
if (lw != nullptr) lw->initialize();
int size = reads->size();
while(not reads->empty()) {
assert(reads->front() != nullptr);

unique_ptr<AlignmentRecord> al_ptr(reads->front());
//if(al_ptr->getName() == "Clique_1037"){
// int k = 0;
//}
reads->pop_front();
if (filter_fn(al_ptr,size)) continue;

clique_finder->addAlignment(al_ptr,edgecounter);
//cout << "addAlignment " << ct << endl;
}

cout << "\tedges: " << edgecounter << endl;

delete reads;
//cout << "reads deleted " << ct << endl;
clique_finder->finish();
//cout << "clique_finder finished " << ct << endl;
reads = collector.finish();
//cout << "collector finish " << ct << endl;
if (lw != nullptr) lw->finish();

stdev = setProbabilities(*reads);
//if(ct == 5) break;
if (clique_finder->hasConverged()) break;
cout << ct++ << ": " << reads->size();
edgecounter = 0;
Expand Down Expand Up @@ -419,11 +390,6 @@ int main(int argc, char* argv[]) {
cout << "final: " << reads->size() << endl;

for (auto&& r : *reads) {
// if(r->getName() == "Clique_3370"){
// for (auto& i : r->getReadNames()){
// cout << i << endl;
// }
// }
delete r;
}

Expand Down
2 changes: 1 addition & 1 deletion test/cram/HIV-1_50_01.t
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## This script performs an end to end test on haploclique project. The test input data reads_HIV-1_50_01.bam is a simulated dataset generated from HIV-1 reference sequence using Simseq simulator. The expected output is presented in quasispecies_HIV-1_50_circleci.fasta.fasta and is compared to actual output quasispecies.fasta.fasta.
## This script performs an end to end test on haploclique project. The test input data reads_HIV-1_50_01.bam is a simulated dataset generated from HIV-1 reference sequence using Simseq simulator. The expected output is presented in quasispecies_HIV-1_50.fasta.fasta and is compared to actual output quasispecies.fasta.fasta.

$ $__HC_EXE $TESTDIR/../data/simulation/reads_HIV-1_50_01.bam > /dev/null;
$ diff quasispecies.fasta.fasta ${TESTDIR}/../data/simulation/quasispecies_HIV-1_50_circleci.fasta.fasta
Loading

0 comments on commit 46120ef

Please sign in to comment.