ToPS
|
00001 #include "MultipleAlignment.hpp" 00002 00003 using namespace std; 00004 00005 namespace tops{ 00006 00007 void MultipleAlignment::clearAll(){ 00008 _graph.clear(); 00009 _seqPosMap.clear(); 00010 _postProbs.clear(); 00011 _alignment.clear(); 00012 } 00013 00014 void MultipleAlignment::computeAllAlignments(ProbabilisticModelPtr almodel, SequenceList seqs, vector<string> names, int numit, string alFileName, string outDir){\ 00015 boost::timer t; 00016 int avg = 0; 00017 for(int i = 0; i < (int)seqs.size(); i++){ 00018 avg += seqs[i].size(); 00019 } 00020 avg /= seqs.size(); 00021 cerr << alFileName << endl << "number of sequences: " << seqs.size() << endl << "average size: " << avg << endl; 00022 _seqs = seqs; 00023 _names = names; 00024 00025 cerr << "\tComputing posterior probabilities..."; 00026 t.restart(); 00027 postProbAlign(almodel,_ppAlign,_ppGap1,_ppGap2,_eas); 00028 cerr << "Done! (" << t.elapsed() << "s)" << endl << endl; 00029 00030 map<string,map<string,SparseMatrixPtr > > consPPAlign; 00031 map<string,map<string,SparseMatrixPtr > > consPPGap1; 00032 map<string,map<string,SparseMatrixPtr > > consPPGap2; 00033 00034 cerr << "\tNo consistency." << endl; 00035 cerr << "\t\tGenerating alignment with modified sequence annealing..."; 00036 t.restart(); 00037 for(int i = 0; i < (int)_names.size(); i++){ 00038 for(int j = i+1; j < (int)_names.size(); j++){ 00039 _ppAlign[_names[i]][_names[j]]->addGaps(_ppGap1[_names[i]][_names[j]], _ppGap2[_names[i]][_names[j]]); 00040 } 00041 } 00042 initializePostProbsList(_ppAlign); 00043 generateGraph(); 00044 generateAlignment(almodel); 00045 string outFile = ""; 00046 outFile.append(outDir); 00047 outFile.append("C0"); 00048 outFile.append(alFileName); 00049 ofstream fout; 00050 fout.open(outFile.c_str()); 00051 for(int i = 0; i < (int)_seqs.size(); i++){ 00052 fout << _alignment[i]; 00053 } 00054 fout << endl; 00055 fout.close(); 00056 cerr << "Done! (" << t.elapsed() << "s)" << endl; 00057 00058 clearAll(); 00059 00060 cerr << "\t\tGenerating alignment with original sequence annealing..."; 00061 t.restart(); 00062 for(int i = 0; i < (int)_names.size(); i++){ 00063 for(int j = i+1; j < (int)_names.size(); j++){ 00064 _ppAlign[_names[i]][_names[j]]->removeLastLine(); 00065 _ppAlign[_names[i]][_names[j]]->removeLastColumn(); 00066 } 00067 } 00068 initializePostProbsList(_ppAlign); 00069 generateGraph(); 00070 generateAlignment(almodel); 00071 outFile = ""; 00072 outFile.append(outDir); 00073 outFile.append("C1"); 00074 outFile.append(alFileName); 00075 fout.open(outFile.c_str()); 00076 for(int i = 0; i < (int)_seqs.size(); i++){ 00077 fout << _alignment[i]; 00078 } 00079 fout << endl; 00080 fout.close(); 00081 cerr << "Done! (" << t.elapsed() << "s)" << endl << endl; 00082 00083 clearAll(); 00084 00085 for(int i = 0; i < (int)_names.size(); i++){ 00086 for(int j = i+1; j < (int)_names.size(); j++){ 00087 consPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppAlign[_names[i]][_names[j]])); 00088 consPPGap1[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppGap1[_names[i]][_names[j]])); 00089 consPPGap2[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppGap2[_names[i]][_names[j]])); 00090 } 00091 } 00092 00093 cerr << "\tApplying consistency transformation with weights and gaps..."; 00094 t.restart(); 00095 predalignAlConsistencyWithEas(consPPAlign, consPPGap1, consPPGap2, numit, _eas); 00096 cerr << "Done! (" << t.elapsed() << "s)" << endl; 00097 cerr << "\t\tGenerating alignment with modified sequence annealing..."; 00098 t.restart(); 00099 initializePostProbsList(consPPAlign); 00100 generateGraph(); 00101 generateAlignment(almodel); 00102 outFile = ""; 00103 outFile.append(outDir); 00104 outFile.append("C2"); 00105 outFile.append(alFileName); 00106 fout.open(outFile.c_str()); 00107 for(int i = 0; i < (int)_seqs.size(); i++){ 00108 fout << _alignment[i]; 00109 } 00110 fout << endl; 00111 fout.close(); 00112 cerr << "Done! (" << t.elapsed() << "s)" << endl; 00113 00114 clearAll(); 00115 00116 cerr << "\t\tGenerating alignment original sequence annealing..."; 00117 t.restart(); 00118 for(int i = 0; i < (int)_names.size(); i++){ 00119 for(int j = i+1; j < (int)_names.size(); j++){ 00120 consPPAlign[_names[i]][_names[j]]->removeLastLine(); 00121 consPPAlign[_names[i]][_names[j]]->removeLastColumn(); 00122 } 00123 } 00124 initializePostProbsList(consPPAlign); 00125 generateGraph(); 00126 generateAlignment(almodel); 00127 outFile = ""; 00128 outFile.append(outDir); 00129 outFile.append("C3"); 00130 outFile.append(alFileName); 00131 fout.open(outFile.c_str()); 00132 for(int i = 0; i < (int)_seqs.size(); i++){ 00133 fout << _alignment[i]; 00134 } 00135 fout << endl; 00136 fout.close(); 00137 cerr << "Done! (" << t.elapsed() << "s)" << endl << endl; 00138 00139 clearAll(); 00140 00141 for(int i = 0; i < (int)_names.size(); i++){ 00142 for(int j = i+1; j < (int)_names.size(); j++){ 00143 consPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppAlign[_names[i]][_names[j]])); 00144 consPPGap1[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppGap1[_names[i]][_names[j]])); 00145 consPPGap2[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppGap2[_names[i]][_names[j]])); 00146 } 00147 } 00148 00149 cerr << "\tApplying consistency transformation without weights and with gaps..."; 00150 t.restart(); 00151 predalignAlConsistencyNoEas(consPPAlign, _ppGap1, _ppGap2, numit); 00152 cerr << "Done! (" << t.elapsed() << "s)" << endl; 00153 cerr << "\t\tGenerating alignment with modified sequence annealing..."; 00154 t.restart(); 00155 initializePostProbsList(consPPAlign); 00156 generateGraph(); 00157 generateAlignment(almodel); 00158 outFile = ""; 00159 outFile.append(outDir); 00160 outFile.append("C4"); 00161 outFile.append(alFileName); 00162 fout.open(outFile.c_str()); 00163 for(int i = 0; i < (int)_seqs.size(); i++){ 00164 fout << _alignment[i]; 00165 } 00166 fout << endl; 00167 fout.close(); 00168 cerr << "Done! (" << t.elapsed() << "s)" << endl; 00169 00170 clearAll(); 00171 00172 cerr << "\t\tGenerating alignment original sequence annealing..."; 00173 t.restart(); 00174 for(int i = 0; i < (int)_names.size(); i++){ 00175 for(int j = i+1; j < (int)_names.size(); j++){ 00176 consPPAlign[_names[i]][_names[j]]->removeLastLine(); 00177 consPPAlign[_names[i]][_names[j]]->removeLastColumn(); 00178 } 00179 } 00180 initializePostProbsList(consPPAlign); 00181 generateGraph(); 00182 generateAlignment(almodel); 00183 outFile = ""; 00184 outFile.append(outDir); 00185 outFile.append("C5"); 00186 outFile.append(alFileName); 00187 fout.open(outFile.c_str()); 00188 for(int i = 0; i < (int)_seqs.size(); i++){ 00189 fout << _alignment[i]; 00190 } 00191 fout << endl; 00192 fout.close(); 00193 cerr << "Done! (" << t.elapsed() << "s)" << endl << endl; 00194 00195 clearAll(); 00196 00197 for(int i = 0; i < (int)_names.size(); i++){ 00198 for(int j = i+1; j < (int)_names.size(); j++){ 00199 consPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppAlign[_names[i]][_names[j]])); 00200 consPPGap1[_names[i]][_names[j]].reset(); 00201 consPPGap2[_names[i]][_names[j]].reset(); 00202 } 00203 } 00204 00205 cerr << "\tApplying picxaa consistency transformation..."; 00206 t.restart(); 00207 picxaaAlConsistency(consPPAlign, _eas, numit); 00208 cerr << "Done! (" << t.elapsed() << "s)" << endl; 00209 cerr << "\t\tGenerating alignment with original sequence annealing..."; 00210 t.restart(); 00211 initializePostProbsList(consPPAlign); 00212 generateGraph(); 00213 generateAlignment(almodel); 00214 outFile = ""; 00215 outFile.append(outDir); 00216 outFile.append("C6"); 00217 outFile.append(alFileName); 00218 fout.open(outFile.c_str()); 00219 for(int i = 0; i < (int)_seqs.size(); i++){ 00220 fout << _alignment[i]; 00221 } 00222 fout << endl; 00223 fout.close(); 00224 cerr << "Done! (" << t.elapsed() << "s)" << endl; 00225 00226 clearAll(); 00227 00228 cerr << "\t\tGenerating alignment with modified sequence annealing..."; 00229 t.restart(); 00230 for(int i = 0; i < (int)_names.size(); i++){ 00231 for(int j = i+1; j < (int)_names.size(); j++){ 00232 consPPAlign[_names[i]][_names[j]]->addGaps(_ppGap1[_names[i]][_names[j]], _ppGap2[_names[i]][_names[j]]); 00233 } 00234 } 00235 initializePostProbsList(consPPAlign); 00236 generateGraph(); 00237 generateAlignment(almodel); 00238 outFile = ""; 00239 outFile.append(outDir); 00240 outFile.append("C7"); 00241 outFile.append(alFileName); 00242 fout.open(outFile.c_str()); 00243 for(int i = 0; i < (int)_seqs.size(); i++){ 00244 fout << _alignment[i]; 00245 } 00246 fout << endl; 00247 fout.close(); 00248 cerr << "Done! (" << t.elapsed() << "s)" << endl << endl; 00249 00250 clearAll(); 00251 00252 for(int i = 0; i < (int)_names.size(); i++){ 00253 for(int j = i+1; j < (int)_names.size(); j++){ 00254 consPPAlign[_names[i]][_names[j]].reset(); 00255 } 00256 } 00257 00258 cerr << "\tApplying classic consistency transformation..."; 00259 t.restart(); 00260 classicAlConsistency(_ppAlign, numit); 00261 cerr << "Done! (" << t.elapsed() << "s)" << endl; 00262 cerr << "\t\tGenerating alignment with original sequence annealing..."; 00263 t.restart(); 00264 initializePostProbsList(_ppAlign); 00265 generateGraph(); 00266 generateAlignment(almodel); 00267 outFile = ""; 00268 outFile.append(outDir); 00269 outFile.append("C8"); 00270 outFile.append(alFileName); 00271 fout.open(outFile.c_str()); 00272 for(int i = 0; i < (int)_seqs.size(); i++){ 00273 fout << _alignment[i]; 00274 } 00275 fout << endl; 00276 fout.close(); 00277 cerr << "Done! (" << t.elapsed() << "s)" << endl; 00278 00279 clearAll(); 00280 00281 cerr << "\t\tGenerating alignment with modified sequence annealing..."; 00282 t.restart(); 00283 for(int i = 0; i < (int)_names.size(); i++){ 00284 for(int j = i+1; j < (int)_names.size(); j++){ 00285 _ppAlign[_names[i]][_names[j]]->addGaps(_ppGap1[_names[i]][_names[j]], _ppGap2[_names[i]][_names[j]]); 00286 } 00287 } 00288 initializePostProbsList(_ppAlign); 00289 generateGraph(); 00290 generateAlignment(almodel); 00291 outFile = ""; 00292 outFile.append(outDir); 00293 outFile.append("C9"); 00294 outFile.append(alFileName); 00295 fout.open(outFile.c_str()); 00296 for(int i = 0; i < (int)_seqs.size(); i++){ 00297 fout << _alignment[i]; 00298 } 00299 fout << endl; 00300 fout.close(); 00301 cerr << "Done! (" << t.elapsed() << "s)" << endl << endl; 00302 } 00303 00304 void MultipleAlignment::computeOneAlignment(ProbabilisticModelPtr almodel, SequenceList seqs, vector<string> names, int numit, int consScheme){ 00305 _seqs = seqs; 00306 _names = names; 00307 00308 postProbAlign(almodel,_ppAlign,_ppGap1,_ppGap2,_eas); 00309 00310 if(consScheme == 1) 00311 predalignAlConsistencyWithEas(_ppAlign,_ppGap1,_ppGap2,numit,_eas); 00312 00313 if(consScheme == 2){ 00314 predalignAlConsistencyWithEas(_ppAlign,_ppGap1,_ppGap2,numit,_eas); 00315 for(int i = 0; i < (int)_names.size(); i++){ 00316 for(int j = i+1; j < (int)_names.size(); j++){ 00317 _ppAlign[_names[i]][_names[j]]->removeLastLine(); 00318 _ppAlign[_names[i]][_names[j]]->removeLastColumn(); 00319 } 00320 } 00321 } 00322 00323 if(consScheme == 3) 00324 picxaaAlConsistency(_ppAlign,_eas,numit); 00325 00326 if(consScheme == 4) 00327 classicAlConsistency(_ppAlign,numit); 00328 00329 /*if(_alpha != -1) 00330 predalConsistencies(predmodels,ppAlign);*/ 00331 00332 initializePostProbsList(_ppAlign); 00333 cerr << "Number of stored posterior probabilities: " << _postProbs.size() << endl; 00334 generateGraph(); 00335 generateAlignment(almodel); 00336 for(int i = 0; i < (int)_seqs.size(); i++){ 00337 cout << _alignment[i]; 00338 } 00339 cout << endl; 00340 } 00341 00342 void MultipleAlignment::computePredictionsAndAlignment(ProbabilisticModelPtr almodel, vector<ProbabilisticModelPtr> &predmodels, SequenceList seqs, vector<string> names, int numit, bool alternateConsistencies, string outDir){ 00343 _seqs = seqs; 00344 _names = names; 00345 00346 postProbAlign(almodel,_ppAlign,_ppGap1,_ppGap2,_eas); 00347 cerr << "Passou do postProbAlign" << endl; 00348 postProbPred(predmodels, _ppPred); 00349 cerr << "Passou do postProbPred" << endl; 00350 00351 if(alternateConsistencies){ 00352 for(int r = 0; r < numit; r++){ 00353 makeAlignmentConsistentWithPrediction(_ppPred,_ppAlign,1); 00354 makePredictionConsistentWithAlignment(_ppPred,_ppAlign,1); 00355 } 00356 } 00357 else{ 00358 map<string, SparseMatrixPtr> consistentPPpred; 00359 for(int i = 0; i < (int)_names.size(); i++){ 00360 consistentPPpred[_names[i]] = SparseMatrixPtr(new SparseMatrix(_ppPred[_names[i]])); 00361 } 00362 makePredictionConsistentWithAlignment(consistentPPpred,_ppAlign,numit); 00363 makeAlignmentConsistentWithPrediction(_ppPred,_ppAlign,numit); 00364 for(int i = 0; i < (int)_names.size(); i++){ 00365 _ppPred[_names[i]].reset(); 00366 _ppPred[_names[i]] = consistentPPpred[_names[i]]; 00367 } 00368 } 00369 00370 for(int i = 0; i < (int)_names.size(); i++){ 00371 SequenceFormatManager::instance()->setFormat(SequenceFormatPtr(new SequenceFormat())); 00372 SequenceEntry output(predmodels[i]->decodable()->getStateNames()); 00373 Sequence states; 00374 clock_t begin = clock(); 00375 float ea = predmodels[i]->decodable()->MEAPred(_seqs[i], states, _ppPred[_names[i]]); 00376 clock_t end = clock(); 00377 std::cerr << "TIME: " << (double)(end - begin)/CLOCKS_PER_SEC << std::endl; 00378 stringstream new_name; 00379 new_name << _names[i] << ": " << ea; 00380 output.setName(new_name.str()); 00381 output.setSequence(states); 00382 string outFile = ""; 00383 outFile.append(outDir); 00384 outFile.append(_names[i]); 00385 outFile.append(".pred"); 00386 ofstream fout; 00387 fout.open(outFile.c_str()); 00388 fout << output; 00389 fout.close(); 00390 } 00391 00392 initializePostProbsList(_ppAlign); 00393 cerr << "Number of stored posterior probabilities: " << _postProbs.size() << endl; 00394 generateGraph(); 00395 generateAlignment(almodel); 00396 for(int i = 0; i < (int)_seqs.size(); i++){ 00397 cout << _alignment[i]; 00398 } 00399 cout << endl; 00400 } 00401 00402 /*void MultipleAlignment::initializeFromFile(ProbabilisticModelPtr almodel, SequenceList seqs, vector<string> names, int numit, int consScheme, string inputFileName){ 00403 _seqs = seqs; 00404 _names = names; 00405 00406 ifstream fin; 00407 fin.open(inputFileName.c_str()); 00408 00409 for(int i = 0; i < (int)_names.size(); i++){ 00410 for(int j = i+1; j < (int)_names.size(); j++){ 00411 string temp, name1, name2; 00412 int line, col, nEntries; 00413 float prob; 00414 _ppAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_seqs[i].size(),_seqs[j].size())); 00415 _ppGap1[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_seqs[i].size(),_seqs[j].size())); 00416 _ppGap2[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_seqs[i].size(),_seqs[j].size())); 00417 fin >> name1 >> name2 >> temp; 00418 fin >> prob; 00419 if(temp.compare("ea") != 0 || name1.compare(_names[i]) != 0 || name2.compare(_names[j]) != 0){ 00420 cerr << "temp = >" << temp << "< name1 = >" << name1 << "< name2 = >" << name2 << "<" << endl; 00421 cerr << "_namesi = >" << _names[i] << "< _namesj = >" << _names[j] << "<" << endl; 00422 cerr << inputFileName << " is not in the correct format, or does not correspond the the sequence file provided. ea" << endl; 00423 exit(-1); 00424 } 00425 _eas[name1][name2] = prob; 00426 fin >> name1 >> name2 >> temp; 00427 if(temp.compare("ppAlign") != 0 || name1.compare(_names[i]) != 0 || name2.compare(_names[j]) != 0){ 00428 cerr << inputFileName << " is not in the correct format, or does not correspond the the sequence file provided. ppAlign" << endl; 00429 exit(-1); 00430 } 00431 fin >> nEntries; 00432 for(int k = 0; k < nEntries; k++){ 00433 fin >> line >> col >> prob; 00434 _ppAlign[name1][name2]->add(line,col,prob); 00435 } 00436 fin >> name1 >> name2 >> temp; 00437 if(temp.compare("ppGap1") != 0 || name1.compare(_names[i]) != 0 || name2.compare(_names[j]) != 0){ 00438 cerr << inputFileName << " is not in the correct format, or does not correspond the the sequence file provided. ppGap1" << endl; 00439 exit(-1); 00440 } 00441 fin >> nEntries; 00442 for(int k = 0; k < nEntries; k++){ 00443 fin >> line >> col >> prob; 00444 _ppGap1[name1][name2]->add(line,col,prob); 00445 } 00446 fin >> name1 >> name2 >> temp; 00447 if(temp.compare("ppGap2") != 0 || name1.compare(_names[i]) != 0 || name2.compare(_names[j]) != 0){ 00448 cerr << inputFileName << " is not in the correct format, or does not correspond the the sequence file provided.ppGap2" << endl; 00449 exit(-1); 00450 } 00451 fin >> nEntries; 00452 for(int k = 0; k < nEntries; k++){ 00453 fin >> line >> col >> prob; 00454 _ppGap2[name1][name2]->add(line,col,prob); 00455 } 00456 } 00457 } 00458 if(consScheme == 1) 00459 predalignAlConsistencyWithEas(_ppAlign, _ppGap1, _ppGap2,numit,_eas); 00460 00461 if(consScheme == 2){ 00462 predalignAlConsistencyWithEas(_ppAlign,_ppGap1,_ppGap2,numit,_eas); 00463 for(int i = 0; i < (int)_names.size(); i++){ 00464 for(int j = i+1; j < (int)_names.size(); j++){ 00465 _ppAlign[_names[i]][_names[j]]->removeLastLine(); 00466 _ppAlign[_names[i]][_names[j]]->removeLastColumn(); 00467 } 00468 } 00469 } 00470 00471 if(consScheme == 3) 00472 picxaaAlConsistency(_ppAlign,_eas,numit); 00473 00474 if(consScheme == 4) 00475 classicAlConsistency(_ppAlign,numit); 00476 00477 initializePostProbsList(_ppAlign); 00478 generateGraph(); 00479 generateAlignment(almodel); 00480 for(int i = 0; i < (int)_seqs.size(); i++){ 00481 cout << _alignment[i]; 00482 } 00483 cout << endl; 00484 }*/ 00485 00486 /* void MultipleAlignment::trainAndComputePPs(string initialModelFile, SequenceList seqs, vector<string> names, int maxTrainIter, string outFile){ 00487 int numseqs = seqs.size(); 00488 ProbabilisticModelCreatorClient creator; 00489 ProbabilisticModelPtr model; 00490 ofstream fout; 00491 fout.open(outFile.c_str()); 00492 if(!fout.is_open()){ 00493 cerr << "Could not open file: " << outFile << endl; 00494 exit(-1); 00495 } 00496 00497 model = creator.create(initialModelFile); 00498 model->pairDecodable()->trainBaumWelch(seqs, maxTrainIter, 1e-5); 00499 for(int i = 0; i < numseqs; i++){ 00500 for(int j = i+1; j < numseqs; j++){ 00501 SparseMatrixPtr ppAlign, ppGap1, ppGap2; 00502 ostringstream temp; 00503 float ea = model->pairDecodable()->posteriorProbabilities(seqs[i],seqs[j],ppAlign, ppGap1, ppGap2); 00504 fout << names[i] << " " << names[j] << " ea" << endl << ea << endl; 00505 fout << names[i] << " " << names[j] << " ppAlign" << endl; 00506 ppAlign->printMatrix(temp); 00507 fout << temp.str(); 00508 temp.str(""); 00509 fout << names[i] << " " << names[j] << " ppGap1" << endl; 00510 ppGap1->printMatrix(temp); 00511 fout << temp.str(); 00512 temp.str(""); 00513 fout << names[i] << " " << names[j] << " ppGap2" << endl; 00514 ppGap2->printMatrix(temp); 00515 fout << temp.str(); 00516 temp.str(""); 00517 } 00518 } 00519 fout.close(); 00520 }*/ 00521 00523 //Posterior probabilities calculation functions/// 00525 00526 float MultipleAlignment::postProbAlign(ProbabilisticModelPtr model, map<string, map< string, SparseMatrixPtr > > &ppAlign, map<string, map< string, SparseMatrixPtr > > &ppGap1, map<string, map< string, SparseMatrixPtr > > &ppGap2, map<string, map<string, float> > &eas) 00527 { 00528 float sum = 0; 00529 for(int i = 0; i < (int)_seqs.size(); i++){ 00530 for(int j = i+1; j < (int)_seqs.size(); j++){ 00531 float ea = model->pairDecodable()->posteriorProbabilities(_seqs[i],_seqs[j],ppAlign[_names[i]][_names[j]], ppGap1[_names[i]][_names[j]], ppGap2[_names[i]][_names[j]]); 00532 eas[_names[i]][_names[j]] = ea; 00533 sum += ea; 00534 } 00535 } 00536 00537 return sum; 00538 } 00539 00540 void MultipleAlignment::postProbPred(vector<ProbabilisticModelPtr> &predmodels, map<string, SparseMatrixPtr > &ppPred) 00541 { 00542 for(int i = 0; i < (int)_seqs.size(); i++){ 00543 fMatrix postPred; 00544 predmodels[i]->decodable()->posteriorProbabilities(_seqs[i],postPred); 00545 ppPred[_names[i]] = SparseMatrixPtr(new SparseMatrix()); 00546 ppPred[_names[i]]->buildPredMatrix(postPred.size1(),postPred.size2(),postPred); 00547 } 00548 } 00549 00550 00552 //Consistency transformation functions////// 00554 00555 void MultipleAlignment::picxaaAlConsistency(map< string, map< string, SparseMatrixPtr > > &ppAlign, map<string, map<string, float> > eas, int numit){ 00556 for(int r = 0; r < numit; r++){ 00557 map< string, map< string, SparseMatrixPtr > > consistentPPAlign; 00558 for(int i = 0; i < (int)_names.size(); i++){ 00559 for(int j = i+1; j < (int)_names.size(); j++){ 00560 float sumea = 2.0*eas[_names[i]][_names[j]]*eas[_names[i]][_names[j]]; 00561 fMatrix postMatch; 00562 ppAlign[_names[i]][_names[j]]->getfMatrixTimesX(postMatch, 2.0*eas[_names[i]][_names[j]]*eas[_names[i]][_names[j]]); 00563 for(int m = 0; m < (int)_names.size(); m++){ 00564 if(m == i || m ==j) 00565 continue; 00566 else if(m < i){ 00567 ppAlign[_names[m]][_names[i]]->leftTransXright(ppAlign[_names[m]][_names[j]], postMatch, (eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]])); 00568 sumea += (eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]]); 00569 } 00570 else if(m > i && m < j){ 00571 ppAlign[_names[i]][_names[m]]->leftXright(ppAlign[_names[m]][_names[j]], postMatch, (eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]])); 00572 sumea += (eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]]); 00573 } 00574 else if(m > j){ 00575 SparseMatrixPtr AUX = SparseMatrixPtr(new SparseMatrix()); 00576 AUX->transposeOf(ppAlign[_names[j]][_names[m]]); 00577 ppAlign[_names[i]][_names[m]]->leftXright(AUX, postMatch, (eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]])); 00578 AUX.reset(); 00579 sumea += (eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]]); 00580 } 00581 } 00582 consistentPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postMatch,ppAlign[_names[i]][_names[j]], sumea)); 00583 } 00584 } 00585 for(int i = 0; i < (int)_names.size(); i++){ 00586 for(int j = i+1; j < (int)_names.size(); j++){ 00587 ppAlign[_names[i]][_names[j]].reset(); 00588 ppAlign[_names[i]][_names[j]] = consistentPPAlign[_names[i]][_names[j]]; 00589 } 00590 } 00591 } 00592 } 00593 00594 void MultipleAlignment::classicAlConsistency(map< string, map< string, SparseMatrixPtr > > &ppAlign, int numit){ 00595 for(int r = 0; r < numit; r++){ 00596 map< string, map< string, SparseMatrixPtr > > consistentPPAlign; 00597 for(int i = 0; i < (int)_names.size(); i++){ 00598 for(int j = i+1; j < (int)_names.size(); j++){ 00599 fMatrix postMatch; 00600 ppAlign[_names[i]][_names[j]]->getfMatrixTimesX(postMatch, 2.0); 00601 for(int m = 0; m < (int)_names.size(); m++){ 00602 if(m == i || m ==j) 00603 continue; 00604 else if(m < i){ 00605 ppAlign[_names[m]][_names[i]]->leftTransXright(ppAlign[_names[m]][_names[j]], postMatch); 00606 } 00607 else if(m > i && m < j){ 00608 ppAlign[_names[i]][_names[m]]->leftXright(ppAlign[_names[m]][_names[j]], postMatch); 00609 } 00610 else if(m > j){ 00611 SparseMatrixPtr AUX = SparseMatrixPtr(new SparseMatrix()); 00612 AUX->transposeOf(ppAlign[_names[j]][_names[m]]); 00613 ppAlign[_names[i]][_names[m]]->leftXright(AUX, postMatch); 00614 AUX.reset(); 00615 } 00616 } 00617 consistentPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postMatch,ppAlign[_names[i]][_names[j]], (float)_names.size())); 00618 } 00619 } 00620 for(int i = 0; i < (int)_names.size(); i++){ 00621 for(int j = i+1; j < (int)_names.size(); j++){ 00622 ppAlign[_names[i]][_names[j]].reset(); 00623 ppAlign[_names[i]][_names[j]] = consistentPPAlign[_names[i]][_names[j]]; 00624 } 00625 } 00626 } 00627 } 00628 00629 void MultipleAlignment::predalignAlConsistencyNoEas(map< string, map< string, SparseMatrixPtr > > &ppAlign, map< string, map< string, SparseMatrixPtr > > &ppGap1,map< string, map< string, SparseMatrixPtr > > &ppGap2, int numit){ 00630 00631 for(int r = 0; r < numit; r++){ 00632 map< string, map< string, SparseMatrixPtr > > consistentPPAlign; 00633 map< string, map< string, SparseMatrixPtr > > consistentPPGap1; 00634 map< string, map< string, SparseMatrixPtr > > consistentPPGap2; 00635 for(int i = 0; i < (int)_names.size(); i++){ 00636 for(int j = i+1; j < (int)_names.size(); j++){ 00637 fMatrix postMatch; 00638 fMatrix postGap1; 00639 fMatrix postGap2; 00640 ppAlign[_names[i]][_names[j]]->getfMatrixTimesX(postMatch, 2.0); 00641 ppGap1[_names[i]][_names[j]]->getfMatrixTimesX(postGap1, 1.0); 00642 ppGap2[_names[i]][_names[j]]->getfMatrixTimesX(postGap2, 1.0); 00643 for(int m = 0; m < (int)_names.size(); m++){ 00644 if(m == i || m == j) 00645 continue; 00646 else if(m < i){ 00647 ppAlign[_names[m]][_names[i]]->leftTransXright(ppAlign[_names[m]][_names[j]], postMatch); 00648 ppGap1[_names[m]][_names[i]]->leftTransXright(ppGap1[_names[m]][_names[j]], postMatch); 00649 ppAlign[_names[m]][_names[i]]->leftTransXright(ppGap2[_names[m]][_names[j]], postGap2); 00650 ppGap2[_names[m]][_names[i]]->leftTransXright(ppAlign[_names[m]][_names[j]], postGap1); 00651 } 00652 else if(m > i && m < j){ 00653 ppAlign[_names[i]][_names[m]]->leftXright(ppAlign[_names[m]][_names[j]], postMatch); 00654 ppGap2[_names[i]][_names[m]]->leftXright(ppGap1[_names[m]][_names[j]], postMatch); 00655 ppAlign[_names[i]][_names[m]]->leftXright(ppGap2[_names[m]][_names[j]], postGap2); 00656 ppGap1[_names[i]][_names[m]]->leftXright(ppAlign[_names[m]][_names[j]], postGap1); 00657 } 00658 else if(m > j){ 00659 SparseMatrixPtr AUX1 = SparseMatrixPtr(new SparseMatrix()); 00660 SparseMatrixPtr AUX2 = SparseMatrixPtr(new SparseMatrix()); 00661 SparseMatrixPtr AUX3 = SparseMatrixPtr(new SparseMatrix()); 00662 AUX1->transposeOf(ppAlign[_names[j]][_names[m]]); 00663 ppAlign[_names[i]][_names[m]]->leftXright(AUX1, postMatch); 00664 ppGap1[_names[i]][_names[m]]->leftXright(AUX1, postGap1); 00665 AUX1.reset(); 00666 AUX2->transposeOf(ppGap2[_names[j]][_names[m]]); 00667 ppGap2[_names[i]][_names[m]]->leftXright(AUX2, postMatch); 00668 AUX2.reset(); 00669 AUX3->transposeOf(ppGap1[_names[j]][_names[m]]); 00670 ppAlign[_names[i]][_names[m]]->leftXright(AUX3, postGap2); 00671 AUX3.reset(); 00672 } 00673 } 00674 consistentPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postMatch,ppAlign[_names[i]][_names[j]], (float)_names.size())); 00675 consistentPPGap1[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postGap1,ppGap1[_names[i]][_names[j]], (float)(_names.size()-1))); 00676 consistentPPGap2[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postGap2,ppGap2[_names[i]][_names[j]], (float)(_names.size()-1))); 00677 } 00678 } 00679 for(int i = 0; i < (int)_names.size(); i++){ 00680 for(int j = i+1; j < (int)_names.size(); j++){ 00681 ppAlign[_names[i]][_names[j]].reset(); 00682 ppAlign[_names[i]][_names[j]] = consistentPPAlign[_names[i]][_names[j]]; 00683 ppGap1[_names[i]][_names[j]].reset(); 00684 ppGap1[_names[i]][_names[j]] = consistentPPGap1[_names[i]][_names[j]]; 00685 ppGap2[_names[i]][_names[j]].reset(); 00686 ppGap2[_names[i]][_names[j]] = consistentPPGap2[_names[i]][_names[j]]; 00687 } 00688 } 00689 } 00690 for(int i = 0; i < (int)_names.size(); i++){ 00691 for(int j = i+1; j < (int)_names.size(); j++){ 00692 ppAlign[_names[i]][_names[j]]->addGaps(ppGap1[_names[i]][_names[j]], ppGap2[_names[i]][_names[j]]); 00693 } 00694 } 00695 } 00696 00697 void MultipleAlignment::predalignAlConsistencyWithEas(map< string, map< string, SparseMatrixPtr > > &ppAlign, map< string, map< string, SparseMatrixPtr > > &ppGap1,map< string, map< string, SparseMatrixPtr > > &ppGap2, int numit, map<string, map<string, float> > eas){ 00698 00699 for(int r = 0; r < numit; r++){ 00700 map< string, map< string, SparseMatrixPtr > > consistentPPAlign; 00701 map< string, map< string, SparseMatrixPtr > > consistentPPGap1; 00702 map< string, map< string, SparseMatrixPtr > > consistentPPGap2; 00703 for(int i = 0; i < (int)_names.size(); i++){ 00704 for(int j = i+1; j < (int)_names.size(); j++){ 00705 fMatrix postMatch; 00706 fMatrix postGap1; 00707 fMatrix postGap2; 00708 float sumea = 2.0*eas[_names[i]][_names[j]]*eas[_names[i]][_names[j]]; 00709 ppAlign[_names[i]][_names[j]]->getfMatrixTimesX(postMatch, 2.0*eas[_names[i]][_names[j]]*eas[_names[i]][_names[j]]); 00710 ppGap1[_names[i]][_names[j]]->getfMatrixTimesX(postGap1, 1.0); 00711 ppGap2[_names[i]][_names[j]]->getfMatrixTimesX(postGap2, 1.0); 00712 for(int m = 0; m < (int)_names.size(); m++){ 00713 if(m == i || m == j) 00714 continue; 00715 else if(m < i){ 00716 ppAlign[_names[m]][_names[i]]->leftTransXright(ppAlign[_names[m]][_names[j]], postMatch, eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]]); 00717 ppGap1[_names[m]][_names[i]]->leftTransXright(ppGap1[_names[m]][_names[j]], postMatch, eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]]); 00718 ppAlign[_names[m]][_names[i]]->leftTransXright(ppGap2[_names[m]][_names[j]], postGap2, eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]]); 00719 ppGap2[_names[m]][_names[i]]->leftTransXright(ppAlign[_names[m]][_names[j]], postGap1, eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]]); 00720 sumea += eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]]; 00721 } 00722 else if(m > i && m < j){ 00723 ppAlign[_names[i]][_names[m]]->leftXright(ppAlign[_names[m]][_names[j]], postMatch, eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]]); 00724 ppGap2[_names[i]][_names[m]]->leftXright(ppGap1[_names[m]][_names[j]], postMatch, eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]]); 00725 ppAlign[_names[i]][_names[m]]->leftXright(ppGap2[_names[m]][_names[j]], postGap2, eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]]); 00726 ppGap1[_names[i]][_names[m]]->leftXright(ppAlign[_names[m]][_names[j]], postGap1, eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]]); 00727 sumea += eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]]; 00728 } 00729 else if(m > j){ 00730 SparseMatrixPtr AUX1 = SparseMatrixPtr(new SparseMatrix()); 00731 SparseMatrixPtr AUX2 = SparseMatrixPtr(new SparseMatrix()); 00732 SparseMatrixPtr AUX3 = SparseMatrixPtr(new SparseMatrix()); 00733 AUX1->transposeOf(ppAlign[_names[j]][_names[m]]); 00734 ppAlign[_names[i]][_names[m]]->leftXright(AUX1, postMatch, eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]]); 00735 ppGap1[_names[i]][_names[m]]->leftXright(AUX1, postGap1, eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]]); 00736 AUX1.reset(); 00737 AUX2->transposeOf(ppGap2[_names[j]][_names[m]]); 00738 ppGap2[_names[i]][_names[m]]->leftXright(AUX2, postMatch, eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]]); 00739 AUX2.reset(); 00740 AUX3->transposeOf(ppGap1[_names[j]][_names[m]]); 00741 ppAlign[_names[i]][_names[m]]->leftXright(AUX3, postGap2, eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]]); 00742 AUX3.reset(); 00743 sumea += eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]]; 00744 } 00745 } 00746 consistentPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postMatch,ppAlign[_names[i]][_names[j]], sumea)); 00747 consistentPPGap1[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postGap1,ppGap1[_names[i]][_names[j]], sumea)); 00748 consistentPPGap2[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postGap2,ppGap2[_names[i]][_names[j]], sumea)); 00749 } 00750 } 00751 for(int i = 0; i < (int)_names.size(); i++){ 00752 for(int j = i+1; j < (int)_names.size(); j++){ 00753 ppAlign[_names[i]][_names[j]].reset(); 00754 ppAlign[_names[i]][_names[j]] = consistentPPAlign[_names[i]][_names[j]]; 00755 ppGap1[_names[i]][_names[j]].reset(); 00756 ppGap1[_names[i]][_names[j]] = consistentPPGap1[_names[i]][_names[j]]; 00757 ppGap2[_names[i]][_names[j]].reset(); 00758 ppGap2[_names[i]][_names[j]] = consistentPPGap2[_names[i]][_names[j]]; 00759 } 00760 } 00761 } 00762 for(int i = 0; i < (int)_names.size(); i++){ 00763 for(int j = i+1; j < (int)_names.size(); j++){ 00764 ppAlign[_names[i]][_names[j]]->addGaps(ppGap1[_names[i]][_names[j]], ppGap2[_names[i]][_names[j]]); 00765 } 00766 } 00767 } 00768 00769 void MultipleAlignment::makeAlignmentConsistentWithPrediction(map<string, SparseMatrixPtr > &ppPred, map< string, map< string, SparseMatrixPtr > > &ppAlign, int numit){ 00770 for(int i = 0; i < (int)_names.size(); i++){ 00771 for(int j = i+1; j < (int)_names.size(); j++){ 00772 ppAlign[_names[i]][_names[j]]->applyPrediction(ppPred[_names[i]], ppPred[_names[j]]); 00773 } 00774 } 00775 classicAlConsistency(ppAlign,numit); 00776 } 00777 00778 void MultipleAlignment::makePredictionConsistentWithAlignment(map<string, SparseMatrixPtr > &ppPred, map< string, map<string, SparseMatrixPtr> > &ppAlign, int numit){ 00779 for(int r = 0; r < numit; r++){ 00780 map< string, SparseMatrixPtr > consistentPPpred; 00781 for(int i = 0; i < (int)_names.size(); i++){ 00782 fMatrix postPred; 00783 ppPred[_names[i]]->getfMatrixTimesX(postPred, 1.0); 00784 for(int m = 0; m < (int)_names.size(); m++){ 00785 if(m == i) 00786 continue; 00787 else if(m < i){ 00788 ppAlign[_names[m]][_names[i]]->leftTransXright(ppPred[_names[m]], postPred); 00789 } 00790 else{ 00791 ppAlign[_names[i]][_names[m]]->leftXright(ppPred[_names[m]], postPred); 00792 } 00793 } 00794 consistentPPpred[_names[i]] = SparseMatrixPtr(new SparseMatrix(postPred,ppPred[_names[i]], (float)_names.size())); 00795 } 00796 for(int i = 0; i < (int)_names.size(); i++){ 00797 ppPred[_names[i]].reset(); 00798 ppPred[_names[i]] = consistentPPpred[_names[i]]; 00799 } 00800 } 00801 } 00802 00803 00805 //Alignment construction functions/////////////////////////// 00807 00808 bool compare(postProb first, postProb second){ 00809 if(first.prob >= second.prob) 00810 return true; 00811 return false; 00812 } 00813 00814 void MultipleAlignment::initializePostProbsList(map< string, map< string, SparseMatrixPtr > > &ppAlign){ 00815 for(int i = 0; i < (int)_names.size(); i++){ 00816 for(int j = i+1; j < (int)_names.size(); j++){ 00817 bool hasElement = true; 00818 while(hasElement){ 00819 int s1, p1, s2, p2; 00820 postProb p; 00821 s1 = i; 00822 s2 = j; 00823 hasElement = ppAlign[_names[i]][_names[j]]->next(&p1,&p2,&(p.prob)); 00824 p.pos1.first = s1; 00825 p.pos1.second = p1-1; 00826 p.pos2.first = s2; 00827 p.pos2.second = p2-1; 00828 /*p.prob = (p.prob)/(ppAlign[_names[i]][_names[j]]->get(p1,_seqs[j].size())+ppAlign[_names[i]][_names[j]]->get(_seqs[i].size(),p2)); 00829 if(p.prob < 0.5){ 00830 continue; 00831 } 00832 if(p1 == (int)_seqs[s1].size() || p2 == (int)_seqs[s2].size()) 00833 continue;*/ 00834 //cout << "s1 = " << s1 << " p1 = " << p1 << " s2 = " << s2 << " p2 = " << p2 << endl; 00835 _postProbs.push_back(p); 00836 } 00837 } 00838 } 00839 _postProbs.sort(compare); 00840 } 00841 00842 void MultipleAlignment::generateAlignment(ProbabilisticModelPtr almodel){ 00843 vector<Sequence> als; 00844 for(int i = 0; i < (int)_seqs.size(); i++){ 00845 Sequence s(_graph.size(),almodel->pairDecodable()->getGapId()); 00846 als.push_back(s); 00847 } 00848 for(int i = 0; i < (int)_graph.size(); i++){ 00849 for(int k = 0; k < (int)_graph[i].colSeqPos.size(); k++){ 00850 if(_graph[i].colSeqPos[k].second == (int)_seqs[_graph[i].colSeqPos[k].first].size()) 00851 continue; 00852 als[_graph[i].colSeqPos[k].first][i] = _seqs[_graph[i].colSeqPos[k].first][_graph[i].colSeqPos[k].second]; 00853 } 00854 } 00855 for(int i = 0; i < (int)_seqs.size(); i++){ 00856 SequenceEntry se(almodel->alphabet()); 00857 se.setName(_names[i]); 00858 se.setSequence(als[i]); 00859 _alignment.push_back(se); 00860 } 00861 } 00862 00863 00864 00865 void MultipleAlignment::generateGraph(){ 00866 //Initialize all possible columns 00867 _numberOfColumns = 0; 00868 int numSeqs = _seqs.size(); 00869 int maxSeqSize = 0; 00870 for(int i = 0; i < numSeqs; i++){ 00871 if((int)_seqs[i].size() > maxSeqSize) 00872 maxSeqSize = _seqs[i].size(); 00873 } 00874 00875 int index = 0; 00876 for(int j = 0; j < maxSeqSize; j++){ 00877 for(int i = 0; i < numSeqs; i++){ 00878 if(j >= (int)_seqs[i].size()) 00879 continue; 00880 seqPos sp; 00881 column c; 00882 sp.first = i; 00883 sp.second = j; 00884 c.index = index; 00885 _seqPosMap[sp] = index; 00886 index++; 00887 c.isDead = false; 00888 c.visited = false; 00889 c.colSeqPos.push_back(sp); 00890 _graph.push_back(c); 00891 _numberOfColumns++; 00892 } 00893 } 00894 00895 while(!_postProbs.empty()){ 00896 addToGappedGraph(*_postProbs.begin()); 00897 _postProbs.erase(_postProbs.begin()); 00898 } 00899 findGraphMcd(); 00900 } 00901 00902 void MultipleAlignment::addToGraph(postProb p){ 00903 int index1 = _seqPosMap[p.pos1]; 00904 int index2 = _seqPosMap[p.pos2]; 00905 int ub,lb; 00906 seqPos ubsp, lbsp; 00907 if(index1 > index2){ 00908 ub = index1; 00909 ubsp = p.pos1; 00910 lb = index2; 00911 lbsp = p.pos2; 00912 } 00913 else if(index2 > index1){ 00914 ub = index2; 00915 ubsp = p.pos2; 00916 lb = index1; 00917 lbsp = p.pos1; 00918 } 00919 else 00920 return; 00921 00922 vector<column> df,db; 00923 _graph[ub].colSeqPos.push_back(lbsp); 00924 bool cycle = dfsf(ub,_graph[lb],df); 00925 unvisit(df); 00926 if(cycle){ 00927 _graph[ub].colSeqPos.pop_back(); 00928 return; 00929 } 00930 dfsb(lb,_graph[ub],db); 00931 unvisit(db); 00932 if(db[0].index != ub) 00933 cerr << "DEU PAUU!!" << endl; 00934 if(df[0].index != lb) 00935 cerr << "DEU PAUU!!" << endl; 00936 assert(db[0].index == ub); 00937 assert(df[0].index == lb); 00938 merge(&db[0],&df[0]); 00939 reorder(df,db); 00940 } 00941 00942 bool MultipleAlignment::dfsf(int ub, column n, vector<column> &df){ 00943 _graph[n.index].visited = true; 00944 df.push_back(n); 00945 for(int i = 0; i < (int)n.colSeqPos.size(); i++){ 00946 seqPos w = n.colSeqPos[i]; 00947 w.second++; 00948 if(w.second >= (int)_seqs[w.first].size()) 00949 continue; 00950 int next_index = _seqPosMap[w]; 00951 if(next_index == ub){ 00952 return true; 00953 } 00954 if(next_index > ub || _graph[next_index].visited) 00955 continue; 00956 bool cycle = dfsf(ub,_graph[next_index],df); 00957 if(cycle){ 00958 return true; 00959 } 00960 } 00961 return false; 00962 } 00963 00964 void MultipleAlignment::dfsb(int lb, column n, vector<column> &db){ 00965 _graph[n.index].visited = true; 00966 db.push_back(n); 00967 for(int i = 0; i < (int)n.colSeqPos.size(); i++){ 00968 seqPos w = n.colSeqPos[i]; 00969 if(w.second == (int)_seqs[w.first].size()) 00970 continue; 00971 w.second--; 00972 if(w.second < 0) 00973 continue; 00974 int next_index = _seqPosMap[w]; 00975 if(next_index < lb || _graph[next_index].visited) 00976 continue; 00977 dfsb(lb,_graph[next_index],db); 00978 } 00979 } 00980 00981 void MultipleAlignment::merge(column *ub, column *lb){ 00982 lb->isDead = true; 00983 _numberOfColumns--; 00984 bool flag = false; 00985 for(int i = 0; i < (int)(lb->colSeqPos).size(); i++){ 00986 for(int j = 0; j < (int)(ub->colSeqPos).size(); j++){ 00987 if(lb->colSeqPos[i].first == ub->colSeqPos[j].first && lb->colSeqPos[i].second == ub->colSeqPos[j].second) 00988 flag = true; 00989 } 00990 if(flag == false) 00991 ub->colSeqPos.push_back(lb->colSeqPos[i]); 00992 flag = false; 00993 } 00994 lb->colSeqPos.clear(); 00995 } 00996 00997 bool smallerIndex(column a, column b){ 00998 if(a.index < b.index) 00999 return true; 01000 return false; 01001 } 01002 01003 bool biggerIndex(column a, column b){ 01004 if(a.index >= b.index) 01005 return true; 01006 return false; 01007 } 01008 01009 void MultipleAlignment::reorder(vector<column> &df, vector<column> &db){ 01010 vector<int> L; 01011 int i = 0; 01012 int j = 0; 01013 sort(df.begin(), df.end(), smallerIndex); 01014 sort(db.begin(), db.end(),smallerIndex); 01015 for(int k = 0; k < (int)(db.size()+df.size()); k++){ 01016 if(i >= (int)db.size()){ 01017 L.push_back(df[j].index); 01018 j++; 01019 continue; 01020 } 01021 if(j >= (int)df.size()){ 01022 L.push_back(db[i].index); 01023 i++; 01024 continue; 01025 } 01026 if(db[i].index < df[j].index){ 01027 L.push_back(db[i].index); 01028 i++; 01029 } 01030 else{ 01031 L.push_back(df[j].index); 01032 j++; 01033 } 01034 } 01035 i = 0; 01036 for(int k = 0; k < (int)db.size(); k++){ 01037 db[k].index = L[i]; 01038 for(j = 0; j < (int)db[k].colSeqPos.size(); j++){ 01039 _seqPosMap[db[k].colSeqPos[j]] = L[i]; 01040 } 01041 _graph[L[i]] = db[k]; 01042 i++; 01043 } 01044 for(int k = 0; k < (int)df.size(); k++){ 01045 df[k].index = L[i]; 01046 for(j = 0; j < (int)df[k].colSeqPos.size(); j++){ 01047 _seqPosMap[df[k].colSeqPos[j]] = L[i]; 01048 } 01049 _graph[L[i]] = df[k]; 01050 i++; 01051 } 01052 } 01053 01054 void MultipleAlignment::findGraphMcd(){ 01055 //find nodes with first positions in sequences 01056 //unvisit(); 01057 std::stack< column* > nodeStack; 01058 vector<bool> inStack (_graph.size(),false); 01059 vector<int> finishingTimes (_graph.size(),-1); 01060 vector<int> initialColumns; 01061 for(int i = 0; i < (int)_seqs.size(); i++){ 01062 seqPos sp; 01063 sp.first = i; 01064 sp.second = 0; 01065 if(!inStack[_seqPosMap[sp]]){ 01066 initialColumns.push_back(_seqPosMap[sp]); 01067 inStack[_seqPosMap[sp]] = true; 01068 } 01069 } 01070 sort(initialColumns.begin(), initialColumns.end()); 01071 for(int i = 0; i < (int)initialColumns.size(); i++){ 01072 nodeStack.push(&_graph[initialColumns[i]]); 01073 } 01074 int time = 0; 01075 while(!nodeStack.empty()){ 01076 column *c = nodeStack.top(); 01077 nodeStack.pop(); 01078 if(c->visited){ 01079 finishingTimes[c->index] = time++; 01080 continue; 01081 } 01082 c->visited = true; 01083 nodeStack.push(c); 01084 01085 vector<int> neighbors; 01086 for(int i = 0; i < (int)c->colSeqPos.size(); i++){ 01087 seqPos sp = c->colSeqPos[i]; 01088 sp.second++; 01089 if(sp.second >= (int)_seqs[sp.first].size()) 01090 continue; 01091 if(!inStack[_seqPosMap[sp]]){ 01092 neighbors.push_back(_seqPosMap[sp]); 01093 inStack[_seqPosMap[sp]] = true; 01094 } 01095 } 01096 sort(neighbors.begin(), neighbors.end()); 01097 for(int i = 0; i < (int)neighbors.size(); i++){ 01098 nodeStack.push(&_graph[neighbors[i]]); 01099 } 01100 } 01101 01102 vector<column> tempGraph; 01103 for(int i = 0; i < (int)_graph.size(); i++){ 01104 if(_graph[i].isDead){ 01105 continue; 01106 } 01107 if(finishingTimes[i] < 0) 01108 cerr << "DEU PAU TIMES" << endl; 01109 _graph[i].index = finishingTimes[i]; 01110 tempGraph.push_back(_graph[i]); 01111 } 01112 sort(tempGraph.begin(), tempGraph.end(), biggerIndex); 01113 for(int i = 0; i < _numberOfColumns; i++){ 01114 tempGraph[i].index = i; 01115 _graph[i] = tempGraph[i]; 01116 } 01117 _graph.erase(_graph.begin()+_numberOfColumns,_graph.end()); 01118 } 01119 01120 void MultipleAlignment::unvisit(vector<column> &v){ 01121 for(int i = 0; i < (int)v.size(); i++){ 01122 _graph[v[i].index].visited = false; 01123 } 01124 } 01125 01126 void MultipleAlignment::addToGappedGraph(postProb p){ 01127 if(p.pos1.second == (int)_seqs[p.pos1.first].size() && p.pos2.second == (int)_seqs[p.pos2.first].size()){ 01128 cout << "Tried to add gap-gap pair to the graph" << endl; 01129 exit(-1); 01130 } 01131 if(p.pos1.second == (int)_seqs[p.pos1.first].size()){ 01132 int index = _seqPosMap[p.pos2]; 01133 for(int i = 0; i < (int)_graph[index].colSeqPos.size(); i++){ 01134 if(p.pos1.first == _graph[index].colSeqPos[i].first) 01135 return; 01136 } 01137 _graph[index].colSeqPos.push_back(p.pos1); 01138 return; 01139 } 01140 if(p.pos2.second == (int)_seqs[p.pos2.first].size()){ 01141 int index = _seqPosMap[p.pos1]; 01142 for(int i = 0; i < (int)_graph[index].colSeqPos.size(); i++){ 01143 if(p.pos2.first == _graph[index].colSeqPos[i].first) 01144 return; 01145 } 01146 _graph[index].colSeqPos.push_back(p.pos2); 01147 return; 01148 } 01149 01150 int index1 = _seqPosMap[p.pos1]; 01151 int index2 = _seqPosMap[p.pos2]; 01152 int ub,lb; 01153 seqPos ubsp, lbsp; 01154 if(index1 > index2){ 01155 ub = index1; 01156 ubsp = p.pos1; 01157 lb = index2; 01158 lbsp = p.pos2; 01159 } 01160 else if(index2 > index1){ 01161 ub = index2; 01162 ubsp = p.pos2; 01163 lb = index1; 01164 lbsp = p.pos1; 01165 } 01166 else 01167 return; 01168 01169 for(int i = 0; i < (int)_graph[ub].colSeqPos.size(); i++){ 01170 for(int j = 0; j < (int)_graph[lb].colSeqPos.size(); j++){ 01171 if(_graph[ub].colSeqPos[i].first == _graph[lb].colSeqPos[i].first && _graph[ub].colSeqPos[i].second != _graph[lb].colSeqPos[i].second) 01172 return; 01173 } 01174 } 01175 01176 vector<column> df,db; 01177 _graph[ub].colSeqPos.push_back(lbsp); 01178 bool cycle = dfsf(ub,_graph[lb],df); 01179 unvisit(df); 01180 if(cycle){ 01181 _graph[ub].colSeqPos.pop_back(); 01182 return; 01183 } 01184 dfsb(lb,_graph[ub],db); 01185 unvisit(db); 01186 if(db[0].index != ub) 01187 cerr << "DEU PAUU!!" << endl; 01188 if(df[0].index != lb) 01189 cerr << "DEU PAUU!!" << endl; 01190 assert(db[0].index == ub); 01191 assert(df[0].index == lb); 01192 merge(&db[0],&df[0]); 01193 reorder(df,db); 01194 } 01195 01196 01200 01201 void MultipleAlignment::printPPList(){ 01202 if(_postProbs.empty()) 01203 return; 01204 list<postProb>::iterator it = _postProbs.begin(); 01205 while(it != _postProbs.end()){ 01206 cout << "(" << (*it).prob << "," << (*it).pos1.first << "," << (*it).pos1.second << "," << (*it).pos2.first << "," << (*it).pos2.second << ") "; 01207 it++; 01208 } 01209 cout << endl; 01210 } 01211 }