ToPS
MultipleAlignment.cpp
00001 #include "MultipleAlignment.hpp"
00002 
00003 using namespace std;
00004 
00005 namespace tops{
00006 
00007   void MultipleAlignment::clearAll(){
00008     _graph.clear();
00009     _seqPosMap.clear();
00010     _postProbs.clear();
00011     _alignment.clear();
00012   }   
00013 
00014   void MultipleAlignment::computeAllAlignments(ProbabilisticModelPtr almodel, SequenceList seqs, vector<string> names, int numit, string alFileName, string outDir){\
00015     boost::timer t;
00016     int avg = 0;
00017     for(int i = 0; i < (int)seqs.size(); i++){
00018       avg += seqs[i].size();
00019     }
00020     avg /= seqs.size();
00021     cerr << alFileName << endl << "number of sequences: " << seqs.size() << endl << "average size: " << avg << endl;
00022     _seqs = seqs;
00023     _names = names;
00024     
00025     cerr << "\tComputing posterior probabilities...";
00026     t.restart();
00027     postProbAlign(almodel,_ppAlign,_ppGap1,_ppGap2,_eas);
00028     cerr << "Done! (" << t.elapsed() << "s)" << endl << endl;
00029 
00030     map<string,map<string,SparseMatrixPtr > > consPPAlign;
00031     map<string,map<string,SparseMatrixPtr > > consPPGap1;
00032     map<string,map<string,SparseMatrixPtr > > consPPGap2;
00033 
00034     cerr << "\tNo consistency." << endl;
00035     cerr << "\t\tGenerating alignment with modified sequence annealing...";
00036     t.restart();
00037     for(int i = 0; i < (int)_names.size(); i++){
00038       for(int j = i+1; j < (int)_names.size(); j++){
00039         _ppAlign[_names[i]][_names[j]]->addGaps(_ppGap1[_names[i]][_names[j]], _ppGap2[_names[i]][_names[j]]);
00040       }
00041     }
00042     initializePostProbsList(_ppAlign);  
00043     generateGraph();
00044     generateAlignment(almodel);
00045     string outFile = "";
00046     outFile.append(outDir);
00047     outFile.append("C0");
00048     outFile.append(alFileName);
00049     ofstream fout;
00050     fout.open(outFile.c_str());
00051     for(int i = 0; i < (int)_seqs.size(); i++){
00052       fout << _alignment[i];
00053     }
00054     fout << endl;
00055     fout.close();
00056     cerr << "Done! (" << t.elapsed() << "s)" << endl;
00057 
00058     clearAll();
00059 
00060     cerr << "\t\tGenerating alignment with original sequence annealing...";
00061     t.restart();
00062     for(int i = 0; i < (int)_names.size(); i++){
00063       for(int j = i+1; j < (int)_names.size(); j++){
00064         _ppAlign[_names[i]][_names[j]]->removeLastLine();
00065         _ppAlign[_names[i]][_names[j]]->removeLastColumn();
00066       }
00067     }
00068     initializePostProbsList(_ppAlign);  
00069     generateGraph();
00070     generateAlignment(almodel);
00071     outFile = "";
00072     outFile.append(outDir);
00073     outFile.append("C1");
00074     outFile.append(alFileName);
00075     fout.open(outFile.c_str());
00076     for(int i = 0; i < (int)_seqs.size(); i++){
00077       fout << _alignment[i];
00078     }
00079     fout << endl;
00080     fout.close();
00081     cerr << "Done! (" << t.elapsed() << "s)" << endl << endl;
00082 
00083     clearAll();
00084 
00085     for(int i = 0; i < (int)_names.size(); i++){
00086       for(int j = i+1; j < (int)_names.size(); j++){
00087         consPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppAlign[_names[i]][_names[j]]));
00088         consPPGap1[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppGap1[_names[i]][_names[j]]));
00089         consPPGap2[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppGap2[_names[i]][_names[j]]));
00090       }
00091     }
00092 
00093     cerr << "\tApplying consistency transformation with weights and gaps...";
00094     t.restart();
00095     predalignAlConsistencyWithEas(consPPAlign, consPPGap1, consPPGap2, numit, _eas);
00096     cerr << "Done! (" << t.elapsed() << "s)" << endl;
00097     cerr << "\t\tGenerating alignment with modified sequence annealing...";
00098     t.restart();
00099     initializePostProbsList(consPPAlign);  
00100     generateGraph();
00101     generateAlignment(almodel);
00102     outFile = "";
00103     outFile.append(outDir);
00104     outFile.append("C2");
00105     outFile.append(alFileName);
00106     fout.open(outFile.c_str());
00107     for(int i = 0; i < (int)_seqs.size(); i++){
00108       fout << _alignment[i];
00109     }
00110     fout << endl;
00111     fout.close();
00112     cerr << "Done! (" << t.elapsed() << "s)" << endl;
00113 
00114     clearAll();
00115 
00116     cerr << "\t\tGenerating alignment original sequence annealing...";
00117     t.restart();
00118     for(int i = 0; i < (int)_names.size(); i++){
00119       for(int j = i+1; j < (int)_names.size(); j++){
00120         consPPAlign[_names[i]][_names[j]]->removeLastLine();
00121         consPPAlign[_names[i]][_names[j]]->removeLastColumn();
00122       }
00123     }
00124     initializePostProbsList(consPPAlign);  
00125     generateGraph();
00126     generateAlignment(almodel);
00127     outFile = "";
00128     outFile.append(outDir);
00129     outFile.append("C3");
00130     outFile.append(alFileName);
00131     fout.open(outFile.c_str());
00132     for(int i = 0; i < (int)_seqs.size(); i++){
00133       fout << _alignment[i];
00134     }
00135     fout << endl;
00136     fout.close();
00137     cerr << "Done! (" << t.elapsed() << "s)" << endl << endl;
00138 
00139     clearAll();
00140 
00141     for(int i = 0; i < (int)_names.size(); i++){
00142       for(int j = i+1; j < (int)_names.size(); j++){
00143         consPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppAlign[_names[i]][_names[j]]));
00144         consPPGap1[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppGap1[_names[i]][_names[j]]));
00145         consPPGap2[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppGap2[_names[i]][_names[j]]));
00146       }
00147     }
00148 
00149     cerr << "\tApplying consistency transformation without weights and with gaps...";
00150     t.restart();
00151     predalignAlConsistencyNoEas(consPPAlign, _ppGap1, _ppGap2, numit);
00152     cerr << "Done! (" << t.elapsed() << "s)" << endl;
00153     cerr << "\t\tGenerating alignment with modified sequence annealing...";
00154     t.restart();
00155     initializePostProbsList(consPPAlign);  
00156     generateGraph();
00157     generateAlignment(almodel);
00158     outFile = "";
00159     outFile.append(outDir);
00160     outFile.append("C4");
00161     outFile.append(alFileName);
00162     fout.open(outFile.c_str());
00163     for(int i = 0; i < (int)_seqs.size(); i++){
00164       fout << _alignment[i];
00165     }
00166     fout << endl;
00167     fout.close();
00168     cerr << "Done! (" << t.elapsed() << "s)" << endl;
00169 
00170     clearAll();
00171 
00172     cerr << "\t\tGenerating alignment original sequence annealing...";
00173     t.restart();
00174     for(int i = 0; i < (int)_names.size(); i++){
00175       for(int j = i+1; j < (int)_names.size(); j++){
00176         consPPAlign[_names[i]][_names[j]]->removeLastLine();
00177         consPPAlign[_names[i]][_names[j]]->removeLastColumn();
00178       }
00179     }
00180     initializePostProbsList(consPPAlign);  
00181     generateGraph();
00182     generateAlignment(almodel);
00183     outFile = "";
00184     outFile.append(outDir);
00185     outFile.append("C5");
00186     outFile.append(alFileName);
00187     fout.open(outFile.c_str());
00188     for(int i = 0; i < (int)_seqs.size(); i++){
00189       fout << _alignment[i];
00190     }
00191     fout << endl;
00192     fout.close();
00193     cerr << "Done! (" << t.elapsed() << "s)" << endl << endl;
00194 
00195     clearAll();
00196 
00197     for(int i = 0; i < (int)_names.size(); i++){
00198       for(int j = i+1; j < (int)_names.size(); j++){
00199         consPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_ppAlign[_names[i]][_names[j]]));
00200         consPPGap1[_names[i]][_names[j]].reset();
00201         consPPGap2[_names[i]][_names[j]].reset();
00202       }
00203     }
00204     
00205     cerr << "\tApplying picxaa consistency transformation...";
00206     t.restart();
00207     picxaaAlConsistency(consPPAlign, _eas, numit);
00208     cerr << "Done! (" << t.elapsed() << "s)" << endl;
00209     cerr << "\t\tGenerating alignment with original sequence annealing...";
00210     t.restart();
00211     initializePostProbsList(consPPAlign);  
00212     generateGraph();
00213     generateAlignment(almodel);
00214     outFile = "";
00215     outFile.append(outDir);
00216     outFile.append("C6");
00217     outFile.append(alFileName);
00218     fout.open(outFile.c_str());
00219     for(int i = 0; i < (int)_seqs.size(); i++){
00220       fout << _alignment[i];
00221     }
00222     fout << endl;
00223     fout.close();
00224     cerr << "Done! (" << t.elapsed() << "s)" << endl;
00225 
00226     clearAll();
00227 
00228     cerr << "\t\tGenerating alignment with modified sequence annealing...";
00229     t.restart();
00230     for(int i = 0; i < (int)_names.size(); i++){
00231       for(int j = i+1; j < (int)_names.size(); j++){
00232         consPPAlign[_names[i]][_names[j]]->addGaps(_ppGap1[_names[i]][_names[j]], _ppGap2[_names[i]][_names[j]]);
00233       }
00234     }
00235     initializePostProbsList(consPPAlign);  
00236     generateGraph();
00237     generateAlignment(almodel);
00238     outFile = "";
00239     outFile.append(outDir);
00240     outFile.append("C7");
00241     outFile.append(alFileName);
00242     fout.open(outFile.c_str());
00243     for(int i = 0; i < (int)_seqs.size(); i++){
00244       fout << _alignment[i];
00245     }
00246     fout << endl;
00247     fout.close();
00248     cerr << "Done! (" << t.elapsed() << "s)" << endl << endl;
00249 
00250     clearAll();
00251     
00252     for(int i = 0; i < (int)_names.size(); i++){
00253       for(int j = i+1; j < (int)_names.size(); j++){
00254         consPPAlign[_names[i]][_names[j]].reset();
00255       }
00256     }
00257 
00258     cerr << "\tApplying classic consistency transformation...";
00259     t.restart();
00260     classicAlConsistency(_ppAlign, numit);
00261     cerr << "Done! (" << t.elapsed() << "s)" << endl;
00262     cerr << "\t\tGenerating alignment with original sequence annealing...";
00263     t.restart();
00264     initializePostProbsList(_ppAlign);  
00265     generateGraph();
00266     generateAlignment(almodel);
00267     outFile = "";
00268     outFile.append(outDir);
00269     outFile.append("C8");
00270     outFile.append(alFileName);
00271     fout.open(outFile.c_str());
00272     for(int i = 0; i < (int)_seqs.size(); i++){
00273       fout << _alignment[i];
00274     }
00275     fout << endl;
00276     fout.close();
00277     cerr << "Done! (" << t.elapsed() << "s)" << endl;
00278 
00279     clearAll();
00280 
00281     cerr << "\t\tGenerating alignment with modified sequence annealing...";
00282     t.restart();
00283     for(int i = 0; i < (int)_names.size(); i++){
00284       for(int j = i+1; j < (int)_names.size(); j++){
00285         _ppAlign[_names[i]][_names[j]]->addGaps(_ppGap1[_names[i]][_names[j]], _ppGap2[_names[i]][_names[j]]);
00286       }
00287     }
00288     initializePostProbsList(_ppAlign);  
00289     generateGraph();
00290     generateAlignment(almodel);
00291     outFile = "";
00292     outFile.append(outDir);
00293     outFile.append("C9");
00294     outFile.append(alFileName);
00295     fout.open(outFile.c_str());
00296     for(int i = 0; i < (int)_seqs.size(); i++){
00297       fout << _alignment[i];
00298     }
00299     fout << endl;
00300     fout.close();
00301     cerr << "Done! (" << t.elapsed() << "s)" << endl << endl;
00302   }
00303 
00304   void MultipleAlignment::computeOneAlignment(ProbabilisticModelPtr almodel, SequenceList seqs, vector<string> names, int numit, int consScheme){
00305     _seqs = seqs;
00306     _names = names;
00307 
00308     postProbAlign(almodel,_ppAlign,_ppGap1,_ppGap2,_eas);
00309 
00310     if(consScheme == 1)
00311       predalignAlConsistencyWithEas(_ppAlign,_ppGap1,_ppGap2,numit,_eas);
00312 
00313     if(consScheme == 2){
00314       predalignAlConsistencyWithEas(_ppAlign,_ppGap1,_ppGap2,numit,_eas);
00315       for(int i = 0; i < (int)_names.size(); i++){
00316         for(int j = i+1; j < (int)_names.size(); j++){
00317           _ppAlign[_names[i]][_names[j]]->removeLastLine();
00318           _ppAlign[_names[i]][_names[j]]->removeLastColumn();
00319         }
00320       }
00321     }
00322 
00323     if(consScheme == 3)
00324       picxaaAlConsistency(_ppAlign,_eas,numit);
00325     
00326     if(consScheme == 4)
00327       classicAlConsistency(_ppAlign,numit);
00328 
00329     /*if(_alpha != -1)
00330     predalConsistencies(predmodels,ppAlign);*/
00331 
00332     initializePostProbsList(_ppAlign);  
00333     cerr << "Number of stored posterior probabilities: " << _postProbs.size() << endl;
00334     generateGraph();
00335     generateAlignment(almodel);
00336     for(int i = 0; i < (int)_seqs.size(); i++){
00337       cout << _alignment[i];
00338     }
00339     cout << endl;
00340   }
00341 
00342   void MultipleAlignment::computePredictionsAndAlignment(ProbabilisticModelPtr almodel, vector<ProbabilisticModelPtr> &predmodels, SequenceList seqs, vector<string> names, int numit, bool alternateConsistencies, string outDir){
00343     _seqs = seqs;
00344     _names = names;
00345 
00346     postProbAlign(almodel,_ppAlign,_ppGap1,_ppGap2,_eas);
00347     cerr << "Passou do postProbAlign" << endl;
00348     postProbPred(predmodels, _ppPred);
00349     cerr << "Passou do postProbPred" << endl;
00350 
00351     if(alternateConsistencies){
00352       for(int r = 0; r < numit; r++){
00353         makeAlignmentConsistentWithPrediction(_ppPred,_ppAlign,1);
00354         makePredictionConsistentWithAlignment(_ppPred,_ppAlign,1);
00355       }
00356     }
00357     else{
00358       map<string, SparseMatrixPtr> consistentPPpred;
00359       for(int i = 0; i < (int)_names.size(); i++){
00360         consistentPPpred[_names[i]] = SparseMatrixPtr(new SparseMatrix(_ppPred[_names[i]]));
00361       }
00362       makePredictionConsistentWithAlignment(consistentPPpred,_ppAlign,numit);
00363       makeAlignmentConsistentWithPrediction(_ppPred,_ppAlign,numit);
00364       for(int i = 0; i < (int)_names.size(); i++){
00365         _ppPred[_names[i]].reset();
00366         _ppPred[_names[i]] = consistentPPpred[_names[i]];
00367       }
00368     }
00369 
00370     for(int i = 0; i < (int)_names.size(); i++){
00371       SequenceFormatManager::instance()->setFormat(SequenceFormatPtr(new SequenceFormat()));
00372       SequenceEntry output(predmodels[i]->decodable()->getStateNames());
00373       Sequence states;
00374       clock_t begin = clock();
00375       float ea = predmodels[i]->decodable()->MEAPred(_seqs[i], states, _ppPred[_names[i]]);
00376       clock_t end = clock();
00377       std::cerr << "TIME: " << (double)(end - begin)/CLOCKS_PER_SEC << std::endl;
00378       stringstream new_name;
00379       new_name << _names[i] <<  ": " << ea;
00380       output.setName(new_name.str());
00381       output.setSequence(states);
00382       string outFile = "";
00383       outFile.append(outDir);
00384       outFile.append(_names[i]);
00385       outFile.append(".pred");
00386       ofstream fout;
00387       fout.open(outFile.c_str());
00388       fout << output;
00389       fout.close();
00390     }
00391       
00392     initializePostProbsList(_ppAlign);  
00393     cerr << "Number of stored posterior probabilities: " << _postProbs.size() << endl;
00394     generateGraph();
00395     generateAlignment(almodel);
00396     for(int i = 0; i < (int)_seqs.size(); i++){
00397       cout << _alignment[i];
00398     }
00399     cout << endl;
00400   }
00401 
00402   /*void MultipleAlignment::initializeFromFile(ProbabilisticModelPtr almodel, SequenceList seqs, vector<string> names, int numit, int consScheme, string inputFileName){
00403     _seqs = seqs;
00404     _names = names;
00405 
00406     ifstream fin;
00407     fin.open(inputFileName.c_str());
00408 
00409     for(int i = 0; i < (int)_names.size(); i++){
00410       for(int j = i+1; j < (int)_names.size(); j++){
00411         string temp, name1, name2;
00412         int line, col, nEntries;
00413         float prob;
00414         _ppAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_seqs[i].size(),_seqs[j].size()));
00415         _ppGap1[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_seqs[i].size(),_seqs[j].size()));
00416         _ppGap2[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(_seqs[i].size(),_seqs[j].size()));
00417         fin >> name1 >> name2 >> temp;
00418         fin >> prob;
00419         if(temp.compare("ea") != 0 || name1.compare(_names[i]) != 0 || name2.compare(_names[j]) != 0){
00420           cerr << "temp = >" << temp << "< name1 = >" << name1 << "< name2 = >" << name2 << "<" << endl;
00421           cerr << "_namesi = >" << _names[i] << "< _namesj = >" << _names[j] << "<" << endl;
00422           cerr << inputFileName << " is not in the correct format, or does not correspond the the sequence file provided. ea" << endl;
00423           exit(-1);
00424         }
00425         _eas[name1][name2] = prob;
00426         fin >> name1 >> name2 >> temp;
00427         if(temp.compare("ppAlign") != 0 || name1.compare(_names[i]) != 0 || name2.compare(_names[j]) != 0){
00428           cerr << inputFileName << " is not in the correct format, or does not correspond the the sequence file provided. ppAlign" << endl;
00429           exit(-1);
00430         }
00431         fin >> nEntries;
00432         for(int k = 0; k < nEntries; k++){
00433           fin >> line >> col >> prob;
00434           _ppAlign[name1][name2]->add(line,col,prob);
00435         }
00436         fin >> name1 >> name2 >> temp;
00437         if(temp.compare("ppGap1") != 0 || name1.compare(_names[i]) != 0 || name2.compare(_names[j]) != 0){
00438           cerr << inputFileName << " is not in the correct format, or does not correspond the the sequence file provided. ppGap1" << endl;
00439           exit(-1);
00440         }
00441         fin >> nEntries;
00442         for(int k = 0; k < nEntries; k++){
00443           fin >> line >> col >> prob;
00444           _ppGap1[name1][name2]->add(line,col,prob);
00445         }
00446         fin >> name1 >> name2 >> temp;
00447         if(temp.compare("ppGap2") != 0 || name1.compare(_names[i]) != 0 || name2.compare(_names[j]) != 0){
00448           cerr << inputFileName << " is not in the correct format, or does not correspond the the sequence file provided.ppGap2" << endl;
00449           exit(-1);
00450         }
00451         fin >> nEntries;
00452         for(int k = 0; k < nEntries; k++){
00453           fin >> line >> col >> prob;
00454           _ppGap2[name1][name2]->add(line,col,prob);
00455         }
00456       }
00457     }
00458     if(consScheme == 1)
00459       predalignAlConsistencyWithEas(_ppAlign, _ppGap1, _ppGap2,numit,_eas);
00460 
00461     if(consScheme == 2){
00462       predalignAlConsistencyWithEas(_ppAlign,_ppGap1,_ppGap2,numit,_eas);
00463       for(int i = 0; i < (int)_names.size(); i++){
00464         for(int j = i+1; j < (int)_names.size(); j++){
00465           _ppAlign[_names[i]][_names[j]]->removeLastLine();
00466           _ppAlign[_names[i]][_names[j]]->removeLastColumn();
00467         }
00468       }
00469     }
00470 
00471     if(consScheme == 3)
00472       picxaaAlConsistency(_ppAlign,_eas,numit);
00473     
00474     if(consScheme == 4)
00475       classicAlConsistency(_ppAlign,numit);
00476 
00477     initializePostProbsList(_ppAlign);  
00478     generateGraph();
00479     generateAlignment(almodel);
00480     for(int i = 0; i < (int)_seqs.size(); i++){
00481       cout << _alignment[i];
00482     }
00483     cout << endl;
00484     }*/
00485 
00486   /*  void MultipleAlignment::trainAndComputePPs(string initialModelFile, SequenceList seqs, vector<string> names, int maxTrainIter, string outFile){
00487     int numseqs = seqs.size();
00488     ProbabilisticModelCreatorClient creator;
00489     ProbabilisticModelPtr model;
00490     ofstream fout;
00491     fout.open(outFile.c_str());
00492     if(!fout.is_open()){
00493       cerr << "Could not open file: " << outFile << endl; 
00494       exit(-1);
00495     }
00496 
00497     model = creator.create(initialModelFile);
00498     model->pairDecodable()->trainBaumWelch(seqs, maxTrainIter, 1e-5);
00499     for(int i = 0; i < numseqs; i++){
00500       for(int j = i+1; j < numseqs; j++){
00501         SparseMatrixPtr ppAlign, ppGap1, ppGap2;
00502         ostringstream temp;
00503         float ea = model->pairDecodable()->posteriorProbabilities(seqs[i],seqs[j],ppAlign, ppGap1, ppGap2);
00504         fout << names[i] << " " << names[j] << " ea" << endl << ea << endl;
00505         fout << names[i] << " " << names[j] << " ppAlign" << endl;
00506         ppAlign->printMatrix(temp);
00507         fout << temp.str();
00508         temp.str("");
00509         fout << names[i] << " " << names[j] << " ppGap1" << endl;
00510         ppGap1->printMatrix(temp);
00511         fout << temp.str();
00512         temp.str("");
00513         fout << names[i] << " " << names[j] << " ppGap2" << endl;
00514         ppGap2->printMatrix(temp);
00515         fout << temp.str();
00516         temp.str("");
00517       }
00518     }
00519     fout.close();
00520     }*/
00521 
00523   //Posterior probabilities calculation functions///
00525 
00526   float MultipleAlignment::postProbAlign(ProbabilisticModelPtr model, map<string, map< string, SparseMatrixPtr > > &ppAlign, map<string, map< string, SparseMatrixPtr > > &ppGap1, map<string, map< string, SparseMatrixPtr > > &ppGap2, map<string, map<string, float> > &eas)
00527   {
00528     float sum = 0;
00529     for(int i = 0; i < (int)_seqs.size(); i++){
00530       for(int j = i+1; j < (int)_seqs.size(); j++){
00531         float ea = model->pairDecodable()->posteriorProbabilities(_seqs[i],_seqs[j],ppAlign[_names[i]][_names[j]], ppGap1[_names[i]][_names[j]], ppGap2[_names[i]][_names[j]]);
00532         eas[_names[i]][_names[j]] = ea;
00533         sum += ea;
00534       }
00535     }
00536    
00537     return sum;
00538   }
00539 
00540   void MultipleAlignment::postProbPred(vector<ProbabilisticModelPtr> &predmodels, map<string, SparseMatrixPtr > &ppPred)
00541   {  
00542     for(int i = 0; i < (int)_seqs.size(); i++){
00543       fMatrix postPred;
00544       predmodels[i]->decodable()->posteriorProbabilities(_seqs[i],postPred);
00545       ppPred[_names[i]] = SparseMatrixPtr(new SparseMatrix());
00546       ppPred[_names[i]]->buildPredMatrix(postPred.size1(),postPred.size2(),postPred);
00547     }
00548   }
00549 
00550 
00552   //Consistency transformation functions//////
00554     
00555   void MultipleAlignment::picxaaAlConsistency(map< string, map< string, SparseMatrixPtr > > &ppAlign,  map<string, map<string, float> > eas, int numit){
00556     for(int r = 0; r < numit; r++){
00557       map< string, map< string, SparseMatrixPtr > > consistentPPAlign;
00558       for(int i = 0; i < (int)_names.size(); i++){
00559         for(int j = i+1; j < (int)_names.size(); j++){
00560           float sumea = 2.0*eas[_names[i]][_names[j]]*eas[_names[i]][_names[j]];
00561           fMatrix postMatch;
00562           ppAlign[_names[i]][_names[j]]->getfMatrixTimesX(postMatch, 2.0*eas[_names[i]][_names[j]]*eas[_names[i]][_names[j]]);
00563           for(int m = 0; m < (int)_names.size(); m++){
00564             if(m == i || m ==j)
00565               continue;
00566             else if(m < i){
00567               ppAlign[_names[m]][_names[i]]->leftTransXright(ppAlign[_names[m]][_names[j]], postMatch, (eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]]));
00568               sumea += (eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]]);
00569             }
00570             else if(m > i && m < j){
00571               ppAlign[_names[i]][_names[m]]->leftXright(ppAlign[_names[m]][_names[j]], postMatch, (eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]]));
00572               sumea += (eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]]);
00573             }
00574             else if(m > j){
00575               SparseMatrixPtr AUX = SparseMatrixPtr(new SparseMatrix());
00576               AUX->transposeOf(ppAlign[_names[j]][_names[m]]);
00577               ppAlign[_names[i]][_names[m]]->leftXright(AUX, postMatch, (eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]]));
00578               AUX.reset();
00579               sumea += (eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]]);
00580             }
00581           }
00582           consistentPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postMatch,ppAlign[_names[i]][_names[j]], sumea));
00583         }       
00584       }
00585       for(int i = 0; i < (int)_names.size(); i++){
00586         for(int j = i+1; j < (int)_names.size(); j++){
00587           ppAlign[_names[i]][_names[j]].reset();
00588           ppAlign[_names[i]][_names[j]] = consistentPPAlign[_names[i]][_names[j]];
00589         }
00590       }
00591     }
00592   }
00593 
00594   void MultipleAlignment::classicAlConsistency(map< string, map< string, SparseMatrixPtr > > &ppAlign, int numit){
00595     for(int r = 0; r < numit; r++){
00596       map< string, map< string, SparseMatrixPtr > > consistentPPAlign;
00597       for(int i = 0; i < (int)_names.size(); i++){
00598         for(int j = i+1; j < (int)_names.size(); j++){
00599           fMatrix postMatch;
00600           ppAlign[_names[i]][_names[j]]->getfMatrixTimesX(postMatch, 2.0);
00601           for(int m = 0; m < (int)_names.size(); m++){
00602             if(m == i || m ==j)
00603               continue;
00604             else if(m < i){
00605               ppAlign[_names[m]][_names[i]]->leftTransXright(ppAlign[_names[m]][_names[j]], postMatch);
00606             }
00607             else if(m > i && m < j){
00608               ppAlign[_names[i]][_names[m]]->leftXright(ppAlign[_names[m]][_names[j]], postMatch);
00609             }
00610             else if(m > j){
00611               SparseMatrixPtr AUX = SparseMatrixPtr(new SparseMatrix());
00612               AUX->transposeOf(ppAlign[_names[j]][_names[m]]);
00613               ppAlign[_names[i]][_names[m]]->leftXright(AUX, postMatch);
00614               AUX.reset();
00615             }
00616           }
00617           consistentPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postMatch,ppAlign[_names[i]][_names[j]], (float)_names.size()));
00618         }       
00619       }
00620       for(int i = 0; i < (int)_names.size(); i++){
00621         for(int j = i+1; j < (int)_names.size(); j++){
00622           ppAlign[_names[i]][_names[j]].reset();
00623           ppAlign[_names[i]][_names[j]] = consistentPPAlign[_names[i]][_names[j]];
00624         }
00625       }
00626     }
00627   }
00628 
00629   void MultipleAlignment::predalignAlConsistencyNoEas(map< string, map< string, SparseMatrixPtr > > &ppAlign, map< string, map< string, SparseMatrixPtr > > &ppGap1,map< string, map< string, SparseMatrixPtr > > &ppGap2, int numit){
00630 
00631     for(int r = 0; r < numit; r++){
00632       map< string, map< string, SparseMatrixPtr > > consistentPPAlign;
00633       map< string, map< string, SparseMatrixPtr > > consistentPPGap1;
00634       map< string, map< string, SparseMatrixPtr > > consistentPPGap2;
00635       for(int i = 0; i < (int)_names.size(); i++){
00636         for(int j = i+1; j < (int)_names.size(); j++){
00637           fMatrix postMatch;
00638           fMatrix postGap1;
00639           fMatrix postGap2;
00640           ppAlign[_names[i]][_names[j]]->getfMatrixTimesX(postMatch, 2.0);
00641           ppGap1[_names[i]][_names[j]]->getfMatrixTimesX(postGap1, 1.0);
00642           ppGap2[_names[i]][_names[j]]->getfMatrixTimesX(postGap2, 1.0);
00643           for(int m = 0; m < (int)_names.size(); m++){
00644             if(m == i || m == j)
00645               continue;
00646             else if(m < i){
00647               ppAlign[_names[m]][_names[i]]->leftTransXright(ppAlign[_names[m]][_names[j]], postMatch);
00648               ppGap1[_names[m]][_names[i]]->leftTransXright(ppGap1[_names[m]][_names[j]], postMatch);
00649               ppAlign[_names[m]][_names[i]]->leftTransXright(ppGap2[_names[m]][_names[j]], postGap2);
00650               ppGap2[_names[m]][_names[i]]->leftTransXright(ppAlign[_names[m]][_names[j]], postGap1);
00651             }
00652             else if(m > i && m < j){
00653               ppAlign[_names[i]][_names[m]]->leftXright(ppAlign[_names[m]][_names[j]], postMatch);
00654               ppGap2[_names[i]][_names[m]]->leftXright(ppGap1[_names[m]][_names[j]], postMatch);
00655               ppAlign[_names[i]][_names[m]]->leftXright(ppGap2[_names[m]][_names[j]], postGap2);
00656               ppGap1[_names[i]][_names[m]]->leftXright(ppAlign[_names[m]][_names[j]], postGap1);
00657             }
00658             else if(m > j){
00659               SparseMatrixPtr AUX1 = SparseMatrixPtr(new SparseMatrix());
00660               SparseMatrixPtr AUX2 = SparseMatrixPtr(new SparseMatrix());
00661               SparseMatrixPtr AUX3 = SparseMatrixPtr(new SparseMatrix());
00662               AUX1->transposeOf(ppAlign[_names[j]][_names[m]]);
00663               ppAlign[_names[i]][_names[m]]->leftXright(AUX1, postMatch);
00664               ppGap1[_names[i]][_names[m]]->leftXright(AUX1, postGap1);
00665               AUX1.reset();
00666               AUX2->transposeOf(ppGap2[_names[j]][_names[m]]);
00667               ppGap2[_names[i]][_names[m]]->leftXright(AUX2, postMatch);
00668               AUX2.reset();
00669               AUX3->transposeOf(ppGap1[_names[j]][_names[m]]);
00670               ppAlign[_names[i]][_names[m]]->leftXright(AUX3, postGap2);
00671               AUX3.reset();
00672             }
00673           }
00674           consistentPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postMatch,ppAlign[_names[i]][_names[j]], (float)_names.size()));
00675           consistentPPGap1[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postGap1,ppGap1[_names[i]][_names[j]], (float)(_names.size()-1)));
00676           consistentPPGap2[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postGap2,ppGap2[_names[i]][_names[j]], (float)(_names.size()-1)));
00677         }       
00678       }
00679       for(int i = 0; i < (int)_names.size(); i++){
00680         for(int j = i+1; j < (int)_names.size(); j++){
00681           ppAlign[_names[i]][_names[j]].reset();
00682           ppAlign[_names[i]][_names[j]] = consistentPPAlign[_names[i]][_names[j]];
00683           ppGap1[_names[i]][_names[j]].reset();
00684           ppGap1[_names[i]][_names[j]] = consistentPPGap1[_names[i]][_names[j]];
00685           ppGap2[_names[i]][_names[j]].reset();
00686           ppGap2[_names[i]][_names[j]] = consistentPPGap2[_names[i]][_names[j]];
00687         }
00688       }
00689     }
00690     for(int i = 0; i < (int)_names.size(); i++){
00691       for(int j = i+1; j < (int)_names.size(); j++){
00692         ppAlign[_names[i]][_names[j]]->addGaps(ppGap1[_names[i]][_names[j]], ppGap2[_names[i]][_names[j]]);
00693       }
00694     }
00695   }
00696 
00697   void MultipleAlignment::predalignAlConsistencyWithEas(map< string, map< string, SparseMatrixPtr > > &ppAlign, map< string, map< string, SparseMatrixPtr > > &ppGap1,map< string, map< string, SparseMatrixPtr > > &ppGap2, int numit, map<string, map<string, float> > eas){
00698 
00699     for(int r = 0; r < numit; r++){
00700       map< string, map< string, SparseMatrixPtr > > consistentPPAlign;
00701       map< string, map< string, SparseMatrixPtr > > consistentPPGap1;
00702       map< string, map< string, SparseMatrixPtr > > consistentPPGap2;
00703       for(int i = 0; i < (int)_names.size(); i++){
00704         for(int j = i+1; j < (int)_names.size(); j++){
00705           fMatrix postMatch;
00706           fMatrix postGap1;
00707           fMatrix postGap2;
00708           float sumea = 2.0*eas[_names[i]][_names[j]]*eas[_names[i]][_names[j]];
00709           ppAlign[_names[i]][_names[j]]->getfMatrixTimesX(postMatch, 2.0*eas[_names[i]][_names[j]]*eas[_names[i]][_names[j]]);
00710           ppGap1[_names[i]][_names[j]]->getfMatrixTimesX(postGap1, 1.0);
00711           ppGap2[_names[i]][_names[j]]->getfMatrixTimesX(postGap2, 1.0);
00712           for(int m = 0; m < (int)_names.size(); m++){
00713             if(m == i || m == j)
00714               continue;
00715             else if(m < i){
00716               ppAlign[_names[m]][_names[i]]->leftTransXright(ppAlign[_names[m]][_names[j]], postMatch, eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]]);
00717               ppGap1[_names[m]][_names[i]]->leftTransXright(ppGap1[_names[m]][_names[j]], postMatch, eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]]);
00718               ppAlign[_names[m]][_names[i]]->leftTransXright(ppGap2[_names[m]][_names[j]], postGap2, eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]]);
00719               ppGap2[_names[m]][_names[i]]->leftTransXright(ppAlign[_names[m]][_names[j]], postGap1, eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]]);
00720               sumea += eas[_names[m]][_names[i]]*eas[_names[m]][_names[j]];
00721             }
00722             else if(m > i && m < j){
00723               ppAlign[_names[i]][_names[m]]->leftXright(ppAlign[_names[m]][_names[j]], postMatch, eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]]);
00724               ppGap2[_names[i]][_names[m]]->leftXright(ppGap1[_names[m]][_names[j]], postMatch, eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]]);
00725               ppAlign[_names[i]][_names[m]]->leftXright(ppGap2[_names[m]][_names[j]], postGap2, eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]]);
00726               ppGap1[_names[i]][_names[m]]->leftXright(ppAlign[_names[m]][_names[j]], postGap1, eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]]);
00727               sumea += eas[_names[i]][_names[m]]*eas[_names[m]][_names[j]];
00728             }
00729             else if(m > j){
00730               SparseMatrixPtr AUX1 = SparseMatrixPtr(new SparseMatrix());
00731               SparseMatrixPtr AUX2 = SparseMatrixPtr(new SparseMatrix());
00732               SparseMatrixPtr AUX3 = SparseMatrixPtr(new SparseMatrix());
00733               AUX1->transposeOf(ppAlign[_names[j]][_names[m]]);
00734               ppAlign[_names[i]][_names[m]]->leftXright(AUX1, postMatch, eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]]);
00735               ppGap1[_names[i]][_names[m]]->leftXright(AUX1, postGap1, eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]]);
00736               AUX1.reset();
00737               AUX2->transposeOf(ppGap2[_names[j]][_names[m]]);
00738               ppGap2[_names[i]][_names[m]]->leftXright(AUX2, postMatch, eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]]);
00739               AUX2.reset();
00740               AUX3->transposeOf(ppGap1[_names[j]][_names[m]]);
00741               ppAlign[_names[i]][_names[m]]->leftXright(AUX3, postGap2, eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]]);
00742               AUX3.reset();
00743               sumea += eas[_names[i]][_names[m]]*eas[_names[j]][_names[m]];
00744             }
00745           }
00746           consistentPPAlign[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postMatch,ppAlign[_names[i]][_names[j]], sumea));
00747           consistentPPGap1[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postGap1,ppGap1[_names[i]][_names[j]], sumea));
00748           consistentPPGap2[_names[i]][_names[j]] = SparseMatrixPtr(new SparseMatrix(postGap2,ppGap2[_names[i]][_names[j]], sumea));
00749         }       
00750       }
00751       for(int i = 0; i < (int)_names.size(); i++){
00752         for(int j = i+1; j < (int)_names.size(); j++){
00753           ppAlign[_names[i]][_names[j]].reset();
00754           ppAlign[_names[i]][_names[j]] = consistentPPAlign[_names[i]][_names[j]];
00755           ppGap1[_names[i]][_names[j]].reset();
00756           ppGap1[_names[i]][_names[j]] = consistentPPGap1[_names[i]][_names[j]];
00757           ppGap2[_names[i]][_names[j]].reset();
00758           ppGap2[_names[i]][_names[j]] = consistentPPGap2[_names[i]][_names[j]];
00759         }
00760       }
00761     }
00762     for(int i = 0; i < (int)_names.size(); i++){
00763       for(int j = i+1; j < (int)_names.size(); j++){
00764         ppAlign[_names[i]][_names[j]]->addGaps(ppGap1[_names[i]][_names[j]], ppGap2[_names[i]][_names[j]]);
00765       }
00766     }
00767   }
00768 
00769   void MultipleAlignment::makeAlignmentConsistentWithPrediction(map<string, SparseMatrixPtr > &ppPred, map< string, map< string, SparseMatrixPtr > > &ppAlign, int numit){
00770     for(int i = 0; i < (int)_names.size(); i++){
00771       for(int j = i+1; j < (int)_names.size(); j++){
00772         ppAlign[_names[i]][_names[j]]->applyPrediction(ppPred[_names[i]], ppPred[_names[j]]);
00773       }
00774     }
00775     classicAlConsistency(ppAlign,numit);
00776   }
00777 
00778   void MultipleAlignment::makePredictionConsistentWithAlignment(map<string, SparseMatrixPtr > &ppPred, map< string, map<string, SparseMatrixPtr> > &ppAlign, int numit){
00779     for(int r = 0; r < numit; r++){
00780       map< string, SparseMatrixPtr > consistentPPpred;
00781       for(int i = 0; i < (int)_names.size(); i++){
00782         fMatrix postPred;
00783         ppPred[_names[i]]->getfMatrixTimesX(postPred, 1.0);
00784         for(int m = 0; m < (int)_names.size(); m++){
00785           if(m == i)
00786             continue;
00787           else if(m < i){
00788             ppAlign[_names[m]][_names[i]]->leftTransXright(ppPred[_names[m]], postPred);
00789           }
00790           else{
00791             ppAlign[_names[i]][_names[m]]->leftXright(ppPred[_names[m]], postPred);
00792           }
00793         }
00794         consistentPPpred[_names[i]] = SparseMatrixPtr(new SparseMatrix(postPred,ppPred[_names[i]], (float)_names.size()));
00795       } 
00796       for(int i = 0; i < (int)_names.size(); i++){
00797         ppPred[_names[i]].reset();
00798         ppPred[_names[i]] = consistentPPpred[_names[i]];
00799       }
00800     }
00801   }     
00802 
00803 
00805   //Alignment construction functions///////////////////////////
00807 
00808   bool compare(postProb first, postProb second){
00809     if(first.prob >= second.prob)
00810       return true;
00811     return false;
00812   }
00813 
00814   void MultipleAlignment::initializePostProbsList(map< string, map< string, SparseMatrixPtr > > &ppAlign){
00815     for(int i = 0; i < (int)_names.size(); i++){
00816       for(int j = i+1; j < (int)_names.size(); j++){
00817         bool hasElement = true;
00818         while(hasElement){
00819           int s1, p1, s2, p2;
00820           postProb p;
00821           s1 = i;
00822           s2 = j;
00823           hasElement = ppAlign[_names[i]][_names[j]]->next(&p1,&p2,&(p.prob));
00824           p.pos1.first = s1;
00825           p.pos1.second = p1-1;
00826           p.pos2.first = s2;
00827           p.pos2.second = p2-1;
00828           /*p.prob = (p.prob)/(ppAlign[_names[i]][_names[j]]->get(p1,_seqs[j].size())+ppAlign[_names[i]][_names[j]]->get(_seqs[i].size(),p2));
00829           if(p.prob < 0.5){
00830             continue;
00831           }
00832           if(p1 == (int)_seqs[s1].size() || p2 == (int)_seqs[s2].size())
00833           continue;*/
00834           //cout << "s1 = " << s1 << " p1 = " << p1 << " s2 = " << s2 << " p2 = " << p2 << endl;
00835           _postProbs.push_back(p);
00836         }
00837       }
00838     }
00839     _postProbs.sort(compare);
00840   }
00841 
00842   void MultipleAlignment::generateAlignment(ProbabilisticModelPtr almodel){
00843     vector<Sequence> als;
00844     for(int i = 0; i < (int)_seqs.size(); i++){
00845       Sequence s(_graph.size(),almodel->pairDecodable()->getGapId());
00846       als.push_back(s);
00847     }
00848     for(int i = 0; i < (int)_graph.size(); i++){
00849       for(int k = 0; k < (int)_graph[i].colSeqPos.size(); k++){
00850         if(_graph[i].colSeqPos[k].second == (int)_seqs[_graph[i].colSeqPos[k].first].size())
00851           continue;
00852         als[_graph[i].colSeqPos[k].first][i] = _seqs[_graph[i].colSeqPos[k].first][_graph[i].colSeqPos[k].second];
00853       }
00854     }
00855     for(int i = 0; i < (int)_seqs.size(); i++){
00856       SequenceEntry se(almodel->alphabet());
00857       se.setName(_names[i]);
00858       se.setSequence(als[i]);
00859       _alignment.push_back(se);
00860     }
00861   }
00862 
00863 
00864 
00865   void MultipleAlignment::generateGraph(){
00866     //Initialize all possible columns
00867     _numberOfColumns = 0;
00868     int numSeqs = _seqs.size();
00869     int maxSeqSize = 0;
00870     for(int i = 0; i < numSeqs; i++){
00871       if((int)_seqs[i].size() > maxSeqSize)
00872         maxSeqSize = _seqs[i].size();
00873     }
00874 
00875     int index = 0;
00876     for(int j = 0; j < maxSeqSize; j++){
00877       for(int i = 0; i < numSeqs; i++){
00878         if(j >= (int)_seqs[i].size())
00879           continue;
00880         seqPos sp;
00881         column c;
00882         sp.first = i;
00883         sp.second = j;
00884         c.index = index;
00885         _seqPosMap[sp] = index;
00886         index++;
00887         c.isDead = false;
00888         c.visited = false;
00889         c.colSeqPos.push_back(sp);
00890         _graph.push_back(c);
00891         _numberOfColumns++;
00892       }
00893     }
00894 
00895     while(!_postProbs.empty()){
00896       addToGappedGraph(*_postProbs.begin());
00897       _postProbs.erase(_postProbs.begin());
00898     }
00899     findGraphMcd();
00900   }
00901 
00902   void MultipleAlignment::addToGraph(postProb p){
00903     int index1 = _seqPosMap[p.pos1];
00904     int index2 = _seqPosMap[p.pos2];
00905     int ub,lb;
00906     seqPos ubsp, lbsp;
00907     if(index1 > index2){
00908       ub = index1;
00909       ubsp = p.pos1;
00910       lb = index2;
00911       lbsp = p.pos2;
00912     }
00913     else if(index2 > index1){
00914       ub = index2;
00915       ubsp = p.pos2;
00916       lb = index1;
00917       lbsp = p.pos1;
00918     }
00919     else
00920       return;
00921 
00922     vector<column> df,db;
00923     _graph[ub].colSeqPos.push_back(lbsp);
00924     bool cycle = dfsf(ub,_graph[lb],df);
00925     unvisit(df);
00926     if(cycle){
00927       _graph[ub].colSeqPos.pop_back();
00928       return;
00929     }
00930     dfsb(lb,_graph[ub],db);
00931     unvisit(db);
00932     if(db[0].index != ub)
00933       cerr << "DEU PAUU!!" << endl;
00934     if(df[0].index != lb)
00935       cerr << "DEU PAUU!!" << endl;
00936     assert(db[0].index == ub);
00937     assert(df[0].index == lb);
00938     merge(&db[0],&df[0]);
00939     reorder(df,db);    
00940   }
00941 
00942   bool MultipleAlignment::dfsf(int ub, column n, vector<column> &df){
00943     _graph[n.index].visited = true;
00944     df.push_back(n);
00945     for(int i = 0; i < (int)n.colSeqPos.size(); i++){
00946       seqPos w = n.colSeqPos[i];
00947       w.second++;
00948       if(w.second >= (int)_seqs[w.first].size())
00949         continue;
00950       int next_index = _seqPosMap[w];
00951       if(next_index == ub){
00952         return true;
00953       }
00954       if(next_index > ub || _graph[next_index].visited)
00955         continue;
00956       bool cycle = dfsf(ub,_graph[next_index],df);
00957       if(cycle){
00958         return true;
00959       }
00960     }
00961     return false;
00962   }
00963 
00964   void MultipleAlignment::dfsb(int lb, column n, vector<column> &db){
00965     _graph[n.index].visited = true;
00966     db.push_back(n);
00967     for(int i = 0; i < (int)n.colSeqPos.size(); i++){
00968       seqPos w = n.colSeqPos[i];
00969       if(w.second == (int)_seqs[w.first].size())
00970         continue;
00971       w.second--;
00972       if(w.second < 0)
00973         continue;
00974       int next_index = _seqPosMap[w];
00975       if(next_index < lb || _graph[next_index].visited)
00976         continue;
00977       dfsb(lb,_graph[next_index],db);
00978     }
00979   }
00980 
00981   void MultipleAlignment::merge(column *ub, column *lb){
00982     lb->isDead = true;
00983     _numberOfColumns--;
00984     bool flag = false;
00985     for(int i = 0; i < (int)(lb->colSeqPos).size(); i++){
00986       for(int j = 0; j < (int)(ub->colSeqPos).size(); j++){
00987         if(lb->colSeqPos[i].first == ub->colSeqPos[j].first && lb->colSeqPos[i].second == ub->colSeqPos[j].second)
00988           flag = true;
00989       }
00990       if(flag == false)
00991         ub->colSeqPos.push_back(lb->colSeqPos[i]);
00992       flag = false;
00993     }
00994     lb->colSeqPos.clear();
00995   }
00996     
00997   bool smallerIndex(column a, column b){
00998     if(a.index < b.index)
00999       return true;
01000     return false;
01001   }
01002 
01003   bool biggerIndex(column a, column b){
01004     if(a.index >= b.index)
01005       return true;
01006     return false;
01007   }
01008 
01009   void MultipleAlignment::reorder(vector<column> &df, vector<column> &db){
01010     vector<int> L;
01011     int i = 0;
01012     int j = 0;
01013     sort(df.begin(), df.end(), smallerIndex);
01014     sort(db.begin(), db.end(),smallerIndex);
01015     for(int k = 0; k < (int)(db.size()+df.size()); k++){
01016       if(i >= (int)db.size()){
01017         L.push_back(df[j].index);
01018         j++;
01019         continue;
01020       }
01021       if(j >= (int)df.size()){
01022         L.push_back(db[i].index);
01023         i++;
01024         continue;
01025       }
01026       if(db[i].index < df[j].index){
01027         L.push_back(db[i].index);
01028         i++;
01029       }
01030       else{
01031         L.push_back(df[j].index);
01032         j++;
01033       }
01034     }
01035     i = 0;
01036     for(int k = 0; k < (int)db.size(); k++){
01037       db[k].index = L[i];
01038       for(j = 0; j < (int)db[k].colSeqPos.size(); j++){
01039         _seqPosMap[db[k].colSeqPos[j]] = L[i];
01040       }
01041       _graph[L[i]] = db[k];
01042       i++;
01043     }
01044     for(int k = 0; k < (int)df.size(); k++){
01045       df[k].index = L[i];
01046       for(j = 0; j < (int)df[k].colSeqPos.size(); j++){
01047         _seqPosMap[df[k].colSeqPos[j]] = L[i];
01048       }
01049       _graph[L[i]] = df[k];
01050       i++;
01051     }
01052   }
01053 
01054   void MultipleAlignment::findGraphMcd(){
01055     //find nodes with first positions in sequences
01056     //unvisit();
01057     std::stack< column* > nodeStack;
01058     vector<bool> inStack (_graph.size(),false);
01059     vector<int> finishingTimes (_graph.size(),-1);
01060     vector<int> initialColumns;
01061     for(int i = 0; i < (int)_seqs.size(); i++){
01062       seqPos sp;
01063       sp.first = i;
01064       sp.second = 0;
01065       if(!inStack[_seqPosMap[sp]]){
01066         initialColumns.push_back(_seqPosMap[sp]);
01067         inStack[_seqPosMap[sp]] = true;
01068       }
01069     }
01070     sort(initialColumns.begin(), initialColumns.end());
01071     for(int i = 0; i < (int)initialColumns.size(); i++){
01072       nodeStack.push(&_graph[initialColumns[i]]);
01073     }
01074     int time = 0;
01075     while(!nodeStack.empty()){
01076       column *c = nodeStack.top();
01077       nodeStack.pop();
01078       if(c->visited){
01079         finishingTimes[c->index] = time++;
01080         continue;
01081       }
01082       c->visited = true;
01083       nodeStack.push(c);
01084       
01085       vector<int> neighbors;
01086       for(int i = 0; i < (int)c->colSeqPos.size(); i++){
01087         seqPos sp = c->colSeqPos[i];
01088         sp.second++;
01089         if(sp.second >= (int)_seqs[sp.first].size())
01090           continue;
01091         if(!inStack[_seqPosMap[sp]]){
01092           neighbors.push_back(_seqPosMap[sp]);
01093           inStack[_seqPosMap[sp]] = true;
01094         }
01095       }
01096       sort(neighbors.begin(), neighbors.end());
01097       for(int i = 0; i < (int)neighbors.size(); i++){
01098         nodeStack.push(&_graph[neighbors[i]]);
01099       }
01100     }
01101     
01102     vector<column> tempGraph;
01103     for(int i = 0; i < (int)_graph.size(); i++){
01104       if(_graph[i].isDead){
01105         continue;
01106       }
01107       if(finishingTimes[i] < 0)
01108         cerr << "DEU PAU TIMES" << endl;
01109       _graph[i].index = finishingTimes[i]; 
01110       tempGraph.push_back(_graph[i]);
01111     }
01112     sort(tempGraph.begin(), tempGraph.end(), biggerIndex);
01113     for(int i = 0; i < _numberOfColumns; i++){
01114       tempGraph[i].index = i;
01115       _graph[i] = tempGraph[i];
01116     }
01117     _graph.erase(_graph.begin()+_numberOfColumns,_graph.end());
01118   }
01119       
01120   void MultipleAlignment::unvisit(vector<column> &v){
01121     for(int i = 0; i < (int)v.size(); i++){
01122       _graph[v[i].index].visited = false;
01123     }
01124   }
01125 
01126   void MultipleAlignment::addToGappedGraph(postProb p){
01127     if(p.pos1.second == (int)_seqs[p.pos1.first].size() && p.pos2.second == (int)_seqs[p.pos2.first].size()){
01128       cout << "Tried to add gap-gap pair to the graph" << endl;
01129       exit(-1);
01130     }
01131     if(p.pos1.second == (int)_seqs[p.pos1.first].size()){
01132       int index = _seqPosMap[p.pos2];
01133       for(int i = 0; i < (int)_graph[index].colSeqPos.size(); i++){
01134         if(p.pos1.first == _graph[index].colSeqPos[i].first)
01135           return;
01136       }
01137       _graph[index].colSeqPos.push_back(p.pos1);
01138       return;
01139     }
01140     if(p.pos2.second == (int)_seqs[p.pos2.first].size()){
01141       int index = _seqPosMap[p.pos1];
01142       for(int i = 0; i < (int)_graph[index].colSeqPos.size(); i++){
01143         if(p.pos2.first == _graph[index].colSeqPos[i].first)
01144           return;
01145       }
01146       _graph[index].colSeqPos.push_back(p.pos2);
01147       return;
01148     }
01149     
01150     int index1 = _seqPosMap[p.pos1];
01151     int index2 = _seqPosMap[p.pos2];
01152     int ub,lb;
01153     seqPos ubsp, lbsp;
01154     if(index1 > index2){
01155       ub = index1;
01156       ubsp = p.pos1;
01157       lb = index2;
01158       lbsp = p.pos2;
01159     }
01160     else if(index2 > index1){
01161       ub = index2;
01162       ubsp = p.pos2;
01163       lb = index1;
01164       lbsp = p.pos1;
01165     }
01166     else
01167       return;
01168 
01169     for(int i = 0; i < (int)_graph[ub].colSeqPos.size(); i++){
01170       for(int j = 0; j < (int)_graph[lb].colSeqPos.size(); j++){
01171         if(_graph[ub].colSeqPos[i].first == _graph[lb].colSeqPos[i].first && _graph[ub].colSeqPos[i].second != _graph[lb].colSeqPos[i].second)
01172           return;
01173       }
01174     }
01175 
01176     vector<column> df,db;
01177     _graph[ub].colSeqPos.push_back(lbsp);
01178     bool cycle = dfsf(ub,_graph[lb],df);
01179     unvisit(df);
01180     if(cycle){
01181       _graph[ub].colSeqPos.pop_back();
01182       return;
01183     }
01184     dfsb(lb,_graph[ub],db);
01185     unvisit(db);
01186     if(db[0].index != ub)
01187       cerr << "DEU PAUU!!" << endl;
01188     if(df[0].index != lb)
01189       cerr << "DEU PAUU!!" << endl;
01190     assert(db[0].index == ub);
01191     assert(df[0].index == lb);
01192     merge(&db[0],&df[0]);
01193     reorder(df,db);    
01194   }
01195 
01196 
01200 
01201   void MultipleAlignment::printPPList(){
01202     if(_postProbs.empty())
01203       return;
01204     list<postProb>::iterator it = _postProbs.begin();
01205     while(it != _postProbs.end()){
01206       cout << "(" << (*it).prob << "," << (*it).pos1.first << "," << (*it).pos1.second << "," << (*it).pos2.first << "," << (*it).pos2.second << ") ";
01207       it++;
01208     }
01209     cout << endl;
01210   }
01211 }