ToPS
DiscreteIIDModel.cpp
00001 /*
00002  *       DiscreteIIDModel.cpp
00003  *
00004  *       Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br>
00005  *                      Ígor Bonádio <ibonadio@ime.usp.br>
00006  *                      Vitor Onuchic <vitoronuchic@gmail.com>
00007  *                      Alan Mitchell Durham <aland@usp.br>
00008  *
00009  *       This program is free software; you can redistribute it and/or modify
00010  *       it under the terms of the GNU  General Public License as published by
00011  *       the Free Software Foundation; either version 3 of the License, or
00012  *       (at your option) any later version.
00013  *
00014  *       This program is distributed in the hope that it will be useful,
00015  *       but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  *       GNU General Public License for more details.
00018  *
00019  *       You should have received a copy of the GNU General Public License
00020  *       along with this program; if not, write to the Free Software
00021  *       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00022  *       MA 02110-1301, USA.
00023  */
00024 
00025 #include "DiscreteIIDModel.hpp"
00026 #include "DiscreteIIDModelCreator.hpp"
00027 #include "Symbol.hpp"
00028 #include <iostream>
00029 #include <cmath>
00030 #include <sstream>
00031 #include <vector>
00032 #include <iterator>
00033 
00034 namespace tops {
00035 
00036   DiscreteIIDModel:: DiscreteIIDModel(const DoubleVector & probabilities)
00037   {
00038     _size = probabilities.size() - 1;
00039     _log_probabilities.resize(_size+1);
00040     double  sum = 0;
00041     for(unsigned int i = 0; i < probabilities.size(); i++)
00042       sum += probabilities[i];
00043     for(int i = 0; (i <= _size); i++)
00044       if(i >= (int)probabilities.size())
00045         _log_probabilities[i] = -HUGE;
00046       else if(close(probabilities[i], 0.0, 1e-10))
00047         _log_probabilities[i] = -HUGE;
00048       else
00049         _log_probabilities[i] = log(probabilities[i]/sum);
00050   }
00051 
00052   DiscreteIIDModel::DiscreteIIDModel(const Matrix & probabilities){
00053     int size1 = probabilities.size1();
00054     int size2 = probabilities.size2();
00055     _size = size1 * size2 - 1;
00056     _log_probabilities_matrix.resize(size1, size2);
00057     double sum = 0;
00058     for(int i = 0; i < size1; i++){
00059       for(int j = 0; j < size2; j++){
00060         if(close(probabilities(i,j), 0.0, 1e-10))
00061           continue;
00062         sum += probabilities(i,j);
00063       }
00064     }
00065     for(int i = 0; i < size1; i++){
00066       for(int j = 0; j < size2; j++){
00067         if(close(probabilities(i,j), 0.0, 1e-10))
00068           _log_probabilities_matrix(i,j) = -HUGE;
00069         else
00070           _log_probabilities_matrix(i,j) = log(probabilities(i,j)/sum);
00071       }
00072       _geometric_tail = false;
00073     }
00074   }
00075 
00076   void DiscreteIIDModel::setProbabilities(const DoubleVector & probabilities) {
00077     _size = probabilities.size() - 1;
00078     _log_probabilities.resize(_size+1);
00079     double  sum = 0;
00080     for(unsigned int i = 0; i < probabilities.size(); i++)
00081       sum += probabilities[i];
00082     for(int i = 0; (i <= _size); i++)
00083       if(i >= (int)probabilities.size())
00084         _log_probabilities[i] = -HUGE;
00085       else if(close(probabilities[i], 0.0, 1e-10))
00086         _log_probabilities[i] = -HUGE;
00087       else
00088         _log_probabilities[i] = log(probabilities[i]/sum);
00089   }
00090  double DiscreteIIDModel::choosePosition(const Sequence & s, int i ) const {
00091    return choose();
00092  }
00093 
00094   double DiscreteIIDModel::evaluatePosition(const Sequence & s, unsigned int i) const  {
00095     return log_probability_of(s[i]);
00096   }
00097 
00098    double DiscreteIIDModel::log_probability_of(int s) const  {
00099      if ((s>=0) && (s < (int)(_log_probabilities.size() )))
00100         return _log_probabilities[s];
00101     return -HUGE;
00102    }
00103 
00104   double DiscreteIIDModel::log_probability_of_pair(int s1, int s2) const  {
00105      if ((s1>=0) && (s1 < (int)(_log_probabilities_matrix.size1())) && (s2>=0) && (s2 < (int)(_log_probabilities_matrix.size2())))
00106        return _log_probabilities_matrix(s1,s2);
00107       else {
00108         return -HUGE;
00109       }
00110    }
00111 
00112   double DiscreteIIDModel::log_probability_of(int s, double new_value) {
00113     _log_probabilities[s] = new_value;
00114     return log_probability_of(s);
00115   }
00116 
00117   double DiscreteIIDModel::log_probability_of_pair(int s1, int s2, double new_value) {
00118     _log_probabilities_matrix(s1,s2) = new_value;
00119     return log_probability_of_pair(s1,s2);
00120   }
00121 
00122   int DiscreteIIDModel::size() const
00123   {
00124     return _size;
00125   }
00126 
00127   double DiscreteIIDModel:: choose() const {
00128     double random = ((double)rand())/ ((double) RAND_MAX + 1.0) ;
00129     int result;
00130     for(result = 0; result < (int)_log_probabilities.size(); result++)
00131       {
00132         random -= exp(_log_probabilities[result]);
00133         if(random <= 0)
00134           break;
00135       }
00136     if(result >= (int)_log_probabilities.size())
00137       return (double)(result-1);
00138     return (double)result;
00139   }
00140 
00141   void DiscreteIIDModel::choosePair(int* a, int* b) const {
00142     double random = ((double)rand())/ ((double) RAND_MAX + 1.0) ;
00143     int result1 = 0;
00144     int result2 = 0;
00145     for(result1 = 0; result1 < (int)_log_probabilities_matrix.size1(); result1++){
00146       for(result2 = 0; result2 < (int)_log_probabilities_matrix.size2(); result2++){
00147         random -= exp(_log_probabilities_matrix(result1,result2));
00148         if(random <= 0)
00149           break;
00150       }
00151       if(random <= 0)
00152         break;
00153     }
00154     if(result1 >= (int)_log_probabilities_matrix.size1() && result2 >= (int)_log_probabilities_matrix.size2()){
00155       *a = (result1 - 1);
00156       *b = (result2 - 1);
00157     }
00158     else{
00159       *a = result1;
00160       *b = result2;
00161     }
00162   }
00163 
00164   ProbabilisticModelCreatorPtr DiscreteIIDModel::getFactory () const{
00165     return DiscreteIIDModelCreatorPtr(new DiscreteIIDModelCreator());
00166   }
00167 
00168   std::string DiscreteIIDModel::str () const {
00169     std::stringstream out;
00170     AlphabetPtr alpha = alphabet();
00171 
00172     out << "model_name = \"DiscreteIIDModel\"\n" ;
00173     out << "probabilities = (";
00174     if(_log_probabilities.size() > 0){
00175       out << exp(_log_probabilities[0]);
00176       for(int k = 1; k < (int)_log_probabilities.size(); k++)
00177         {
00178           out << "," << exp(_log_probabilities[k]);
00179         }
00180       out << ")\n";
00181     }
00182     if(alpha != NULL)
00183       {
00184         out << alpha->str();
00185       }
00186     return out.str();
00187   }
00188 
00189 
00190   void DiscreteIIDModel::strMatrix () const {
00191     if(_log_probabilities_matrix.size1() > 0){
00192       for(int i = 0; i < (int)_log_probabilities_matrix.size1(); i++){
00193         for(int j = 0; j < (int)_log_probabilities_matrix.size2(); j++){
00194           if(j == (int)_log_probabilities_matrix.size2() -1)
00195             std::cout << exp(_log_probabilities_matrix(i,j)) << endl;
00196           else
00197             std::cout << exp(_log_probabilities_matrix(i,j)) << " ";
00198         }
00199       }
00200       cout << endl << endl;
00201     }
00202   }
00203 
00204   void DiscreteIIDModel::initializeFromMap(const std::map <std::string, double> & probabilities, AlphabetPtr alphabet)
00205   {
00206     std::map<std::string,double>::const_iterator it;
00207     DoubleVector probs;
00208     probs.resize(alphabet->size());
00209     for(int i = 0; i < (int)alphabet->size()  ; i++)
00210       probs[i] = 0.0;
00211     for(it = probabilities.begin(); it != probabilities.end(); it++)
00212       {
00213 
00214         int id = alphabet->getSymbol(it->first)->id();
00215         if(id < (int)probs.size())
00216           probs[id] = it->second;
00217       }
00218     _log_probabilities.resize(probs.size());
00219     double  sum = 0;
00220     for(unsigned int i = 0; i < probs.size(); i++)
00221       {
00222         sum += probs[i] ;
00223         if(close(probs[i], 0.0, 1e-100))
00224           _log_probabilities[i] = -HUGE;
00225         else
00226           _log_probabilities[i] = log(probs[i]);
00227       }
00228     if(close(sum, 1.0, 1e-5))
00229       _log_probabilities.push_back(-HUGE);
00230     else {
00231       _log_probabilities[probs.size()-1] = log(1.0 - sum);
00232       std::cerr << "WARNING: DiscreteIIDModel parameters must sum 1.0: sum is " << sum << std::endl;
00233     }
00234     setAlphabet(alphabet);
00235   }
00236 
00237   void DiscreteIIDModel::initialize(const ProbabilisticModelParameters & p) {
00238     ProbabilisticModelParameterValuePtr probs = p.getMandatoryParameterValue("probabilities");
00239     ProbabilisticModelParameterValuePtr alphabet = p.getOptionalParameterValue("alphabet");
00240     ProbabilisticModelParameterValuePtr geometric_tail_ptr = p.getOptionalParameterValue("geometric_tail");
00241     DoubleVector distr = probs->getDoubleVector();
00242     setProbabilities(distr);
00243     if (alphabet != NULL)
00244       {
00245         AlphabetPtr alpha = AlphabetPtr(new Alphabet());
00246         alpha->initializeFromVector(alphabet->getStringVector());
00247         setAlphabet(alpha);
00248         if(geometric_tail_ptr != NULL) {
00249           _geometric_tail = (geometric_tail_ptr->getInt() == 1);
00250         }
00251       }
00252     _mean = 0.0;
00253     for (int i = 0 ; i < _log_probabilities.size(); i++){
00254       _mean += exp(_log_probabilities[i]) * i;
00255     }
00256 
00257   }
00258 
00259   ProbabilisticModelParameters DiscreteIIDModel::parameters () const {
00260     AlphabetPtr alpha = alphabet();
00261     ProbabilisticModelParameters par;
00262     par.add("model_name", StringParameterValuePtr(new StringParameterValue("DiscreteIIDModel")));
00263     DoubleVector probs;
00264     for(int k = 0; k < (int)_log_probabilities.size(); k++)
00265       {
00266         probs.push_back( exp(_log_probabilities[k]));
00267       }
00268     par.add("probabilities", DoubleVectorParameterValuePtr(new DoubleVectorParameterValue(probs)));
00269     if(alpha->size() > 0)
00270       par.add("alphabet", alpha->getParameterValue());
00271     if(_geometric_tail == 1)
00272       par.add("geometric_tail", IntParameterValuePtr(new IntParameterValue(1)));
00273     return par;
00274   }
00275 }
00276 
00277