ToPS
|
00001 /* 00002 * DiscreteIIDModel.cpp 00003 * 00004 * Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br> 00005 * Ígor Bonádio <ibonadio@ime.usp.br> 00006 * Vitor Onuchic <vitoronuchic@gmail.com> 00007 * Alan Mitchell Durham <aland@usp.br> 00008 * 00009 * This program is free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 3 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00022 * MA 02110-1301, USA. 00023 */ 00024 00025 #include "DiscreteIIDModel.hpp" 00026 #include "DiscreteIIDModelCreator.hpp" 00027 #include "Symbol.hpp" 00028 #include <iostream> 00029 #include <cmath> 00030 #include <sstream> 00031 #include <vector> 00032 #include <iterator> 00033 00034 namespace tops { 00035 00036 DiscreteIIDModel:: DiscreteIIDModel(const DoubleVector & probabilities) 00037 { 00038 _size = probabilities.size() - 1; 00039 _log_probabilities.resize(_size+1); 00040 double sum = 0; 00041 for(unsigned int i = 0; i < probabilities.size(); i++) 00042 sum += probabilities[i]; 00043 for(int i = 0; (i <= _size); i++) 00044 if(i >= (int)probabilities.size()) 00045 _log_probabilities[i] = -HUGE; 00046 else if(close(probabilities[i], 0.0, 1e-10)) 00047 _log_probabilities[i] = -HUGE; 00048 else 00049 _log_probabilities[i] = log(probabilities[i]/sum); 00050 } 00051 00052 DiscreteIIDModel::DiscreteIIDModel(const Matrix & probabilities){ 00053 int size1 = probabilities.size1(); 00054 int size2 = probabilities.size2(); 00055 _size = size1 * size2 - 1; 00056 _log_probabilities_matrix.resize(size1, size2); 00057 double sum = 0; 00058 for(int i = 0; i < size1; i++){ 00059 for(int j = 0; j < size2; j++){ 00060 if(close(probabilities(i,j), 0.0, 1e-10)) 00061 continue; 00062 sum += probabilities(i,j); 00063 } 00064 } 00065 for(int i = 0; i < size1; i++){ 00066 for(int j = 0; j < size2; j++){ 00067 if(close(probabilities(i,j), 0.0, 1e-10)) 00068 _log_probabilities_matrix(i,j) = -HUGE; 00069 else 00070 _log_probabilities_matrix(i,j) = log(probabilities(i,j)/sum); 00071 } 00072 _geometric_tail = false; 00073 } 00074 } 00075 00076 void DiscreteIIDModel::setProbabilities(const DoubleVector & probabilities) { 00077 _size = probabilities.size() - 1; 00078 _log_probabilities.resize(_size+1); 00079 double sum = 0; 00080 for(unsigned int i = 0; i < probabilities.size(); i++) 00081 sum += probabilities[i]; 00082 for(int i = 0; (i <= _size); i++) 00083 if(i >= (int)probabilities.size()) 00084 _log_probabilities[i] = -HUGE; 00085 else if(close(probabilities[i], 0.0, 1e-10)) 00086 _log_probabilities[i] = -HUGE; 00087 else 00088 _log_probabilities[i] = log(probabilities[i]/sum); 00089 } 00090 double DiscreteIIDModel::choosePosition(const Sequence & s, int i ) const { 00091 return choose(); 00092 } 00093 00094 double DiscreteIIDModel::evaluatePosition(const Sequence & s, unsigned int i) const { 00095 return log_probability_of(s[i]); 00096 } 00097 00098 double DiscreteIIDModel::log_probability_of(int s) const { 00099 if ((s>=0) && (s < (int)(_log_probabilities.size() ))) 00100 return _log_probabilities[s]; 00101 return -HUGE; 00102 } 00103 00104 double DiscreteIIDModel::log_probability_of_pair(int s1, int s2) const { 00105 if ((s1>=0) && (s1 < (int)(_log_probabilities_matrix.size1())) && (s2>=0) && (s2 < (int)(_log_probabilities_matrix.size2()))) 00106 return _log_probabilities_matrix(s1,s2); 00107 else { 00108 return -HUGE; 00109 } 00110 } 00111 00112 double DiscreteIIDModel::log_probability_of(int s, double new_value) { 00113 _log_probabilities[s] = new_value; 00114 return log_probability_of(s); 00115 } 00116 00117 double DiscreteIIDModel::log_probability_of_pair(int s1, int s2, double new_value) { 00118 _log_probabilities_matrix(s1,s2) = new_value; 00119 return log_probability_of_pair(s1,s2); 00120 } 00121 00122 int DiscreteIIDModel::size() const 00123 { 00124 return _size; 00125 } 00126 00127 double DiscreteIIDModel:: choose() const { 00128 double random = ((double)rand())/ ((double) RAND_MAX + 1.0) ; 00129 int result; 00130 for(result = 0; result < (int)_log_probabilities.size(); result++) 00131 { 00132 random -= exp(_log_probabilities[result]); 00133 if(random <= 0) 00134 break; 00135 } 00136 if(result >= (int)_log_probabilities.size()) 00137 return (double)(result-1); 00138 return (double)result; 00139 } 00140 00141 void DiscreteIIDModel::choosePair(int* a, int* b) const { 00142 double random = ((double)rand())/ ((double) RAND_MAX + 1.0) ; 00143 int result1 = 0; 00144 int result2 = 0; 00145 for(result1 = 0; result1 < (int)_log_probabilities_matrix.size1(); result1++){ 00146 for(result2 = 0; result2 < (int)_log_probabilities_matrix.size2(); result2++){ 00147 random -= exp(_log_probabilities_matrix(result1,result2)); 00148 if(random <= 0) 00149 break; 00150 } 00151 if(random <= 0) 00152 break; 00153 } 00154 if(result1 >= (int)_log_probabilities_matrix.size1() && result2 >= (int)_log_probabilities_matrix.size2()){ 00155 *a = (result1 - 1); 00156 *b = (result2 - 1); 00157 } 00158 else{ 00159 *a = result1; 00160 *b = result2; 00161 } 00162 } 00163 00164 ProbabilisticModelCreatorPtr DiscreteIIDModel::getFactory () const{ 00165 return DiscreteIIDModelCreatorPtr(new DiscreteIIDModelCreator()); 00166 } 00167 00168 std::string DiscreteIIDModel::str () const { 00169 std::stringstream out; 00170 AlphabetPtr alpha = alphabet(); 00171 00172 out << "model_name = \"DiscreteIIDModel\"\n" ; 00173 out << "probabilities = ("; 00174 if(_log_probabilities.size() > 0){ 00175 out << exp(_log_probabilities[0]); 00176 for(int k = 1; k < (int)_log_probabilities.size(); k++) 00177 { 00178 out << "," << exp(_log_probabilities[k]); 00179 } 00180 out << ")\n"; 00181 } 00182 if(alpha != NULL) 00183 { 00184 out << alpha->str(); 00185 } 00186 return out.str(); 00187 } 00188 00189 00190 void DiscreteIIDModel::strMatrix () const { 00191 if(_log_probabilities_matrix.size1() > 0){ 00192 for(int i = 0; i < (int)_log_probabilities_matrix.size1(); i++){ 00193 for(int j = 0; j < (int)_log_probabilities_matrix.size2(); j++){ 00194 if(j == (int)_log_probabilities_matrix.size2() -1) 00195 std::cout << exp(_log_probabilities_matrix(i,j)) << endl; 00196 else 00197 std::cout << exp(_log_probabilities_matrix(i,j)) << " "; 00198 } 00199 } 00200 cout << endl << endl; 00201 } 00202 } 00203 00204 void DiscreteIIDModel::initializeFromMap(const std::map <std::string, double> & probabilities, AlphabetPtr alphabet) 00205 { 00206 std::map<std::string,double>::const_iterator it; 00207 DoubleVector probs; 00208 probs.resize(alphabet->size()); 00209 for(int i = 0; i < (int)alphabet->size() ; i++) 00210 probs[i] = 0.0; 00211 for(it = probabilities.begin(); it != probabilities.end(); it++) 00212 { 00213 00214 int id = alphabet->getSymbol(it->first)->id(); 00215 if(id < (int)probs.size()) 00216 probs[id] = it->second; 00217 } 00218 _log_probabilities.resize(probs.size()); 00219 double sum = 0; 00220 for(unsigned int i = 0; i < probs.size(); i++) 00221 { 00222 sum += probs[i] ; 00223 if(close(probs[i], 0.0, 1e-100)) 00224 _log_probabilities[i] = -HUGE; 00225 else 00226 _log_probabilities[i] = log(probs[i]); 00227 } 00228 if(close(sum, 1.0, 1e-5)) 00229 _log_probabilities.push_back(-HUGE); 00230 else { 00231 _log_probabilities[probs.size()-1] = log(1.0 - sum); 00232 std::cerr << "WARNING: DiscreteIIDModel parameters must sum 1.0: sum is " << sum << std::endl; 00233 } 00234 setAlphabet(alphabet); 00235 } 00236 00237 void DiscreteIIDModel::initialize(const ProbabilisticModelParameters & p) { 00238 ProbabilisticModelParameterValuePtr probs = p.getMandatoryParameterValue("probabilities"); 00239 ProbabilisticModelParameterValuePtr alphabet = p.getOptionalParameterValue("alphabet"); 00240 ProbabilisticModelParameterValuePtr geometric_tail_ptr = p.getOptionalParameterValue("geometric_tail"); 00241 DoubleVector distr = probs->getDoubleVector(); 00242 setProbabilities(distr); 00243 if (alphabet != NULL) 00244 { 00245 AlphabetPtr alpha = AlphabetPtr(new Alphabet()); 00246 alpha->initializeFromVector(alphabet->getStringVector()); 00247 setAlphabet(alpha); 00248 if(geometric_tail_ptr != NULL) { 00249 _geometric_tail = (geometric_tail_ptr->getInt() == 1); 00250 } 00251 } 00252 _mean = 0.0; 00253 for (int i = 0 ; i < _log_probabilities.size(); i++){ 00254 _mean += exp(_log_probabilities[i]) * i; 00255 } 00256 00257 } 00258 00259 ProbabilisticModelParameters DiscreteIIDModel::parameters () const { 00260 AlphabetPtr alpha = alphabet(); 00261 ProbabilisticModelParameters par; 00262 par.add("model_name", StringParameterValuePtr(new StringParameterValue("DiscreteIIDModel"))); 00263 DoubleVector probs; 00264 for(int k = 0; k < (int)_log_probabilities.size(); k++) 00265 { 00266 probs.push_back( exp(_log_probabilities[k])); 00267 } 00268 par.add("probabilities", DoubleVectorParameterValuePtr(new DoubleVectorParameterValue(probs))); 00269 if(alpha->size() > 0) 00270 par.add("alphabet", alpha->getParameterValue()); 00271 if(_geometric_tail == 1) 00272 par.add("geometric_tail", IntParameterValuePtr(new IntParameterValue(1))); 00273 return par; 00274 } 00275 } 00276 00277