// ToPS
00001 /* 00002 * MultipleSequentialModel.cpp 00003 * 00004 * Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br> 00005 * Ígor Bonádio <ibonadio@ime.usp.br> 00006 * Vitor Onuchic <vitoronuchic@gmail.com> 00007 * Alan Mitchell Durham <aland@usp.br> 00008 * 00009 * This program is free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 3 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00022 * MA 02110-1301, USA. 
00023 */ 00024 00025 #include "MultipleSequentialModel.hpp" 00026 #include "TrainDiscreteIIDModel.hpp" 00027 #include "ProbabilisticModelCreatorClient.hpp" 00028 #include "Symbol.hpp" 00029 #include <iostream> 00030 #include <cmath> 00031 #include <sstream> 00032 #include <vector> 00033 #include <iterator> 00034 00035 namespace tops { 00036 00037 void restore_submodel(std::string & model_name, std::map<std::string, ProbabilisticModelPtr> &_models, const ProbabilisticModelParameters & parameters) { 00038 ProbabilisticModelParameterValuePtr modelpar = 00039 parameters.getOptionalParameterValue(model_name); 00040 00041 if (modelpar == NULL) { 00042 std::cerr << "ERROR:: Missing definition of the model " 00043 << model_name << std::endl; 00044 return; 00045 } 00046 if (_models.find(model_name) != _models.end()) { 00047 return; 00048 } 00049 00050 std::string cfg = modelpar->getString(); 00051 ProbabilisticModelCreatorClient creator; 00052 ConfigurationReader modelreader; 00053 if ((cfg.size()) > 0 && (cfg[0] == '[')) { 00054 cfg = cfg.substr(1, cfg.size() - 2); 00055 if (modelreader.load(cfg)) { 00056 ProbabilisticModelParameterValuePtr modelpar = (modelreader.parameters())->getOptionalParameterValue("model"); 00057 if(modelpar != NULL) 00058 { 00059 std::string submodelstr = modelpar->getString(); 00060 if (!((submodelstr.size()) > 0 && (submodelstr[0] == '['))) { 00061 restore_submodel(submodelstr, _models, parameters); 00062 } 00063 } 00064 ProbabilisticModelPtr m = creator.create(*(modelreader.parameters()), _models); 00065 _models[model_name] = m; 00066 } else{ 00067 std::cerr << "/=======/\n" << cfg << "/========/" << std::endl; 00068 exit(-1); 00069 } 00070 } 00071 else 00072 { 00073 ProbabilisticModelPtr m = creator.create(cfg); 00074 if (m == NULL) { 00075 std::cerr << "Can not load model " << cfg << "! 
" << std::endl; 00076 return; 00077 } 00078 _models[model_name] = m; 00079 } 00080 } 00081 00082 00083 double MultipleSequentialModel::prefix_sum_array_compute(int begin, int end, int phase) { 00084 if(begin < 0) 00085 return -HUGE; 00086 if(begin > end) 00087 return -HUGE; 00088 double sum = 0; 00089 int b = begin; 00090 int e = 0; 00091 for(int i = 0; i < _idx_not_limited; i++) 00092 { 00093 e = b + _max_size[i] - 1; 00094 if (e >= _seqsize) 00095 e = _seqsize-1; 00096 sum += _sub_models[i]->prefix_sum_array_compute(b,e,phase); 00097 if( e >= (int)end) 00098 return sum; 00099 00100 phase = mod(phase + e - b + 1, 3); 00101 b = e + 1; 00102 if(e>=_seqsize) 00103 break; 00104 } 00105 int begin_of_not_limited = b; 00106 e = end; 00107 for (int i = _sub_models.size()-1; i > _idx_not_limited ; i--) 00108 { 00109 b = e - _max_size[i] + 1; 00110 int phase2 = mod(phase + b - begin_of_not_limited, 3); 00111 if(b < 0) { 00112 phase2 = mod(phase2 -b, 3); 00113 b = 0; 00114 } 00115 sum += _sub_models[i]->prefix_sum_array_compute(b,e,phase2); 00116 e = b - 1; 00117 if (e < 0) 00118 break; 00119 } 00120 int end_of_not_limited = e; 00121 if( end_of_not_limited - begin_of_not_limited + 1 > 0 ){ 00122 sum += _sub_models[_idx_not_limited]->prefix_sum_array_compute(begin_of_not_limited, end_of_not_limited, phase); 00123 } 00124 return sum; 00125 } 00126 double MultipleSequentialModel::prefix_sum_array_compute(int begin, int end) 00127 { 00128 return prefix_sum_array_compute(begin, end, 0); 00129 } 00130 bool MultipleSequentialModel::initialize_prefix_sum_array(const Sequence & s, int phase) 00131 { 00132 if(ProbabilisticModel::initialize_prefix_sum_array(s)) 00133 return true; 00134 for(int i = 0; i < (int)_sub_models.size(); i++) 00135 { 00136 _sub_models[i]->initialize_prefix_sum_array(s); 00137 } 00138 _seqsize = s.size(); 00139 return true; 00140 } 00141 00142 bool MultipleSequentialModel::initialize_prefix_sum_array(const Sequence & s) 00143 { 00144 return 
initialize_prefix_sum_array(s, 0); 00145 } 00146 00147 00148 00149 00150 std::string MultipleSequentialModel::str() const 00151 { 00152 std::stringstream s; 00153 std::map <std::string, ProbabilisticModelParameterValuePtr> ::const_iterator it; 00154 std::map <std::string, ProbabilisticModelParameterValuePtr> p = _parameters.parameters(); 00155 for(it = p.begin(); it != p.end(); it++) 00156 { 00157 s << it->first << " = " << (it->second)->str() << std::endl; 00158 } 00159 return s.str(); 00160 } 00161 00162 double MultipleSequentialModel::evaluate(const Sequence & s, unsigned int begin, unsigned int end, int phase) const { 00163 if(begin < 0) 00164 return -HUGE; 00165 if(begin > end) 00166 return -HUGE; 00167 double sum = 0; 00168 int b = begin; 00169 int e = 0; 00170 for(int i = 0; i < _idx_not_limited; i++) 00171 { 00172 e = b + _max_size[i] - 1; 00173 if (e >= s.size()) 00174 e = s.size()-1; 00175 sum += _sub_models[i]->evaluate(s,b,e,phase); 00176 if( e >= (int)end) 00177 return sum; 00178 00179 phase = mod(phase + e - b + 1, 3); 00180 b = e + 1; 00181 if(e >= s.size()) 00182 break; 00183 } 00184 int begin_of_not_limited = b; 00185 e = end; 00186 for (int i = _sub_models.size()-1; i > _idx_not_limited ; i--) 00187 { 00188 b = e - _max_size[i] + 1; 00189 int phase2 = mod(phase + b - begin_of_not_limited, 3); 00190 if(b < 0) { 00191 phase2 = mod(phase2 -b, 3); 00192 b = 0; 00193 } 00194 sum += _sub_models[i]->evaluate(s,b,e,phase2); 00195 e = b - 1; 00196 if (e < 0) 00197 break; 00198 00199 } 00200 int end_of_not_limited = e; 00201 if( end_of_not_limited - begin_of_not_limited + 1 > 0 ){ 00202 sum += _sub_models[_idx_not_limited]->evaluate(s,begin_of_not_limited, end_of_not_limited, phase); 00203 } 00204 return sum; 00205 } 00206 void MultipleSequentialModel::initialize (const ProbabilisticModelParameters & p ) 00207 { 00208 ProbabilisticModelParameterValuePtr modelspar = p.getMandatoryParameterValue("models"); 00209 ProbabilisticModelParameterValuePtr maxsizepar 
= p.getMandatoryParameterValue("max_length"); 00210 ProbabilisticModelCreatorClient creator; 00211 StringVector modelnames = modelspar->getStringVector(); 00212 DoubleVector maxlength = maxsizepar->getDoubleVector(); 00213 00214 if(maxlength.size() != modelnames.size() ) 00215 { 00216 std::cerr << "ERROR: number of models does not match the number of max_length"; 00217 exit(-1); 00218 } 00219 _sub_models.resize(modelnames.size()); 00220 _max_size.resize(modelnames.size()); 00221 _idx_not_limited = _sub_models.size() - 1; 00222 int count = 0; 00223 for(int i = 0; i < (int)modelnames.size();i++) 00224 { 00225 if(maxlength[i] < 0) 00226 { 00227 00228 count++; 00229 _idx_not_limited = i; 00230 } 00231 } 00232 if (count > 1) 00233 { 00234 std::cerr << "ERROR: Only one model can has unlimited length\n" << std::endl; 00235 exit(-1); 00236 } 00237 for(int i = 0; i < (int)modelnames.size();i++) 00238 { 00239 restore_submodel(modelnames[i], _models, p); 00240 _sub_models[i] = _models[modelnames[i]]; 00241 if(_sub_models[i] == NULL) 00242 { 00243 std::cerr << "ERROR: Cannot load model " << modelnames[i] << std::endl; 00244 exit(-1); 00245 } 00246 setAlphabet(_sub_models[i]->alphabet()); 00247 _max_size[i] = maxlength[i]; 00248 } 00249 _parameters = p; 00250 } 00252 double MultipleSequentialModel::evaluate(const Sequence & s, unsigned int begin, unsigned int end) const { 00253 return evaluate(s, begin,end,0); 00254 } 00255 00256 ProbabilisticModelParameters MultipleSequentialModel::parameters() const 00257 { 00258 return _parameters; 00259 } 00260 00261 Sequence & MultipleSequentialModel::choose(Sequence & h, int size) const 00262 { 00263 int total_size = 0; 00264 for(int i = 0;i < (int) _sub_models.size(); i++) { 00265 Sequence x; 00266 _sub_models[i]->choose(x, _max_size[i]); 00267 for(int k = 0; k < _max_size[i]; k++){ 00268 h.push_back(x[k]); 00269 total_size ++; 00270 if(total_size >= size) 00271 return h; 00272 } 00273 } 00274 return h; 00275 } 00276 00277 }