ToPS
MultipleSequentialModel.cpp
00001 /*
00002  *       MultipleSequentialModel.cpp
00003  *
00004  *       Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br>
00005  *                      Ígor Bonádio <ibonadio@ime.usp.br>
00006  *                      Vitor Onuchic <vitoronuchic@gmail.com>
00007  *                      Alan Mitchell Durham <aland@usp.br>
00008  *
00009  *       This program is free software; you can redistribute it and/or modify
00010  *       it under the terms of the GNU  General Public License as published by
00011  *       the Free Software Foundation; either version 3 of the License, or
00012  *       (at your option) any later version.
00013  *
00014  *       This program is distributed in the hope that it will be useful,
00015  *       but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  *       GNU General Public License for more details.
00018  *
00019  *       You should have received a copy of the GNU General Public License
00020  *       along with this program; if not, write to the Free Software
00021  *       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00022  *       MA 02110-1301, USA.
00023  */
00024 
00025 #include "MultipleSequentialModel.hpp"
00026 #include "TrainDiscreteIIDModel.hpp"
00027 #include "ProbabilisticModelCreatorClient.hpp"
00028 #include "Symbol.hpp"
00029 #include <iostream>
00030 #include <cmath>
00031 #include <sstream>
00032 #include <vector>
00033 #include <iterator>
00034 
00035 namespace tops {
00036 
00037     void restore_submodel(std::string & model_name, std::map<std::string, ProbabilisticModelPtr> &_models,  const ProbabilisticModelParameters & parameters) {
00038         ProbabilisticModelParameterValuePtr modelpar =
00039             parameters.getOptionalParameterValue(model_name);
00040 
00041         if (modelpar == NULL) {
00042             std::cerr << "ERROR:: Missing definition of the model  "
00043                       << model_name << std::endl;
00044             return;
00045         }
00046         if (_models.find(model_name) != _models.end()) {
00047             return;
00048         }
00049 
00050         std::string cfg = modelpar->getString();
00051         ProbabilisticModelCreatorClient creator;
00052         ConfigurationReader modelreader;
00053         if ((cfg.size()) > 0 && (cfg[0] == '[')) {
00054             cfg = cfg.substr(1, cfg.size() - 2);
00055             if (modelreader.load(cfg)) {
00056                 ProbabilisticModelParameterValuePtr modelpar =  (modelreader.parameters())->getOptionalParameterValue("model");
00057                 if(modelpar != NULL)
00058                     {
00059                         std::string submodelstr = modelpar->getString();
00060                         if (!((submodelstr.size()) > 0 && (submodelstr[0] == '['))) {
00061                             restore_submodel(submodelstr, _models, parameters);
00062                         }
00063                     }
00064                 ProbabilisticModelPtr m = creator.create(*(modelreader.parameters()), _models);
00065                 _models[model_name] = m;
00066             } else{
00067                 std::cerr << "/=======/\n" << cfg << "/========/" << std::endl;
00068                 exit(-1);
00069             }
00070         }
00071         else
00072             {
00073                 ProbabilisticModelPtr m = creator.create(cfg);
00074                 if (m == NULL) {
00075                     std::cerr << "Can not load model " << cfg << "! " << std::endl;
00076                     return;
00077                 }
00078                 _models[model_name] = m;
00079             }
00080     }
00081 
00082 
00083     double MultipleSequentialModel::prefix_sum_array_compute(int begin, int end, int phase) {
00084         if(begin < 0)
00085             return -HUGE;
00086         if(begin > end)
00087             return -HUGE;
00088         double sum = 0;
00089         int b = begin;
00090         int e = 0;
00091         for(int i = 0; i < _idx_not_limited; i++)
00092             {
00093                 e = b + _max_size[i] - 1;
00094                 if (e >= _seqsize)
00095                     e = _seqsize-1;
00096                 sum += _sub_models[i]->prefix_sum_array_compute(b,e,phase);
00097                 if( e >=  (int)end)
00098                     return sum;
00099 
00100                 phase = mod(phase + e - b + 1, 3);
00101                 b = e + 1;
00102                 if(e>=_seqsize)
00103                     break;
00104             }
00105         int begin_of_not_limited = b;
00106         e = end;
00107         for (int i = _sub_models.size()-1; i > _idx_not_limited ; i--)
00108             {
00109                 b = e - _max_size[i] + 1;
00110                 int phase2 = mod(phase + b - begin_of_not_limited, 3);
00111                 if(b < 0) {
00112                     phase2 = mod(phase2 -b, 3);
00113                     b  = 0;
00114                 }
00115                 sum += _sub_models[i]->prefix_sum_array_compute(b,e,phase2);
00116                 e = b - 1;
00117                 if (e < 0)
00118                     break;
00119             }
00120         int end_of_not_limited = e;
00121         if( end_of_not_limited - begin_of_not_limited + 1 > 0 ){
00122             sum += _sub_models[_idx_not_limited]->prefix_sum_array_compute(begin_of_not_limited, end_of_not_limited, phase);
00123         }
00124         return sum;
00125     }
00126     double MultipleSequentialModel::prefix_sum_array_compute(int begin, int end)
00127     {
00128         return prefix_sum_array_compute(begin, end, 0);
00129     }
00130     bool MultipleSequentialModel::initialize_prefix_sum_array(const Sequence & s, int phase)
00131     {
00132         if(ProbabilisticModel::initialize_prefix_sum_array(s))
00133             return true;
00134         for(int i = 0; i < (int)_sub_models.size(); i++)
00135             {
00136                 _sub_models[i]->initialize_prefix_sum_array(s);
00137             }
00138         _seqsize = s.size();
00139         return true;
00140     }
00141 
00142     bool MultipleSequentialModel::initialize_prefix_sum_array(const Sequence & s)
00143     {
00144         return initialize_prefix_sum_array(s, 0);
00145     }
00146 
00147 
00148 
00149 
00150     std::string MultipleSequentialModel::str() const
00151     {
00152         std::stringstream s;
00153         std::map <std::string, ProbabilisticModelParameterValuePtr> ::const_iterator it;
00154         std::map <std::string, ProbabilisticModelParameterValuePtr> p = _parameters.parameters();
00155         for(it = p.begin(); it != p.end(); it++)
00156             {
00157                 s << it->first << " = " << (it->second)->str() << std::endl;
00158             }
00159         return s.str();
00160     }
00161 
00162     double MultipleSequentialModel::evaluate(const Sequence & s, unsigned int begin, unsigned int end, int phase) const {
00163         if(begin < 0)
00164             return -HUGE;
00165         if(begin > end)
00166             return -HUGE;
00167         double sum = 0;
00168         int b = begin;
00169         int e = 0;
00170         for(int i = 0; i < _idx_not_limited; i++)
00171             {
00172                 e = b + _max_size[i] - 1;
00173                 if (e >= s.size())
00174                     e = s.size()-1;
00175                 sum += _sub_models[i]->evaluate(s,b,e,phase);
00176                 if( e >=  (int)end)
00177                     return sum;
00178 
00179                 phase = mod(phase + e - b + 1, 3);
00180                 b = e + 1;
00181                 if(e >= s.size())
00182                     break;
00183             }
00184         int begin_of_not_limited = b;
00185         e = end;
00186         for (int i = _sub_models.size()-1; i > _idx_not_limited ; i--)
00187             {
00188                 b = e - _max_size[i] + 1;
00189                 int phase2 = mod(phase + b - begin_of_not_limited, 3);
00190                 if(b < 0) {
00191                     phase2 = mod(phase2 -b, 3);
00192                     b  = 0;
00193                 }
00194                 sum += _sub_models[i]->evaluate(s,b,e,phase2);
00195                 e = b - 1;
00196                 if (e < 0)
00197                     break;
00198 
00199             }
00200         int end_of_not_limited = e;
00201         if( end_of_not_limited - begin_of_not_limited + 1 > 0 ){
00202             sum += _sub_models[_idx_not_limited]->evaluate(s,begin_of_not_limited, end_of_not_limited, phase);
00203         }
00204         return sum;
00205     }
00206     void MultipleSequentialModel::initialize (const ProbabilisticModelParameters & p )
00207     {
00208         ProbabilisticModelParameterValuePtr modelspar = p.getMandatoryParameterValue("models");
00209         ProbabilisticModelParameterValuePtr maxsizepar = p.getMandatoryParameterValue("max_length");
00210         ProbabilisticModelCreatorClient creator;
00211         StringVector modelnames = modelspar->getStringVector();
00212         DoubleVector maxlength = maxsizepar->getDoubleVector();
00213 
00214         if(maxlength.size() != modelnames.size() )
00215             {
00216                 std::cerr << "ERROR: number of models does not match the number of max_length";
00217                 exit(-1);
00218             }
00219         _sub_models.resize(modelnames.size());
00220         _max_size.resize(modelnames.size());
00221         _idx_not_limited = _sub_models.size() - 1;
00222         int count = 0;
00223         for(int i = 0; i < (int)modelnames.size();i++)
00224             {
00225                 if(maxlength[i] < 0)
00226                     {
00227 
00228                         count++;
00229                         _idx_not_limited = i;
00230                     }
00231             }
00232         if (count > 1)
00233             {
00234                 std::cerr << "ERROR: Only one model can has unlimited length\n" << std::endl;
00235                 exit(-1);
00236             }
00237         for(int i = 0; i < (int)modelnames.size();i++)
00238             {
00239                 restore_submodel(modelnames[i], _models, p);
00240                 _sub_models[i] = _models[modelnames[i]];
00241                 if(_sub_models[i] == NULL)
00242                     {
00243                         std::cerr << "ERROR: Cannot load model " << modelnames[i] << std::endl;
00244                         exit(-1);
00245                     }
00246                 setAlphabet(_sub_models[i]->alphabet());
00247                 _max_size[i] = maxlength[i];
00248             }
00249         _parameters =  p;
00250     }
00252     double MultipleSequentialModel::evaluate(const Sequence & s, unsigned int begin, unsigned int end) const {
00253         return evaluate(s, begin,end,0);
00254     }
00255 
00256     ProbabilisticModelParameters MultipleSequentialModel::parameters() const
00257     {
00258         return _parameters;
00259     }
00260 
00261     Sequence & MultipleSequentialModel::choose(Sequence & h, int size) const
00262     {
00263         int total_size = 0;
00264         for(int i = 0;i < (int) _sub_models.size(); i++) {
00265             Sequence x;
00266             _sub_models[i]->choose(x, _max_size[i]);
00267             for(int k = 0; k < _max_size[i]; k++){
00268                 h.push_back(x[k]);
00269                 total_size ++;
00270                 if(total_size >= size)
00271                     return h;
00272             }
00273         }
00274         return h;
00275     }
00276 
00277 }