ToPS: GeneralizedHiddenMarkovModel.cpp Source File

00001 /*
00002  *       GeneralizedHiddenMarkovModel.cpp
00003  *
00004  *       Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br>
00005  *                      �gor Bon�dio <ibonadio@ime.usp.br>
00006  *                      Vitor Onuchic <vitoronuchic@gmail.com>
00007  *                      Alan Mitchell Durham <aland@usp.br>
00008  *
00009  *       This program is free software; you can redistribute it and/or modify
00010  *       it under the terms of the GNU  General Public License as published by
00011  *       the Free Software Foundation; either version 3 of the License, or
00012  *       (at your option) any later version.
00013  *
00014  *       This program is distributed in the hope that it will be useful,
00015  *       but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  *       GNU General Public License for more details.
00018  *
00019  *       You should have received a copy of the GNU General Public License
00020  *       along with this program; if not, write to the Free Software
00021  *       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00022  *       MA 02110-1301, USA.
00023  */
00024 
00025 #include "InhomogeneousFactorableModel.hpp"
00026 #include "GeneralizedHiddenMarkovModel.hpp"
00027 #include "GeneralizedHiddenMarkovModelCreator.hpp"
00028 #include "ProbabilisticModelCreatorClient.hpp"
00029 #include "Symbol.hpp"
00030 #include "GHMMStates.hpp"
00031 #include "util.hpp"
00032 #include <sys/types.h>
00033 
00034 
00035 namespace tops {
00036 
00037   std::string GeneralizedHiddenMarkovModel::print_graph () const {
00038     std::stringstream out;
00039     for(int i = 0; i < (int)_all_states.size(); i++)
00040       {
00041         out << _all_states[i]->name() << " " << _all_states[i]->name() << std::endl;
00042       }
00043     out << "#" << std::endl;
00044     for(int i = 0; i < (int)_all_states.size(); i++)
00045       {
00046         for(int j = 0; j < (int)_all_states.size(); j++)
00047           {
00048             if(!close( exp(_all_states[i]->transition()->log_probability_of(j)),0.0, 1-1))
00049               out << _all_states[i]->name() << " " << _all_states[j]->name() << " " << exp(_all_states[i]->transition()->log_probability_of(j)) <<  std::endl;
00050           }
00051       }
00052     return out.str();
00053   }
00054   void GeneralizedHiddenMarkovModel::restore_model(std::string & model_name,const ProbabilisticModelParameters & parameters) {
00055     ProbabilisticModelParameterValuePtr modelpar =
00056       parameters.getOptionalParameterValue(model_name);
00057     if (modelpar == NULL) {
00058       std::cerr << "ERROR: Missing definition of the model  "
00059                 << model_name << std::endl;
00060       return;
00061     }
00062     if (_models.find(model_name) != _models.end()) {
00063       return;
00064     }
00065 
00066     std::string cfg = modelpar->getString();
00067     ProbabilisticModelCreatorClient creator;
00068     ConfigurationReader modelreader;
00069     if ((cfg.size()) > 0 && (cfg[0] == '[')) {
00070       cfg = cfg.substr(1, cfg.size() - 2);
00071       if (modelreader.load(cfg)) {
00072         ProbabilisticModelParameterValuePtr modelpar =  (modelreader.parameters())->getOptionalParameterValue("model");
00073         if(modelpar != NULL)
00074           {
00075             std::string submodelstr = modelpar->getString();
00076             if ((submodelstr.size()) > 0 && (submodelstr[0] == '[')) {
00077             } else {
00078               restore_model(submodelstr, parameters);
00079             }
00080           }
00081         ProbabilisticModelPtr m = creator.create(*(modelreader.parameters()), _models);
00082         _models[model_name] = m;
00083       } else{
00084         std::cerr << "/=======/\n" << cfg << "/========/" << std::endl;
00085         exit(-1);
00086       }
00087 
00088     }
00089      else
00090         {
00091 
00092           ProbabilisticModelPtr m = creator.create(cfg);
00093           if (m == NULL) {
00094             std::cerr << "Can not load model " << cfg << "! " << std::endl;
00095             return;
00096           }
00097 
00098           _models[model_name] = m;
00099         }
00100   }
00101 
00102   void GeneralizedHiddenMarkovModel::setStateNames(AlphabetPtr alphabet) {
00103         _state_names = alphabet;
00104         _all_states.resize(alphabet->size());
00105   }
00106   void GeneralizedHiddenMarkovModel::setInitialProbability(
00107                  DiscreteIIDModelPtr init) {
00108     _initial_probabilities = init;
00109   }
00110 
00111   void GeneralizedHiddenMarkovModel::setNumClasses(int nclasses){
00112     _nclasses = nclasses;
00113   }
00114 
00115   int GeneralizedHiddenMarkovModel::getNumClasses(){
00116     return _nclasses;
00117   }
00118 
00119   void GeneralizedHiddenMarkovModel::setTerminalProbability(
00120                   DiscreteIIDModelPtr term) {
00121         _terminal_probabilities = term;
00122   }
00123 
00124 int GeneralizedHiddenMarkovModel::configureSignalState(std::string observation_model_name,
00125                                                         DiscreteIIDModelPtr transition_distr,
00126                    int size, std::string state_name, int iphase, int ophase, vector<int> classes){
00127   SymbolPtr symbol = _state_names->getSymbol(state_name);
00128   ProbabilisticModelPtr model = _models[observation_model_name];
00129   GHMMSignalStatePtr signal = GHMMSignalStatePtr(new GHMMSignalState(model,transition_distr, symbol));
00130   signal->setSize(size);
00131   signal->observationModelName(observation_model_name);
00132   signal->setInputPhase(iphase);
00133   signal->setOutputPhase(ophase);
00134   signal->setClasses(classes);
00135  _all_states[symbol->id()] = signal;
00136   _signal_states.push_back(signal);
00137   return symbol->id();
00138 }
00139 
00140 int GeneralizedHiddenMarkovModel::configureGeometricDurationState(std::string model_name,
00141                   DiscreteIIDModelPtr transition_distr, std::string state_name, int iphase, int ophase, vector<int> classes) {
00142   ProbabilisticModelPtr model = _models[model_name];
00143   SymbolPtr symbol = _state_names->getSymbol(state_name);
00144   GHMMStatePtr state = GHMMStatePtr(new GHMMState(model, transition_distr,
00145                                                   symbol));
00146   state->setInputPhase(iphase);
00147   state->setOutputPhase(ophase);
00148   state->setClasses(classes);
00149   _all_states[symbol->id()] = state;
00150   state->observationModelName(model_name);
00151   _geometric_duration_states.push_back(state);
00152   return symbol->id();
00153 }
00154 
00155 
00156 int GeneralizedHiddenMarkovModel::configureExplicitDurationState(std::string observation_model_name, DiscreteIIDModelPtr transition_distr,
00157                  std::string duration_model_name, std::string state_name, int iphase, int ophase, vector<int> classes)
00158 {
00159 
00160   ProbabilisticModelPtr model = _models[observation_model_name];
00161   ProbabilisticModelPtr duration = _models[duration_model_name];
00162   SymbolPtr symbol = _state_names->getSymbol(state_name);
00163   GHMMExplicitDurationStatePtr state = GHMMExplicitDurationStatePtr(
00164                                                                     new GHMMExplicitDurationState(model, transition_distr, symbol));
00165   state->observationModelName(observation_model_name);
00166   state->durationModelName(duration_model_name);
00167   state->setDuration(duration);
00168   state->setInputPhase(iphase);
00169   state->setOutputPhase(ophase);
00170   state->setClasses(classes);
00171  _all_states[symbol->id()] = state;
00172   return symbol->id();
00173 }
00174 
00175 
00176     void GeneralizedHiddenMarkovModel::initializeChoosePathAlgorithm(const Sequence &s)
00177     {
00178         if(s != _last)
00179             {
00180                 forward(s, _alpha);
00181                 _last = s;
00182             }
00183     }
00184 
00185   void GeneralizedHiddenMarkovModel::choosePath(const Sequence &s, Sequence &path) {
00186     initializeChoosePathAlgorithm(s);
00187     int size = s.size();
00188     int nstates =_all_states.size();
00189     DoubleVector lastStateProbability(nstates);
00190     double sum = -HUGE;
00191     int L = size - 1;
00192     path.resize(s.size());
00193 #if 0
00194     for(int i = 0; i < size; i++)
00195       {
00196   std::cerr << i << " " << std::endl;
00197   for (int j = 0; j < nstates; j++)
00198     {
00199       std::cerr << " " << _all_states[j]->name() << " " << _alpha(j, i) << std::endl;
00200     }
00201       }
00202 #endif
00203     for(int k = 0; k < nstates; k++)
00204       {
00205   sum = log_sum(sum, _alpha(k,L) );
00206       }
00207     for(int k = 0; k < nstates; k++)
00208       {
00209   lastStateProbability[k] = exp(_alpha(k, L) - sum);
00210       }
00211     DiscreteIIDModelPtr m = DiscreteIIDModelPtr(new DiscreteIIDModel(lastStateProbability));
00212 
00213     int q = m->choose();
00214     int position = L;
00215 
00216     while (position > 0 ){
00217       int new_position;
00218       int state;
00219       new_position = 0;
00220       state = q;
00221       _all_states[q]->choosePredecessor(_alpha, position, state, new_position, _all_states);
00222       for(int p = new_position + 1; p <= position; p++)
00223   {
00224     path[p] = q;
00225   }
00226       position = new_position;
00227       q = state;
00228     }
00229   }
00230 
00231   float GeneralizedHiddenMarkovModel::MEAPred(const Sequence &s, Sequence &path){
00232     fMatrix probs;
00233     int size = s.size();
00234     int nstates = _all_states.size();
00235     posteriorProbabilitiesNoClasses(s,probs);
00236     Matrix ea(size,nstates);
00237     IntMatrix ptrs(size,nstates);
00238 
00239     //Initialization
00240     for(int k = 0; k < nstates; k++){
00241       float prob = probs(0,k);
00242       ea(0,k) = prob;
00243     }
00244 
00245     //Recursion
00246     for(int i = 1; i < size; i++){
00247       for(int k = 0; k < nstates; k++){
00248   float max = 0.0;
00249   int state = -1;
00250   std::vector<int> preds = _all_states[k]->predecessors();
00251   preds.push_back(k);
00252   for(int p = 0; p < (int)preds.size(); p++){
00253     int id = preds[p];
00254     if(max < ea(i-1,id)){
00255       max = ea(i-1,id);
00256       state = id;
00257     }
00258   }
00259   ea(i,k) = max + probs(i,k);
00260   ptrs(i,k) = state;
00261       }
00262     }
00263 
00264     //Traceback
00265     path.resize(size);
00266     float mea = 0.0;
00267     int state = -1;
00268     for(int k = 0; k < nstates; k++){
00269       if(ea(size-1,k) > mea){
00270   mea = ea(size-1,k);
00271   state = k;
00272       }
00273     }
00274     path[size-1] = state;
00275     for(int i = size-1; i > 0; i--){
00276       state = ptrs(i,path[i]);
00277       path[i-1] = state;
00278     }
00279     return mea/size;
00280   }
00281 
00282   float GeneralizedHiddenMarkovModel::MEAPred(const Sequence &s, Sequence &path, SparseMatrixPtr ppPred){
00283     fMatrix probs;
00284     ppPred->getfMatrixPred(probs);
00285     int size = s.size();
00286     int nstates = _all_states.size();
00287     Matrix ea(size,nstates);
00288     IntMatrix ptrs(size,nstates);
00289 
00290     //Initialization
00291     for(int k = 0; k < nstates; k++){
00292       ea(0,k) = probs(0,k);
00293     }
00294 
00295     //Recursion
00296     for(int i = 1; i < size; i++){
00297       for(int k = 0; k < nstates; k++){
00298   float max = 0.0;
00299   int state = -1;
00300   std::vector<int> preds = _all_states[k]->predecessors();
00301   preds.push_back(k);
00302   for(int p = 0; p < (int)preds.size(); p++){
00303     int id = preds[p];
00304     if(max < ea(i-1,id)){
00305       max = ea(i-1,id);
00306       state = id;
00307     }
00308   }
00309   ea(i,k) = max + probs(i,k);
00310   ptrs(i,k) = state;
00311       }
00312     }
00313 
00314     //Traceback
00315     path.resize(size);
00316     float mea = 0.0;
00317     int state = -1;
00318     for(int k = 0; k < nstates; k++){
00319       if(ea(size-1,k) > mea){
00320   mea = ea(size-1,k);
00321   state = k;
00322       }
00323     }
00324     path[size-1] = state;
00325     for(int i = size-1; i > 0; i--){
00326       state = ptrs(i,path[i]);
00327       path[i-1] = state;
00328     }
00329     return mea/size;
00330   }
00331 
00332   void GeneralizedHiddenMarkovModel::posteriorProbabilities(const Sequence &s, fMatrix &postProbs) const{
00333     posteriorProbabilitiesNoClasses(s,postProbs);
00334   }
00335 
00336   void GeneralizedHiddenMarkovModel::posteriorProbabilitiesNoClasses (const Sequence &s, fMatrix &postProbs) const{
00337     int size = s.size();
00338     int nstates = _all_states.size();
00339     initialize_prefix_sum_arrays(s);
00340 
00341     Matrix beta;
00342     double prob = backward(s,beta);
00343 
00344     Matrix alpha (nstates, size);
00345 
00346     postProbs.resize(size,nstates);
00347 
00348     std::vector< std::list<int> > valid_positions;
00349     valid_positions.resize(nstates);
00350     std::list <int> e;
00351     for(int k = 0; k <nstates; k++)
00352       valid_positions[k] = e;
00353 
00354     // initialization
00355     for (int c = 0; c < nstates; c++) {
00356       postProbs(0,c) = 0.0;
00357     }
00358     for (int k = 0; k < nstates; k++) {
00359       alpha(k, 0) = getInitialProbabilities()->log_probability_of(k)
00360   + _all_states[k]->observation()->prefix_sum_array_compute(0, 0, _all_states[k]->getInputPhase()) + _all_states[k]->duration_probability(1);
00361       if(alpha(k,0) > -HUGE){
00362   postProbs(0,k) = exp(alpha(k,0) + beta(k,0) - prob);
00363   std::vector<int> succ = _all_states[k]->successors();
00364   for(int p = 0; p < (int)succ.size(); p++){
00365     int id = succ[p];
00366     if(!_all_states[id]->isGeometricDuration())
00367       valid_positions[id].push_back(0);
00368   }
00369       }
00370     }
00371 
00372     for (int i = 1; i < size; i++) {
00373       for(int c = 0; c < nstates; c++)
00374   postProbs(i,c) = 0.0;
00375       for(int k = 0; k < nstates; k++) {
00376   _all_states[k]->posteriorSum(alpha, beta, postProbs, s, i, _all_states, valid_positions, prob, k);
00377       }
00378     }
00379 
00380     /*double sum = alpha(0, size-1) + _terminal_probabilities->log_probability_of(0);
00381     for(int k = 1; k < nstates; k++)
00382       sum = log_sum(sum, alpha(k, size-1) + _terminal_probabilities->log_probability_of(k));
00383       cout << "prob = " << prob << "  postProb = " << sum << endl;*/
00384       //cout << (probabilities.value_data()).size() << "  " << postProbs.size1()*postProbs.size2() << endl;
00385   }
00386 
00387   void GeneralizedHiddenMarkovModel::posteriorProbabilitiesWithClasses (const Sequence &s, SparseMatrixPtr probabilities) const{
00388     int size = s.size();
00389     int nstates = _all_states.size();
00390     initialize_prefix_sum_arrays(s);
00391 
00392     Matrix beta;
00393     double prob = backward(s,beta);
00394 
00395     Matrix alpha (nstates, size);
00396 
00397     fMatrix postProbs(size,_nclasses);
00398 
00399     std::vector< std::list<int> > valid_positions;
00400     valid_positions.resize(nstates);
00401     std::list <int> e;
00402     for(int k = 0; k <nstates; k++)
00403       valid_positions[k] = e;
00404 
00405     // initialization
00406     for (int c = 0; c < _nclasses; c++) {
00407       postProbs(0,c) = 0.0;
00408     }
00409     for (int k = 0; k < nstates; k++) {
00410       alpha(k, 0) = getInitialProbabilities()->log_probability_of(k)
00411   + _all_states[k]->observation()->prefix_sum_array_compute(0, 0, _all_states[k]->getInputPhase()) + _all_states[k]->duration_probability(1);
00412       if(alpha(k,0) > -HUGE){
00413   for(int c = 0; c < (int)(_all_states[k]->classes()).size(); c++)
00414     postProbs(0,_all_states[k]->classes()[c]) = exp(alpha(k,0) + beta(k,0) - prob);
00415   std::vector<int> succ = _all_states[k]->successors();
00416   for(int p = 0; p < (int)succ.size(); p++){
00417     int id = succ[p];
00418   if(!_all_states[id]->isGeometricDuration())
00419     valid_positions[id].push_back(0);
00420   }
00421       }
00422     }
00423 
00424     for (int i = 1; i < size; i++) {
00425       for(int c = 0; c < _nclasses; c++)
00426   postProbs(i,c) = 0.0;
00427       for(int k = 0; k < nstates; k++) {
00428   _all_states[k]->posteriorSum(alpha, beta, postProbs, s, i, _all_states, valid_positions, prob, -1);
00429       }
00430     }
00431 
00432     probabilities->buildPredMatrix(size,_nclasses,postProbs);
00433 
00434 
00435     /*double sum = alpha(0, size-1) + _terminal_probabilities->log_probability_of(0);
00436     for(int k = 1; k < nstates; k++)
00437       sum = log_sum(sum, alpha(k, size-1) + _terminal_probabilities->log_probability_of(k));
00438 
00439       cout << "prob = " << prob << "  postProb = " << sum << endl;*/
00440       //cout << (probabilities.value_data()).size() << "  " << postProbs.size1()*postProbs.size2() << endl;
00441   }
00442 
00443 
00444 double GeneralizedHiddenMarkovModel::forward(const Sequence & s, Matrix &a) const {
00445   int size = s.size();
00446   int nstates = _all_states.size();
00447   initialize_prefix_sum_arrays(s);
00448 
00449   Matrix alpha (nstates, size);
00450 
00451   std::vector< std::list<int> > valid_positions;
00452   valid_positions.resize(nstates);
00453   std::list <int> e;
00454   for(int k = 0; k <nstates; k++)
00455       valid_positions[k] = e;
00456 
00457   // initialization
00458   for (int k = 0; k < nstates; k++) {
00459     alpha(k, 0) = getInitialProbabilities()->log_probability_of(k)
00460       + _all_states[k]->observation()->prefix_sum_array_compute(0, 0, _all_states[k]->getInputPhase()) + _all_states[k]->duration_probability(1);
00461     if(alpha(k,0) > -HUGE){
00462       std::vector<int> succ = _all_states[k]->successors();
00463       for(int p = 0; p < (int)succ.size(); p++){
00464   int id = succ[p];
00465   if(!_all_states[id]->isGeometricDuration())
00466     valid_positions[id].push_back(0);
00467       }
00468     }
00469   }
00470 
00471   for (int i = 1; i < size; i++) {
00472       for(int k = 0; k < nstates; k++) {
00473   _all_states[k]->forwardSum(alpha, s, i, _all_states, valid_positions);
00474       }
00475   }
00476 
00477   a = alpha;
00478   double sum = alpha(0, size-1) + _terminal_probabilities->log_probability_of(0);
00479   for(int k = 1; k < nstates; k++)
00480     sum = log_sum(sum, alpha(k, size-1) + _terminal_probabilities->log_probability_of(k));
00481 
00482   return sum;
00483 }
00484 
00485 
00486 
00487 double GeneralizedHiddenMarkovModel::inefficient_forward(const Sequence & s, Matrix &a) const {
00488   int size = s.size();
00489   int nstates = _all_states.size();
00490   initialize_prefix_sum_arrays(s);
00491 
00492   Matrix alpha (nstates, size);
00493 
00494   // initialization
00495   for (int k = 0; k < nstates; k++) {
00496     alpha(k, 0) = getInitialProbabilities()->log_probability_of(k) + _all_states[k]->observation()->prefix_sum_array_compute(0, 0);
00497   }
00498 
00499   for (int i = 1; i < size; i++) {
00500     printf("i = %d\n",i);
00501     for(int k = 0; k < nstates; k++) {
00502       alpha(k, i) = -HUGE;
00503       for(int d = i; d > 0; d--){
00504   int nphase = _all_states[k]->getInputPhase();
00505   if(_all_states[k]->observation()->inhomogeneous() != NULL)
00506     nphase = _all_states[k]->observation()->inhomogeneous()->maximumTimeValue() + 1;
00507   if(_all_states[k]->getStart() > 0 && _all_states[k]->getStop() > 0) {
00508     if((i-d+1-_all_states[k]->getStart() >= 0) && (i + _all_states[k]->getStop() < (int)s.size())) {
00509       double joinable = _all_states[k]->observation()->prefix_sum_array_compute(i-d+1-_all_states[k]->getStart(),i+_all_states[k]->getStop(), mod(_all_states[k]->getInputPhase()-_all_states[k]->getStart(), nphase));
00510       if(joinable <= -HUGE) {
00511         continue;
00512       }
00513     }
00514   }
00515         for(int p = 0; p < nstates; p++){
00516           alpha(k, i) = log_sum(alpha(k, i), alpha(p, i-d)
00517         + _all_states[p]->transition()->log_probability_of(k)
00518         + _all_states[k]->duration_probability(d)
00519         + _all_states[k]->observation()->prefix_sum_array_compute(i-d+1, i, _all_states[k]->getInputPhase()));
00520         }
00521       }
00522     }
00523   }
00524 
00525   a = alpha;
00526 
00527   double sum = alpha(0, size-1) + _terminal_probabilities->log_probability_of(0);
00528   for(int k = 1; k < nstates; k++)
00529     sum = log_sum(sum, alpha(k, size-1) + _terminal_probabilities->log_probability_of(k));
00530 
00531   printf("forward: %f\n\n", sum);
00532 
00533   return sum;
00534 
00535 }
00536 
00537 
00538 
00539 
00541 double GeneralizedHiddenMarkovModel::backward(const Sequence & s, Matrix &b) const {
00542   int size = s.size();
00543   int nstates = _all_states.size();
00544   initialize_prefix_sum_arrays(s);
00545 
00546   Matrix beta (nstates, size);
00547 
00548   std::vector< std::list<int> > valid_positions;
00549   valid_positions.resize(nstates);
00550   std::list <int> e;
00551   for(int k = 0; k <nstates; k++)
00552       valid_positions[k] = e;
00553 
00554   // initialization
00555   for (int k = 0; k < nstates; k++) {
00556     beta(k, size-1) = _terminal_probabilities->log_probability_of(k);
00557     if(beta(k,size-1) > -HUGE){
00558       if(!_all_states[k]->isGeometricDuration())
00559   valid_positions[k].push_back(size-1);
00560     }
00561   }
00562   for (int i = size-2; i >= 0; i--){
00563     for (int k = 0; k < nstates; k++){
00564       beta(k,i) = -HUGE;
00565       for(int p = 0; p < (int)(_all_states[k]->successors()).size(); p++){
00566   int id = _all_states[k]->successors()[p];
00567   beta(k,i) = log_sum(beta(k,i), _all_states[k]->transition()->log_probability_of(id) + _all_states[id]->backwardSum(beta,s,i,valid_positions));
00568       }
00569       if(beta(k,i) > -HUGE){
00570   if(!_all_states[k]->isGeometricDuration())
00571       valid_positions[k].push_front(i);
00572       }
00573     }
00574   }
00575 
00576   b = beta;
00577 
00578   double sum = -HUGE;
00579   for(int k = 0; k < nstates; k++)
00580     sum = log_sum(sum, getInitialProbabilities()->log_probability_of(k) + _all_states[k]->backwardSum(beta,s,-1,valid_positions)) ;
00581 
00582   return sum;
00583 }
00584 
00585   void GeneralizedHiddenMarkovModel::initialize_prefix_sum_arrays(const Sequence & s) const {
00586 #if 0
00587     struct timeval start, stop;
00588     gettimeofday(&start, (struct timezone *) NULL);
00589 #endif
00590 
00591     for (int i = 0; i < (int) _all_states.size(); i++) {
00592       _all_states[i]->observation()->initialize_prefix_sum_array(s);
00593     }
00594 #if 0
00595     gettimeofday(&stop, (struct timezone *)NULL);
00596     stop.tv_sec -= start.tv_sec;
00597     stop.tv_usec -= start.tv_usec;
00598     if(stop.tv_usec  < 0){
00599       stop.tv_sec --;
00600       stop.tv_usec += 1000000;
00601     }
00602     fprintf(stderr, "PSA Elapsed time %ld%c%02d seconds\n", stop.tv_sec, '.', stop.tv_usec/1000);
00603 #endif
00604 
00605   }
00606 
00607 
00608 
00609 void GeneralizedHiddenMarkovModel::fixStatesPredecessorSuccessor() {
00610   for (int i = 0; i < (int) _all_states.size(); i++)
00611     {
00612       if (_all_states[i] != NULL)
00613         {
00614           for (int j = 0; j < (int) _all_states.size(); j++) {
00615             if ((!close(exp(_all_states[i]->transition()->log_probability_of(j)), 0.0, 1e-1)) &&  (_all_states[j] != NULL)) {
00616               _all_states[j]->addPredecessor(i);
00617             }
00618 
00619             if ((_all_states[j] != NULL) && (!close(exp( _all_states[i]->transition()->log_probability_of(j)),
00620                                                    0.0, 1e-1)))
00621               _all_states[i]->addSuccessor(j);
00622           }
00623         }
00624     }
00625   for (int i = 0; i < (int) _all_states.size(); i++)
00626     {
00627     if (_all_states[i] != NULL)
00628       {
00629         _all_states[i]->fixTransitionDistribution();
00630         _all_states[i]->clearPredecessorSuccessor();
00631       }
00632     }
00633   for (int i = 0; i < (int) _all_states.size(); i++)
00634     {
00635       if (_all_states[i] != NULL)
00636         {
00637 
00638           for (int j = 0; j < (int) _all_states.size(); j++) {
00639             double transprob = exp(_all_states[i]->transition()->log_probability_of(j));
00640             if ((!close(transprob, 0.0, 1e-1)) &&  (_all_states[j] != NULL)) {
00641               _all_states[j]->addPredecessor(i);
00642               if((!_all_states[j]->isGeometricDuration())  && (!_all_states[i]->isGeometricDuration()))
00643                 {
00644                   std::cerr << "WARNING: Transitions between two non-geometric run-length states make viterbi decoding slow: " << _all_states[i]->name() << "->" << _all_states[j]->name() << ": "  << transprob << std::endl;
00645                 }
00646             }
00647 
00648             if ((_all_states[j] != NULL) && (!close(transprob,0.0, 1e-1))) {
00649               _all_states[i]->addSuccessor(j);
00650               if((!_all_states[i]->isGeometricDuration())  && (!_all_states[j]->isGeometricDuration()))
00651                 {
00652                   std::cerr << "WARNING: Transitions between two non-geometric run-length states make viterbi decoding slow: " << _all_states[i]->name() << "->" << _all_states[j]->name() << ": "  << transprob << std::endl;
00653                 }
00654             }
00655           }
00656         }
00657     }
00658 
00659 }
00660 
00661 
00663 double GeneralizedHiddenMarkovModel::_viterbi(const Sequence &s, Sequence &path,
00664                 Matrix & g) const {
00665   int size = s.size();
00666   int nstates = _all_states.size();
00667   initialize_prefix_sum_arrays(s);
00668 
00669   Matrix gamma(nstates, size);
00670   Matrix psi(nstates, size);
00671   Matrix psilen(nstates, size);
00672 
00673   // initialization
00674   for (int k = 0; k < nstates; k++) {
00675     gamma(k, 0) = getInitialProbabilities()->log_probability_of(k)
00676       + _all_states[k]->observation()->prefix_sum_array_compute(0, 0);
00677   }
00678 
00679   for(int i = 1; i < size; i++){
00680     for(int k = 0; k < nstates; k++){
00681       gamma(k, i) = - HUGE;
00682       for(int d = i; d > 0; d--){
00683         double gmax = gamma(0, i-d) + _all_states[0]->transition()->log_probability_of(k);
00684         int pmax = 0;
00685         for(int p = 1; p < nstates; p++){
00686           double g = gamma(p, i-d) + _all_states[p]->transition()->log_probability_of(k);
00687           if(gmax < g){
00688             gmax = g;
00689             pmax = p;
00690           }
00691         }
00692         gmax = gmax + _all_states[k]->duration_probability(d) + _all_states[k]->observation()->prefix_sum_array_compute(i-d+1, i);
00693         if(gamma(k, i) < gmax){
00694           gamma(k, i) = gmax;
00695           psi(k, i) = pmax;
00696           psilen(k, i) = d;
00697         }
00698       }
00699     }
00700   }
00701 
00702   //backtracing
00703   path.resize(size);
00704   int L = size-1;
00705   int state = 0;
00706   double max = gamma(0, L);
00707   for(int k = 1; k < nstates; k++){
00708     if(max < gamma(k, L)){
00709       max = gamma(k, L);
00710       state = k;
00711     }
00712   }
00713   while(L > 0){
00714     int d = psilen(state, L);
00715     int p = psi(state, L);
00716     for(int i = 0; i < d; i++){
00717       path[L] = state;
00718       L--;
00719     }
00720     state = p;
00721   }
00722   return max;
00723 }
00724 
00725 
00727 double GeneralizedHiddenMarkovModel::viterbi(const Sequence &s, Sequence &path,
00728                 Matrix & g) const {
00729   int size = s.size();
00730   int nstates = _all_states.size();
00731 
00732   initialize_prefix_sum_arrays(s);
00733 
00734   Matrix gamma(nstates, size);
00735   Matrix psi(nstates, size);
00736   Matrix psilen(nstates, size);
00737 
00738   std::map < int, std::list<int>  > valid_positions;
00739   std::list <int> e;
00740   for(int k = 0; k <nstates; k++)
00741       valid_positions[k] = e;
00742 
00743 
00744   std::vector<bool> possible_path;
00745   possible_path.resize(nstates);
00746 
00747   // initialization
00748   for (int k = 0; k < nstates; k++) {
00749       gamma(k, 0) = getInitialProbabilities()->log_probability_of(k)
00750           + _all_states[k]->observation()->prefix_sum_array_compute(0, 0);
00751       possible_path[k] = 1;
00752       if(gamma(k,0) <= -HUGE)
00753           {
00754               possible_path[k] = 0;
00755           }
00756   }
00757 
00758   for(int i = 0; i < (int)possible_path.size(); i++)
00759       {
00760           std::vector <int> next_states = _all_states[i]->successors();
00761           if(!possible_path[i])
00762               continue;
00763           for(int p = 0; p < (int)next_states.size(); p++)
00764               {
00765                   int succ = next_states[p];
00766                   if (!_all_states[succ]->isGeometricDuration()) {
00767                       (valid_positions.find(succ)->second).push_back(0);
00768                   }
00769               }
00770       }
00771 
00772 
00773   for(int i = 1; i < size; i++){
00774       for(int k = 0; k < nstates; k++){
00775 
00776           gamma(k, i) = -HUGE;
00777           possible_path[k] = 1;
00778           _all_states[k]->findBestPredecessor (gamma, psi, psilen,  s, i, _all_states, valid_positions);
00779           if(gamma(k,i) <= -HUGE)
00780               {
00781                   possible_path[k] = 0;
00782               }
00783 
00784       }
00785 
00786       for(int s = 0; s < (int)possible_path.size(); s++)
00787           {
00788               std::vector <int> next_states = _all_states[s]->successors();
00789               if(!possible_path[s])
00790                   continue;
00791               for(int p = 0; p < (int)next_states.size(); p++)
00792                   {
00793                       int succ = next_states[p];
00794                       if (!_all_states[succ]->isGeometricDuration()) {
00795                           (valid_positions.find(succ)->second).push_back(i);
00796                       }
00797                   }
00798           }
00799 
00800   }
00801   int L = size-1;
00802 
00803   if(_terminal_probabilities != NULL){
00804       for(int k = 0; k < nstates; k++)
00805           gamma(k,L) += _terminal_probabilities->log_probability_of(k);
00806   }
00807   //backtracing
00808   path.resize(size);
00809 
00810   int state = 0;
00811   double max = gamma(0, L);
00812   for(int k = 1; k < nstates; k++){
00813     if(max < gamma(k, L)){
00814         max = gamma(k, L) ;
00815         state = k;
00816     }
00817   }
00818 #if 0
00819   for(int i = 0; i < size; i++) {
00820       std::cerr << "i: " << i << std::endl;
00821       for(int k = 0; k < nstates; k++){
00822           std::cerr << " "  << getStateName(k) << " " << gamma(k, i) << " " << getStateName(psi(k, i)) << " " << psilen(k,i) << std::endl;
00823       }
00824 
00825   }
00826 #endif
00827   while(L > 0){
00828       int d = psilen(state, L);
00829       int p = psi(state, L);
00830       for(int i = 0; i < d; i++){
00831           path[L] = state;
00832           L--;
00833       }
00834       if(d == 0)
00835           {
00836               std::cerr << "Something wrong: [ predicted state duration equals to " << d << "]" << std::endl;
00837               break;
00838           }
00839       state = p;
00840   }
00841 #if 0
00842   for(int i = 0; i < size ; i ++) {
00843       std::cerr << "[" << i << "]" << std::endl; ;
00844       for(int k = 0; k < nstates; k++)
00845           {
00846 
00847               std::cerr << " " << _all_states[k] -> name() << " " << gamma(k,i) << " " << _all_states[psi(k, i)]->name() << " " << psilen(k, i) << std::endl;
00848           }
00849   }
00850 #endif
00851   return max;
00852 
00853 }
00854 
00856 Sequence & GeneralizedHiddenMarkovModel::chooseObservation(Sequence & h, int i,
00857                 int state) const {
00858   assert(state >= 0 && state < (int)_all_states.size());
00859   int d = _all_states[state]->chooseDuration();
00860   int iphase = _all_states[state]->getInputPhase();
00861   _all_states[state]->observation()->chooseWithHistory(h, i, iphase, d);
00862   return h;
00863 }
00864 
00866   int GeneralizedHiddenMarkovModel::chooseState(int state) const {
00867       return _all_states[state]->transition()->choose();
00868   }
00869 
00871   int GeneralizedHiddenMarkovModel::chooseFirstState() const {
00872     return _initial_probabilities->choose();
00873   }
00874 
00876   std::string GeneralizedHiddenMarkovModel::getStateName(int state) const {
00877     return _all_states[state]->name();
00878   }
00879 
00880 
00882   AlphabetPtr GeneralizedHiddenMarkovModel::getStateNames() const {
00883     return _state_names;
00884   }
00885 
00886   ProbabilisticModelCreatorPtr GeneralizedHiddenMarkovModel::getFactory() const {
00887     return GeneralizedHiddenMarkovModelCreatorPtr(
00888                                                   new GeneralizedHiddenMarkovModelCreator());
00889   }
00890 
00891 
00892 
00893 
00894   std::string GeneralizedHiddenMarkovModel::str() const {
00895     int nstates = getStateNames()->size();
00896     std::stringstream out;
00897     out << "model_name = \"" << model_name() << "\"" << std::endl;
00898     out << "state_names = (";
00899     int i = 0;
00900     if (i < nstates) {
00901       out << "\"" << getStateName(i) << "\"";
00902       for (i = 1; i < nstates; i++)
00903         out << ",\"" << getStateName(i) << "\"";
00904       out << ")" << std::endl;
00905     }
00906 
00907     out << "observation_symbols = (";
00908     out << "\"" << alphabet()->getSymbol(0)->name() << "\"";
00909     for (int i = 1; i < (int) alphabet()->size(); i++)
00910       out << ",\"" << alphabet()->getSymbol(i)->name() << "\"";
00911     out << ")" << std::endl;
00912 
00913     out << "transitions = (";
00914     if(!close(exp(_all_states[0]->transition()->log_probability_of(0)), 0, 1e-10))
00915       out << "\"" << getStateName(0) << "\" | \"" << getStateName(0) << "\": "
00916           << exp(_all_states[0]->transition()->log_probability_of(0));
00917     for (int i = 0; i < nstates; i++)
00918       for (int j = 0; j < nstates; j++)
00919         if (((i != 0) || (j != 0)) && (!close(exp( _all_states[i]->transition()->log_probability_of(j)), 0, 1e-10) ))
00920           out << ";\n \"" << getStateName(j) << "\" | \""
00921               << getStateName(i) << "\": " << exp( _all_states[i]->transition()->log_probability_of(j));
00922     out << ")" << std::endl;
00923 
00924     out << "initial_probabilities = (";
00925     if(!close(exp(_initial_probabilities->log_probability_of(0)), 0, 1e-10))
00926       out << "\"" << getStateName(0) << "\":  " << exp(_initial_probabilities->log_probability_of(0));
00927     for (int i = 1; i < nstates; i++)
00928       {
00929         if(!close(exp(_initial_probabilities->log_probability_of(i)), 0.0, 1e-10))
00930           out << ";\n \"" << getStateName(i) << "\": " << exp(
00931                                                               _initial_probabilities->log_probability_of(i));
00932       }
00933     out << ")" << std::endl;
00934 
00935     if(_terminal_probabilities != NULL) {
00936         out << "terminal_probabilities = (";
00937         if(!close(exp(_terminal_probabilities->log_probability_of(0)), 0, 1e-10))
00938             out << "\"" << getStateName(0) << "\":  " << exp(_terminal_probabilities->log_probability_of(0));
00939         for (int i = 1; i < nstates; i++)
00940             {
00941                 if(!close(exp(_terminal_probabilities->log_probability_of(i)), 0.0, 1e-10))
00942                     out << ";\n \"" << getStateName(i) << "\": " << exp(_terminal_probabilities->log_probability_of(i));
00943             }
00944         out << ")" << std::endl;
00945     }
00946 
00947     std::map <std::string, ProbabilisticModelPtr> ::const_iterator it;
00948 
00949     for(it = _models.begin(); it != _models.end(); it++)
00950       {
00951         out << it->first << " = [ "  << (it->second)->str() << " ] " << std::endl;
00952       }
00953 
00954     for (int i = 0; i < nstates; i++)
00955       out << _all_states[i]->str();
00956 
00957     return out.str();
00958   }
00959 
00960     void GeneralizedHiddenMarkovModel::buildDoubleParameterValue(DiscreteIIDModelPtr distr, ProbabilisticModelParameters & answer, const char * name) const
00961     {
00962         int nstates = getStateNames()->size();
00963         double sum = 0.0;
00964         std::vector <double> probs;
00965         probs.resize(nstates);
00966         for(int i = 0; i < nstates; i++){
00967             probs[i] = exp(distr->log_probability_of(i));
00968             sum += probs[i];
00969         }
00970         std::map <std::string, double> aux;
00971         std::stringstream out5;
00972         out5 << getStateName(0);
00973         aux[out5.str()] =  probs[0]/sum;
00974         for(int i = 0; i < nstates; i++)
00975             for(int j = 0; j < (int)alphabet()->size(); j++)
00976                 if((i != 0) || (j != 0)){
00977                     std::stringstream out6;
00978                     out6 << getStateName(i) ;
00979                     aux[out6.str()] = probs[i]/sum;
00980                 }
00981         answer.add(name, DoubleMapParameterValuePtr(new DoubleMapParameterValue(aux)));
00982 
00983     }
00984 
00985   ProbabilisticModelParameters GeneralizedHiddenMarkovModel::parameters() const
00986   {
00987     int nstates = getStateNames()->size();
00988     ProbabilisticModelParameters answer;
00989 
00990     answer.add("model_name", StringParameterValuePtr(new StringParameterValue(model_name())));
00991     answer.add("state_names", _state_names->getParameterValue());
00992     answer.add("observation_symbols", alphabet()->getParameterValue());
00993 
00994     std::map <std::string, double> trans;
00995     std::stringstream out;
00996     out << getStateName(0) << "|" << getStateName(0);
00997     trans[out.str()] =  exp(_all_states[0]->transition()->log_probability_of(0)) ;
00998     for(int i = 0; i < nstates; i++)
00999       for(int j = 0; j < nstates; j++)
01000         if((i != 0) || (j != 0)) {
01001           std::stringstream out2;
01002           out2 <<  getStateName(j) << "|" << getStateName(i) ;
01003           trans[out2.str()] = exp(_all_states[i]->transition()->log_probability_of(j));
01004         }
01005 
01006     answer.add("transitions", DoubleMapParameterValuePtr(new DoubleMapParameterValue(trans)));
01007 
01008     buildDoubleParameterValue(_initial_probabilities, answer, "initial_probabilities");
01009     if(_terminal_probabilities != NULL)
01010         buildDoubleParameterValue(_terminal_probabilities, answer, "terminal_probabilities");
01011     std::map <std::string, ProbabilisticModelPtr> ::const_iterator it;
01012 
01013     for(it = _models.begin(); it != _models.end(); it++)
01014       {
01015         answer.add(it->first, ProbabilisticModelParameterListValuePtr(new ProbabilisticModelParameterListValue((it->second)->parameters())));
01016       }
01017 
01018     for (int i = 0; i < nstates; i++)
01019       {
01020         answer.add(_all_states[i]->name(), ProbabilisticModelParameterListValuePtr(new ProbabilisticModelParameterListValue(_all_states[i]->parameters())));
01021       }
01022 
01023     return answer;
01024   }
01025 
01026   void GeneralizedHiddenMarkovModel::initialize(const ProbabilisticModelParameters & parameters)
01027   {
01028 
01029     ProbabilisticModelParameterValuePtr state_names_par =
01030       parameters.getMandatoryParameterValue("state_names");
01031     ProbabilisticModelParameterValuePtr initial_probabilities_par =
01032       parameters.getMandatoryParameterValue("initial_probabilities");
01033     ProbabilisticModelParameterValuePtr transitions_par =
01034       parameters.getMandatoryParameterValue("transitions");
01035     ProbabilisticModelParameterValuePtr observation_symbols_par =
01036       parameters.getMandatoryParameterValue("observation_symbols");
01037     ProbabilisticModelParameterValuePtr terminal_probabilities_par =
01038         parameters.getOptionalParameterValue("terminal_probabilities");
01039 
01040 
01041     std::vector<std::string> state_names = state_names_par->getStringVector();
01042     AlphabetPtr states = AlphabetPtr(new Alphabet());
01043     states->initializeFromVector(state_names);
01044     setStateNames(states);
01045     AlphabetPtr observation_symbols = AlphabetPtr(new Alphabet());
01046     observation_symbols->initializeFromVector(observation_symbols_par->getStringVector());
01047 
01048     DiscreteIIDModelPtr pi = DiscreteIIDModelPtr(new DiscreteIIDModel());
01049     pi->initializeFromMap(initial_probabilities_par->getDoubleMap(), states);
01050 
01051     if(terminal_probabilities_par != NULL){
01052         DiscreteIIDModelPtr terminalprob = DiscreteIIDModelPtr(new DiscreteIIDModel());
01053         terminalprob->initializeFromMap(terminal_probabilities_par->getDoubleMap(), states);
01054         setTerminalProbability(terminalprob);
01055     }
01056 
01057 
01058     std::map<std::string, double> transpar = transitions_par->getDoubleMap();
01059     std::map<std::string, DoubleVector> trans;
01060     std::map<std::string, double>::const_iterator it;
01061     boost::regex separator("\\|");
01062 
01063     for (it = transpar.begin(); it != transpar.end(); it++) {
01064       std::vector<std::string> splited;
01065       split_regex(it->first, splited, separator);
01066       if(splited.size() == 1) {
01067         splited.push_back("");
01068       }
01069       std::string from (splited[1]);
01070       std::string to ( splited[0]);
01071       if(!states->has(from) ) {
01072          std::cerr << "ERROR: The state " << from << " is not in state list !\n" << std::endl;
01073          exit(-1);
01074       }
01075       if(!states->has(to) ) {
01076          std::cerr << "ERROR: The state " << to << " is not in state list !\n" << std::endl;
01077          exit(-1);
01078       }
01079 
01080       if (trans.find(from) == trans.end()) {
01081         int id = states->getSymbol(to)->id();
01082         DoubleVector probs;
01083         probs.resize(states->size() );
01084         trans[from] = probs;
01085         if (id < (int) trans[from].size())
01086           trans[from][id] = it->second;
01087       } else {
01088         int id = states->getSymbol(to)->id();
01089         if (id < (int) trans[from].size())
01090           trans[from][id] = it->second;
01091       }
01092     }
01093 
01094     setAlphabet(observation_symbols);
01095     int nclasses = -1;
01096     for (int i = 0; i < (int) state_names.size(); i++) {
01097       trim_spaces(state_names[i]);
01098       int id;
01099       ProbabilisticModelParameterValuePtr statepar =
01100         parameters.getOptionalParameterValue(state_names[i]);
01101       if (statepar == NULL) {
01102         std::cerr << "ERROR: Missing configuration for the state: "
01103                   << state_names[i] << std::endl;
01104         return;
01105       }
01106       ConfigurationReader reader;
01107       std::string config = statepar->getString();
01108       if(config[0] == '[')
01109         config = config.substr(1, config.size() - 2);
01110       if (!reader.load(config)) {
01111         std::cerr << "ERROR: Configuring " << state_names[i] << "!" << std::endl;
01112         std::cerr << config << std::endl;
01113         return;
01114       } else {
01115         ProbabilisticModelParametersPtr statepars = reader.parameters();
01116         ProbabilisticModelParameterValuePtr observationpar =
01117           statepars->getOptionalParameterValue("observation");
01118         ProbabilisticModelParameterValuePtr durationpar =
01119           statepars->getOptionalParameterValue("duration");
01120         ProbabilisticModelParameterValuePtr lengthpar =
01121           statepars->getOptionalParameterValue("sequence_length");
01122         ProbabilisticModelParameterValuePtr outputphasepar =
01123           statepars->getOptionalParameterValue("output_phase");
01124         ProbabilisticModelParameterValuePtr inputphasepar =
01125           statepars->getOptionalParameterValue("input_phase");
01126 
01127   ProbabilisticModelParameterValuePtr extendEmissionpar =
01128           statepars->getOptionalParameterValue("extend_emission");
01129   ProbabilisticModelParameterValuePtr classespar =
01130     statepars->getOptionalParameterValue("classes");
01131 
01132 
01133         if (observationpar == NULL) {
01134           std::cerr << "ERROR: Missing  observation model for state "
01135                     << state_names[i] << std::endl;
01136           return;
01137         }
01138 
01139         std::string model_name = observationpar->getString();
01140         restore_model(model_name, parameters);
01141 
01142         DiscreteIIDModelPtr transition =
01143           DiscreteIIDModelPtr(
01144                                         new DiscreteIIDModel(
01145                                                                        trans[state_names[i]]));
01146 
01147         int iphase = -1;
01148         int ophase = -1;
01149   std::vector<int> classes;
01150   if(classespar != NULL){
01151     std::vector<double> cl = classespar->getDoubleVector();
01152     classes.resize(cl.size());
01153     for(int c = 0; c < (int)cl.size(); c++){
01154       classes[c] = (int)cl[c];
01155       if(classes[c] > nclasses)
01156         nclasses = classes[c];
01157     }
01158   }
01159         if(inputphasepar != NULL)
01160             iphase = inputphasepar->getInt();
01161         if(outputphasepar != NULL)
01162             ophase = outputphasepar->getInt();
01163         if (durationpar == NULL) {
01164             if (lengthpar == NULL) {
01165         id = configureGeometricDurationState(model_name, transition, state_names[i], iphase, ophase, classes);
01166             } else {
01167                 id = configureSignalState(model_name,
01168                                      transition,
01169                                      lengthpar->getDouble(),
01170             state_names[i], iphase, ophase, classes);
01171             }
01172 
01173         } else {
01174 
01175             // Explicit duration state
01176             std::string duration_model_name = durationpar->getString();
01177             restore_model(duration_model_name,  parameters);
01178             id = configureExplicitDurationState(model_name, transition, duration_model_name, state_names[i], iphase, ophase, classes);
01179             if(extendEmissionpar != NULL) {
01180                 int startEmissionOffset = (int)(extendEmissionpar->getDoubleVector())[0];
01181                 int endEmissionOffset = (int)(extendEmissionpar->getDoubleVector())[1];
01182                 _all_states[id]->setStart(startEmissionOffset);
01183                 _all_states[id]->setStop(endEmissionOffset);
01184             }
01185         }
01186       }
01187     }
01188     nclasses++;
01189     setNumClasses(nclasses);
01190     setObservationSymbols(observation_symbols);
01191     setInitialProbability(pi);
01192 
01193     fixStatesPredecessorSuccessor();
01194 
01195     for (int i = 0; i < (int)_all_states.size(); i++)
01196         if (_all_states[i]->transition()->size() <= 0)
01197             {
01198                 std::cerr << "ERROR: GHMM initialization, state " << _all_states[i]->name() << " has outdegree equal a zero !\n";
01199                 exit(-1);
01200             }
01201   }
01202 
01203 
01204 }
01205