ToPS
|
00001 /* 00002 * GeneralizedHiddenMarkovModel.cpp 00003 * 00004 * Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br> 00005 * Ígor Bonádio <ibonadio@ime.usp.br> 00006 * Vitor Onuchic <vitoronuchic@gmail.com> 00007 * Alan Mitchell Durham <aland@usp.br> 00008 * 00009 * This program is free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 3 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00022 * MA 02110-1301, USA. 00023 */ 00024 00025 #include "InhomogeneousFactorableModel.hpp" 00026 #include "GeneralizedHiddenMarkovModel.hpp" 00027 #include "GeneralizedHiddenMarkovModelCreator.hpp" 00028 #include "ProbabilisticModelCreatorClient.hpp" 00029 #include "Symbol.hpp" 00030 #include "GHMMStates.hpp" 00031 #include "util.hpp" 00032 #include <sys/types.h> 00033 00034 00035 namespace tops { 00036 00037 std::string GeneralizedHiddenMarkovModel::print_graph () const { 00038 std::stringstream out; 00039 for(int i = 0; i < (int)_all_states.size(); i++) 00040 { 00041 out << _all_states[i]->name() << " " << _all_states[i]->name() << std::endl; 00042 } 00043 out << "#" << std::endl; 00044 for(int i = 0; i < (int)_all_states.size(); i++) 00045 { 00046 for(int j = 0; j < (int)_all_states.size(); j++) 00047 { 00048 if(!close( exp(_all_states[i]->transition()->log_probability_of(j)),0.0, 1-1)) 00049 out << _all_states[i]->name() << " " << _all_states[j]->name() << " " << exp(_all_states[i]->transition()->log_probability_of(j)) << std::endl; 00050 } 00051 } 00052 return out.str(); 00053 } 00054 void GeneralizedHiddenMarkovModel::restore_model(std::string & model_name,const ProbabilisticModelParameters & parameters) { 00055 ProbabilisticModelParameterValuePtr modelpar = 00056 parameters.getOptionalParameterValue(model_name); 00057 if (modelpar == NULL) { 00058 std::cerr << "ERROR: Missing definition of the model " 00059 << model_name << std::endl; 00060 return; 00061 } 00062 if (_models.find(model_name) != _models.end()) { 00063 return; 00064 } 00065 00066 std::string cfg = modelpar->getString(); 00067 ProbabilisticModelCreatorClient creator; 00068 ConfigurationReader modelreader; 00069 if ((cfg.size()) > 0 && (cfg[0] == '[')) { 00070 cfg = cfg.substr(1, cfg.size() - 2); 00071 if (modelreader.load(cfg)) { 00072 ProbabilisticModelParameterValuePtr modelpar = (modelreader.parameters())->getOptionalParameterValue("model"); 00073 if(modelpar != NULL) 00074 { 00075 std::string submodelstr = modelpar->getString(); 00076 if ((submodelstr.size()) > 0 && (submodelstr[0] == '[')) { 00077 } else { 00078 restore_model(submodelstr, parameters); 00079 } 00080 } 00081 ProbabilisticModelPtr m = creator.create(*(modelreader.parameters()), _models); 00082 _models[model_name] = m; 00083 } else{ 00084 std::cerr << "/=======/\n" << cfg << "/========/" << std::endl; 00085 exit(-1); 00086 } 00087 00088 } 00089 else 00090 { 00091 00092 ProbabilisticModelPtr m = creator.create(cfg); 00093 if (m == NULL) { 00094 std::cerr << "Can not load model " << cfg << "! " << std::endl; 00095 return; 00096 } 00097 00098 _models[model_name] = m; 00099 } 00100 } 00101 00102 void GeneralizedHiddenMarkovModel::setStateNames(AlphabetPtr alphabet) { 00103 _state_names = alphabet; 00104 _all_states.resize(alphabet->size()); 00105 } 00106 void GeneralizedHiddenMarkovModel::setInitialProbability( 00107 DiscreteIIDModelPtr init) { 00108 _initial_probabilities = init; 00109 } 00110 00111 void GeneralizedHiddenMarkovModel::setNumClasses(int nclasses){ 00112 _nclasses = nclasses; 00113 } 00114 00115 int GeneralizedHiddenMarkovModel::getNumClasses(){ 00116 return _nclasses; 00117 } 00118 00119 void GeneralizedHiddenMarkovModel::setTerminalProbability( 00120 DiscreteIIDModelPtr term) { 00121 _terminal_probabilities = term; 00122 } 00123 00124 int GeneralizedHiddenMarkovModel::configureSignalState(std::string observation_model_name, 00125 DiscreteIIDModelPtr transition_distr, 00126 int size, std::string state_name, int iphase, int ophase, vector<int> classes){ 00127 SymbolPtr symbol = _state_names->getSymbol(state_name); 00128 ProbabilisticModelPtr model = _models[observation_model_name]; 00129 GHMMSignalStatePtr signal = GHMMSignalStatePtr(new GHMMSignalState(model,transition_distr, symbol)); 00130 signal->setSize(size); 00131 signal->observationModelName(observation_model_name); 00132 signal->setInputPhase(iphase); 00133 signal->setOutputPhase(ophase); 00134 signal->setClasses(classes); 00135 _all_states[symbol->id()] = signal; 00136 _signal_states.push_back(signal); 00137 return symbol->id(); 00138 } 00139 00140 int GeneralizedHiddenMarkovModel::configureGeometricDurationState(std::string model_name, 00141 DiscreteIIDModelPtr transition_distr, std::string state_name, int iphase, int ophase, vector<int> classes) { 00142 ProbabilisticModelPtr model = _models[model_name]; 00143 SymbolPtr symbol = _state_names->getSymbol(state_name); 00144 GHMMStatePtr state = GHMMStatePtr(new GHMMState(model, transition_distr, 00145 symbol)); 00146 state->setInputPhase(iphase); 00147 state->setOutputPhase(ophase); 00148 state->setClasses(classes); 00149 _all_states[symbol->id()] = state; 00150 state->observationModelName(model_name); 00151 _geometric_duration_states.push_back(state); 00152 return symbol->id(); 00153 } 00154 00155 00156 int GeneralizedHiddenMarkovModel::configureExplicitDurationState(std::string observation_model_name, DiscreteIIDModelPtr transition_distr, 00157 std::string duration_model_name, std::string state_name, int iphase, int ophase, vector<int> classes) 00158 { 00159 00160 ProbabilisticModelPtr model = _models[observation_model_name]; 00161 ProbabilisticModelPtr duration = _models[duration_model_name]; 00162 SymbolPtr symbol = _state_names->getSymbol(state_name); 00163 GHMMExplicitDurationStatePtr state = GHMMExplicitDurationStatePtr( 00164 new GHMMExplicitDurationState(model, transition_distr, symbol)); 00165 state->observationModelName(observation_model_name); 00166 state->durationModelName(duration_model_name); 00167 state->setDuration(duration); 00168 state->setInputPhase(iphase); 00169 state->setOutputPhase(ophase); 00170 state->setClasses(classes); 00171 _all_states[symbol->id()] = state; 00172 return symbol->id(); 00173 } 00174 00175 00176 void GeneralizedHiddenMarkovModel::initializeChoosePathAlgorithm(const Sequence &s) 00177 { 00178 if(s != _last) 00179 { 00180 forward(s, _alpha); 00181 _last = s; 00182 } 00183 } 00184 00185 void GeneralizedHiddenMarkovModel::choosePath(const Sequence &s, Sequence &path) { 00186 initializeChoosePathAlgorithm(s); 00187 int size = s.size(); 00188 int nstates =_all_states.size(); 00189 DoubleVector lastStateProbability(nstates); 00190 double sum = -HUGE; 00191 int L = size - 1; 00192 path.resize(s.size()); 00193 #if 0 00194 for(int i = 0; i < size; i++) 00195 { 00196 std::cerr << i << " " << std::endl; 00197 for (int j = 0; j < nstates; j++) 00198 { 00199 std::cerr << " " << _all_states[j]->name() << " " << _alpha(j, i) << std::endl; 00200 } 00201 } 00202 #endif 00203 for(int k = 0; k < nstates; k++) 00204 { 00205 sum = log_sum(sum, _alpha(k,L) ); 00206 } 00207 for(int k = 0; k < nstates; k++) 00208 { 00209 lastStateProbability[k] = exp(_alpha(k, L) - sum); 00210 } 00211 DiscreteIIDModelPtr m = DiscreteIIDModelPtr(new DiscreteIIDModel(lastStateProbability)); 00212 00213 int q = m->choose(); 00214 int position = L; 00215 00216 while (position > 0 ){ 00217 int new_position; 00218 int state; 00219 new_position = 0; 00220 state = q; 00221 _all_states[q]->choosePredecessor(_alpha, position, state, new_position, _all_states); 00222 for(int p = new_position + 1; p <= position; p++) 00223 { 00224 path[p] = q; 00225 } 00226 position = new_position; 00227 q = state; 00228 } 00229 } 00230 00231 float GeneralizedHiddenMarkovModel::MEAPred(const Sequence &s, Sequence &path){ 00232 fMatrix probs; 00233 int size = s.size(); 00234 int nstates = _all_states.size(); 00235 posteriorProbabilitiesNoClasses(s,probs); 00236 Matrix ea(size,nstates); 00237 IntMatrix ptrs(size,nstates); 00238 00239 //Initialization 00240 for(int k = 0; k < nstates; k++){ 00241 float prob = probs(0,k); 00242 ea(0,k) = prob; 00243 } 00244 00245 //Recursion 00246 for(int i = 1; i < size; i++){ 00247 for(int k = 0; k < nstates; k++){ 00248 float max = 0.0; 00249 int state = -1; 00250 std::vector<int> preds = _all_states[k]->predecessors(); 00251 preds.push_back(k); 00252 for(int p = 0; p < (int)preds.size(); p++){ 00253 int id = preds[p]; 00254 if(max < ea(i-1,id)){ 00255 max = ea(i-1,id); 00256 state = id; 00257 } 00258 } 00259 ea(i,k) = max + probs(i,k); 00260 ptrs(i,k) = state; 00261 } 00262 } 00263 00264 //Traceback 00265 path.resize(size); 00266 float mea = 0.0; 00267 int state = -1; 00268 for(int k = 0; k < nstates; k++){ 00269 if(ea(size-1,k) > mea){ 00270 mea = ea(size-1,k); 00271 state = k; 00272 } 00273 } 00274 path[size-1] = state; 00275 for(int i = size-1; i > 0; i--){ 00276 state = ptrs(i,path[i]); 00277 path[i-1] = state; 00278 } 00279 return mea/size; 00280 } 00281 00282 float GeneralizedHiddenMarkovModel::MEAPred(const Sequence &s, Sequence &path, SparseMatrixPtr ppPred){ 00283 fMatrix probs; 00284 ppPred->getfMatrixPred(probs); 00285 int size = s.size(); 00286 int nstates = _all_states.size(); 00287 Matrix ea(size,nstates); 00288 IntMatrix ptrs(size,nstates); 00289 00290 //Initialization 00291 for(int k = 0; k < nstates; k++){ 00292 ea(0,k) = probs(0,k); 00293 } 00294 00295 //Recursion 00296 for(int i = 1; i < size; i++){ 00297 for(int k = 0; k < nstates; k++){ 00298 float max = 0.0; 00299 int state = -1; 00300 std::vector<int> preds = _all_states[k]->predecessors(); 00301 preds.push_back(k); 00302 for(int p = 0; p < (int)preds.size(); p++){ 00303 int id = preds[p]; 00304 if(max < ea(i-1,id)){ 00305 max = ea(i-1,id); 00306 state = id; 00307 } 00308 } 00309 ea(i,k) = max + probs(i,k); 00310 ptrs(i,k) = state; 00311 } 00312 } 00313 00314 //Traceback 00315 path.resize(size); 00316 float mea = 0.0; 00317 int state = -1; 00318 for(int k = 0; k < nstates; k++){ 00319 if(ea(size-1,k) > mea){ 00320 mea = ea(size-1,k); 00321 state = k; 00322 } 00323 } 00324 path[size-1] = state; 00325 for(int i = size-1; i > 0; i--){ 00326 state = ptrs(i,path[i]); 00327 path[i-1] = state; 00328 } 00329 return mea/size; 00330 } 00331 00332 void GeneralizedHiddenMarkovModel::posteriorProbabilities(const Sequence &s, fMatrix &postProbs) const{ 00333 posteriorProbabilitiesNoClasses(s,postProbs); 00334 } 00335 00336 void GeneralizedHiddenMarkovModel::posteriorProbabilitiesNoClasses (const Sequence &s, fMatrix &postProbs) const{ 00337 int size = s.size(); 00338 int nstates = _all_states.size(); 00339 initialize_prefix_sum_arrays(s); 00340 00341 Matrix beta; 00342 double prob = backward(s,beta); 00343 00344 Matrix alpha (nstates, size); 00345 00346 postProbs.resize(size,nstates); 00347 00348 std::vector< std::list<int> > valid_positions; 00349 valid_positions.resize(nstates); 00350 std::list <int> e; 00351 for(int k = 0; k <nstates; k++) 00352 valid_positions[k] = e; 00353 00354 // initialization 00355 for (int c = 0; c < nstates; c++) { 00356 postProbs(0,c) = 0.0; 00357 } 00358 for (int k = 0; k < nstates; k++) { 00359 alpha(k, 0) = getInitialProbabilities()->log_probability_of(k) 00360 + _all_states[k]->observation()->prefix_sum_array_compute(0, 0, _all_states[k]->getInputPhase()) + _all_states[k]->duration_probability(1); 00361 if(alpha(k,0) > -HUGE){ 00362 postProbs(0,k) = exp(alpha(k,0) + beta(k,0) - prob); 00363 std::vector<int> succ = _all_states[k]->successors(); 00364 for(int p = 0; p < (int)succ.size(); p++){ 00365 int id = succ[p]; 00366 if(!_all_states[id]->isGeometricDuration()) 00367 valid_positions[id].push_back(0); 00368 } 00369 } 00370 } 00371 00372 for (int i = 1; i < size; i++) { 00373 for(int c = 0; c < nstates; c++) 00374 postProbs(i,c) = 0.0; 00375 for(int k = 0; k < nstates; k++) { 00376 _all_states[k]->posteriorSum(alpha, beta, postProbs, s, i, _all_states, valid_positions, prob, k); 00377 } 00378 } 00379 00380 /*double sum = alpha(0, size-1) + _terminal_probabilities->log_probability_of(0); 00381 for(int k = 1; k < nstates; k++) 00382 sum = log_sum(sum, alpha(k, size-1) + _terminal_probabilities->log_probability_of(k)); 00383 cout << "prob = " << prob << " postProb = " << sum << endl;*/ 00384 //cout << (probabilities.value_data()).size() << " " << postProbs.size1()*postProbs.size2() << endl; 00385 } 00386 00387 void GeneralizedHiddenMarkovModel::posteriorProbabilitiesWithClasses (const Sequence &s, SparseMatrixPtr probabilities) const{ 00388 int size = s.size(); 00389 int nstates = _all_states.size(); 00390 initialize_prefix_sum_arrays(s); 00391 00392 Matrix beta; 00393 double prob = backward(s,beta); 00394 00395 Matrix alpha (nstates, size); 00396 00397 fMatrix postProbs(size,_nclasses); 00398 00399 std::vector< std::list<int> > valid_positions; 00400 valid_positions.resize(nstates); 00401 std::list <int> e; 00402 for(int k = 0; k <nstates; k++) 00403 valid_positions[k] = e; 00404 00405 // initialization 00406 for (int c = 0; c < _nclasses; c++) { 00407 postProbs(0,c) = 0.0; 00408 } 00409 for (int k = 0; k < nstates; k++) { 00410 alpha(k, 0) = getInitialProbabilities()->log_probability_of(k) 00411 + _all_states[k]->observation()->prefix_sum_array_compute(0, 0, _all_states[k]->getInputPhase()) + _all_states[k]->duration_probability(1); 00412 if(alpha(k,0) > -HUGE){ 00413 for(int c = 0; c < (int)(_all_states[k]->classes()).size(); c++) 00414 postProbs(0,_all_states[k]->classes()[c]) = exp(alpha(k,0) + beta(k,0) - prob); 00415 std::vector<int> succ = _all_states[k]->successors(); 00416 for(int p = 0; p < (int)succ.size(); p++){ 00417 int id = succ[p]; 00418 if(!_all_states[id]->isGeometricDuration()) 00419 valid_positions[id].push_back(0); 00420 } 00421 } 00422 } 00423 00424 for (int i = 1; i < size; i++) { 00425 for(int c = 0; c < _nclasses; c++) 00426 postProbs(i,c) = 0.0; 00427 for(int k = 0; k < nstates; k++) { 00428 _all_states[k]->posteriorSum(alpha, beta, postProbs, s, i, _all_states, valid_positions, prob, -1); 00429 } 00430 } 00431 00432 probabilities->buildPredMatrix(size,_nclasses,postProbs); 00433 00434 00435 /*double sum = alpha(0, size-1) + _terminal_probabilities->log_probability_of(0); 00436 for(int k = 1; k < nstates; k++) 00437 sum = log_sum(sum, alpha(k, size-1) + _terminal_probabilities->log_probability_of(k)); 00438 00439 cout << "prob = " << prob << " postProb = " << sum << endl;*/ 00440 //cout << (probabilities.value_data()).size() << " " << postProbs.size1()*postProbs.size2() << endl; 00441 } 00442 00443 00444 double GeneralizedHiddenMarkovModel::forward(const Sequence & s, Matrix &a) const { 00445 int size = s.size(); 00446 int nstates = _all_states.size(); 00447 initialize_prefix_sum_arrays(s); 00448 00449 Matrix alpha (nstates, size); 00450 00451 std::vector< std::list<int> > valid_positions; 00452 valid_positions.resize(nstates); 00453 std::list <int> e; 00454 for(int k = 0; k <nstates; k++) 00455 valid_positions[k] = e; 00456 00457 // initialization 00458 for (int k = 0; k < nstates; k++) { 00459 alpha(k, 0) = getInitialProbabilities()->log_probability_of(k) 00460 + _all_states[k]->observation()->prefix_sum_array_compute(0, 0, _all_states[k]->getInputPhase()) + _all_states[k]->duration_probability(1); 00461 if(alpha(k,0) > -HUGE){ 00462 std::vector<int> succ = _all_states[k]->successors(); 00463 for(int p = 0; p < (int)succ.size(); p++){ 00464 int id = succ[p]; 00465 if(!_all_states[id]->isGeometricDuration()) 00466 valid_positions[id].push_back(0); 00467 } 00468 } 00469 } 00470 00471 for (int i = 1; i < size; i++) { 00472 for(int k = 0; k < nstates; k++) { 00473 _all_states[k]->forwardSum(alpha, s, i, _all_states, valid_positions); 00474 } 00475 } 00476 00477 a = alpha; 00478 double sum = alpha(0, size-1) + _terminal_probabilities->log_probability_of(0); 00479 for(int k = 1; k < nstates; k++) 00480 sum = log_sum(sum, alpha(k, size-1) + _terminal_probabilities->log_probability_of(k)); 00481 00482 return sum; 00483 } 00484 00485 00486 00487 double GeneralizedHiddenMarkovModel::inefficient_forward(const Sequence & s, Matrix &a) const { 00488 int size = s.size(); 00489 int nstates = _all_states.size(); 00490 initialize_prefix_sum_arrays(s); 00491 00492 Matrix alpha (nstates, size); 00493 00494 // initialization 00495 for (int k = 0; k < nstates; k++) { 00496 alpha(k, 0) = getInitialProbabilities()->log_probability_of(k) + _all_states[k]->observation()->prefix_sum_array_compute(0, 0); 00497 } 00498 00499 for (int i = 1; i < size; i++) { 00500 printf("i = %d\n",i); 00501 for(int k = 0; k < nstates; k++) { 00502 alpha(k, i) = -HUGE; 00503 for(int d = i; d > 0; d--){ 00504 int nphase = _all_states[k]->getInputPhase(); 00505 if(_all_states[k]->observation()->inhomogeneous() != NULL) 00506 nphase = _all_states[k]->observation()->inhomogeneous()->maximumTimeValue() + 1; 00507 if(_all_states[k]->getStart() > 0 && _all_states[k]->getStop() > 0) { 00508 if((i-d+1-_all_states[k]->getStart() >= 0) && (i + _all_states[k]->getStop() < (int)s.size())) { 00509 double joinable = _all_states[k]->observation()->prefix_sum_array_compute(i-d+1-_all_states[k]->getStart(),i+_all_states[k]->getStop(), mod(_all_states[k]->getInputPhase()-_all_states[k]->getStart(), nphase)); 00510 if(joinable <= -HUGE) { 00511 continue; 00512 } 00513 } 00514 } 00515 for(int p = 0; p < nstates; p++){ 00516 alpha(k, i) = log_sum(alpha(k, i), alpha(p, i-d) 00517 + _all_states[p]->transition()->log_probability_of(k) 00518 + _all_states[k]->duration_probability(d) 00519 + _all_states[k]->observation()->prefix_sum_array_compute(i-d+1, i, _all_states[k]->getInputPhase())); 00520 } 00521 } 00522 } 00523 } 00524 00525 a = alpha; 00526 00527 double sum = alpha(0, size-1) + _terminal_probabilities->log_probability_of(0); 00528 for(int k = 1; k < nstates; k++) 00529 sum = log_sum(sum, alpha(k, size-1) + _terminal_probabilities->log_probability_of(k)); 00530 00531 printf("forward: %f\n\n", sum); 00532 00533 return sum; 00534 00535 } 00536 00537 00538 00539 00541 double GeneralizedHiddenMarkovModel::backward(const Sequence & s, Matrix &b) const { 00542 int size = s.size(); 00543 int nstates = _all_states.size(); 00544 initialize_prefix_sum_arrays(s); 00545 00546 Matrix beta (nstates, size); 00547 00548 std::vector< std::list<int> > valid_positions; 00549 valid_positions.resize(nstates); 00550 std::list <int> e; 00551 for(int k = 0; k <nstates; k++) 00552 valid_positions[k] = e; 00553 00554 // initialization 00555 for (int k = 0; k < nstates; k++) { 00556 beta(k, size-1) = _terminal_probabilities->log_probability_of(k); 00557 if(beta(k,size-1) > -HUGE){ 00558 if(!_all_states[k]->isGeometricDuration()) 00559 valid_positions[k].push_back(size-1); 00560 } 00561 } 00562 for (int i = size-2; i >= 0; i--){ 00563 for (int k = 0; k < nstates; k++){ 00564 beta(k,i) = -HUGE; 00565 for(int p = 0; p < (int)(_all_states[k]->successors()).size(); p++){ 00566 int id = _all_states[k]->successors()[p]; 00567 beta(k,i) = log_sum(beta(k,i), _all_states[k]->transition()->log_probability_of(id) + _all_states[id]->backwardSum(beta,s,i,valid_positions)); 00568 } 00569 if(beta(k,i) > -HUGE){ 00570 if(!_all_states[k]->isGeometricDuration()) 00571 valid_positions[k].push_front(i); 00572 } 00573 } 00574 } 00575 00576 b = beta; 00577 00578 double sum = -HUGE; 00579 for(int k = 0; k < nstates; k++) 00580 sum = log_sum(sum, getInitialProbabilities()->log_probability_of(k) + _all_states[k]->backwardSum(beta,s,-1,valid_positions)) ; 00581 00582 return sum; 00583 } 00584 00585 void GeneralizedHiddenMarkovModel::initialize_prefix_sum_arrays(const Sequence & s) const { 00586 #if 0 00587 struct timeval start, stop; 00588 gettimeofday(&start, (struct timezone *) NULL); 00589 #endif 00590 00591 for (int i = 0; i < (int) _all_states.size(); i++) { 00592 _all_states[i]->observation()->initialize_prefix_sum_array(s); 00593 } 00594 #if 0 00595 gettimeofday(&stop, (struct timezone *)NULL); 00596 stop.tv_sec -= start.tv_sec; 00597 stop.tv_usec -= start.tv_usec; 00598 if(stop.tv_usec < 0){ 00599 stop.tv_sec --; 00600 stop.tv_usec += 1000000; 00601 } 00602 fprintf(stderr, "PSA Elapsed time %ld%c%02d seconds\n", stop.tv_sec, '.', stop.tv_usec/1000); 00603 #endif 00604 00605 } 00606 00607 00608 00609 void GeneralizedHiddenMarkovModel::fixStatesPredecessorSuccessor() { 00610 for (int i = 0; i < (int) _all_states.size(); i++) 00611 { 00612 if (_all_states[i] != NULL) 00613 { 00614 for (int j = 0; j < (int) _all_states.size(); j++) { 00615 if ((!close(exp(_all_states[i]->transition()->log_probability_of(j)), 0.0, 1e-1)) && (_all_states[j] != NULL)) { 00616 _all_states[j]->addPredecessor(i); 00617 } 00618 00619 if ((_all_states[j] != NULL) && (!close(exp( _all_states[i]->transition()->log_probability_of(j)), 00620 0.0, 1e-1))) 00621 _all_states[i]->addSuccessor(j); 00622 } 00623 } 00624 } 00625 for (int i = 0; i < (int) _all_states.size(); i++) 00626 { 00627 if (_all_states[i] != NULL) 00628 { 00629 _all_states[i]->fixTransitionDistribution(); 00630 _all_states[i]->clearPredecessorSuccessor(); 00631 } 00632 } 00633 for (int i = 0; i < (int) _all_states.size(); i++) 00634 { 00635 if (_all_states[i] != NULL) 00636 { 00637 00638 for (int j = 0; j < (int) _all_states.size(); j++) { 00639 double transprob = exp(_all_states[i]->transition()->log_probability_of(j)); 00640 if ((!close(transprob, 0.0, 1e-1)) && (_all_states[j] != NULL)) { 00641 _all_states[j]->addPredecessor(i); 00642 if((!_all_states[j]->isGeometricDuration()) && (!_all_states[i]->isGeometricDuration())) 00643 { 00644 std::cerr << "WARNING: Transitions between two non-geometric run-length states make viterbi decoding slow: " << _all_states[i]->name() << "->" << _all_states[j]->name() << ": " << transprob << std::endl; 00645 } 00646 } 00647 00648 if ((_all_states[j] != NULL) && (!close(transprob,0.0, 1e-1))) { 00649 _all_states[i]->addSuccessor(j); 00650 if((!_all_states[i]->isGeometricDuration()) && (!_all_states[j]->isGeometricDuration())) 00651 { 00652 std::cerr << "WARNING: Transitions between two non-geometric run-length states make viterbi decoding slow: " << _all_states[i]->name() << "->" << _all_states[j]->name() << ": " << transprob << std::endl; 00653 } 00654 } 00655 } 00656 } 00657 } 00658 00659 } 00660 00661 00663 double GeneralizedHiddenMarkovModel::_viterbi(const Sequence &s, Sequence &path, 00664 Matrix & g) const { 00665 int size = s.size(); 00666 int nstates = _all_states.size(); 00667 initialize_prefix_sum_arrays(s); 00668 00669 Matrix gamma(nstates, size); 00670 Matrix psi(nstates, size); 00671 Matrix psilen(nstates, size); 00672 00673 // initialization 00674 for (int k = 0; k < nstates; k++) { 00675 gamma(k, 0) = getInitialProbabilities()->log_probability_of(k) 00676 + _all_states[k]->observation()->prefix_sum_array_compute(0, 0); 00677 } 00678 00679 for(int i = 1; i < size; i++){ 00680 for(int k = 0; k < nstates; k++){ 00681 gamma(k, i) = - HUGE; 00682 for(int d = i; d > 0; d--){ 00683 double gmax = gamma(0, i-d) + _all_states[0]->transition()->log_probability_of(k); 00684 int pmax = 0; 00685 for(int p = 1; p < nstates; p++){ 00686 double g = gamma(p, i-d) + _all_states[p]->transition()->log_probability_of(k); 00687 if(gmax < g){ 00688 gmax = g; 00689 pmax = p; 00690 } 00691 } 00692 gmax = gmax + _all_states[k]->duration_probability(d) + _all_states[k]->observation()->prefix_sum_array_compute(i-d+1, i); 00693 if(gamma(k, i) < gmax){ 00694 gamma(k, i) = gmax; 00695 psi(k, i) = pmax; 00696 psilen(k, i) = d; 00697 } 00698 } 00699 } 00700 } 00701 00702 //backtracing 00703 path.resize(size); 00704 int L = size-1; 00705 int state = 0; 00706 double max = gamma(0, L); 00707 for(int k = 1; k < nstates; k++){ 00708 if(max < gamma(k, L)){ 00709 max = gamma(k, L); 00710 state = k; 00711 } 00712 } 00713 while(L > 0){ 00714 int d = psilen(state, L); 00715 int p = psi(state, L); 00716 for(int i = 0; i < d; i++){ 00717 path[L] = state; 00718 L--; 00719 } 00720 state = p; 00721 } 00722 return max; 00723 } 00724 00725 00727 double GeneralizedHiddenMarkovModel::viterbi(const Sequence &s, Sequence &path, 00728 Matrix & g) const { 00729 int size = s.size(); 00730 int nstates = _all_states.size(); 00731 00732 initialize_prefix_sum_arrays(s); 00733 00734 Matrix gamma(nstates, size); 00735 Matrix psi(nstates, size); 00736 Matrix psilen(nstates, size); 00737 00738 std::map < int, std::list<int> > valid_positions; 00739 std::list <int> e; 00740 for(int k = 0; k <nstates; k++) 00741 valid_positions[k] = e; 00742 00743 00744 std::vector<bool> possible_path; 00745 possible_path.resize(nstates); 00746 00747 // initialization 00748 for (int k = 0; k < nstates; k++) { 00749 gamma(k, 0) = getInitialProbabilities()->log_probability_of(k) 00750 + _all_states[k]->observation()->prefix_sum_array_compute(0, 0); 00751 possible_path[k] = 1; 00752 if(gamma(k,0) <= -HUGE) 00753 { 00754 possible_path[k] = 0; 00755 } 00756 } 00757 00758 for(int i = 0; i < (int)possible_path.size(); i++) 00759 { 00760 std::vector <int> next_states = _all_states[i]->successors(); 00761 if(!possible_path[i]) 00762 continue; 00763 for(int p = 0; p < (int)next_states.size(); p++) 00764 { 00765 int succ = next_states[p]; 00766 if (!_all_states[succ]->isGeometricDuration()) { 00767 (valid_positions.find(succ)->second).push_back(0); 00768 } 00769 } 00770 } 00771 00772 00773 for(int i = 1; i < size; i++){ 00774 for(int k = 0; k < nstates; k++){ 00775 00776 gamma(k, i) = -HUGE; 00777 possible_path[k] = 1; 00778 _all_states[k]->findBestPredecessor (gamma, psi, psilen, s, i, _all_states, valid_positions); 00779 if(gamma(k,i) <= -HUGE) 00780 { 00781 possible_path[k] = 0; 00782 } 00783 00784 } 00785 00786 for(int s = 0; s < (int)possible_path.size(); s++) 00787 { 00788 std::vector <int> next_states = _all_states[s]->successors(); 00789 if(!possible_path[s]) 00790 continue; 00791 for(int p = 0; p < (int)next_states.size(); p++) 00792 { 00793 int succ = next_states[p]; 00794 if (!_all_states[succ]->isGeometricDuration()) { 00795 (valid_positions.find(succ)->second).push_back(i); 00796 } 00797 } 00798 } 00799 00800 } 00801 int L = size-1; 00802 00803 if(_terminal_probabilities != NULL){ 00804 for(int k = 0; k < nstates; k++) 00805 gamma(k,L) += _terminal_probabilities->log_probability_of(k); 00806 } 00807 //backtracing 00808 path.resize(size); 00809 00810 int state = 0; 00811 double max = gamma(0, L); 00812 for(int k = 1; k < nstates; k++){ 00813 if(max < gamma(k, L)){ 00814 max = gamma(k, L) ; 00815 state = k; 00816 } 00817 } 00818 #if 0 00819 for(int i = 0; i < size; i++) { 00820 std::cerr << "i: " << i << std::endl; 00821 for(int k = 0; k < nstates; k++){ 00822 std::cerr << " " << getStateName(k) << " " << gamma(k, i) << " " << getStateName(psi(k, i)) << " " << psilen(k,i) << std::endl; 00823 } 00824 00825 } 00826 #endif 00827 while(L > 0){ 00828 int d = psilen(state, L); 00829 int p = psi(state, L); 00830 for(int i = 0; i < d; i++){ 00831 path[L] = state; 00832 L--; 00833 } 00834 if(d == 0) 00835 { 00836 std::cerr << "Something wrong: [ predicted state duration equals to " << d << "]" << std::endl; 00837 break; 00838 } 00839 state = p; 00840 } 00841 #if 0 00842 for(int i = 0; i < size ; i ++) { 00843 std::cerr << "[" << i << "]" << std::endl; ; 00844 for(int k = 0; k < nstates; k++) 00845 { 00846 00847 std::cerr << " " << _all_states[k] -> name() << " " << gamma(k,i) << " " << _all_states[psi(k, i)]->name() << " " << psilen(k, i) << std::endl; 00848 } 00849 } 00850 #endif 00851 return max; 00852 00853 } 00854 00856 Sequence & GeneralizedHiddenMarkovModel::chooseObservation(Sequence & h, int i, 00857 int state) const { 00858 assert(state >= 0 && state < (int)_all_states.size()); 00859 int d = _all_states[state]->chooseDuration(); 00860 int iphase = _all_states[state]->getInputPhase(); 00861 _all_states[state]->observation()->chooseWithHistory(h, i, iphase, d); 00862 return h; 00863 } 00864 00866 int GeneralizedHiddenMarkovModel::chooseState(int state) const { 00867 return _all_states[state]->transition()->choose(); 00868 } 00869 00871 int GeneralizedHiddenMarkovModel::chooseFirstState() const { 00872 return _initial_probabilities->choose(); 00873 } 00874 00876 std::string GeneralizedHiddenMarkovModel::getStateName(int state) const { 00877 return _all_states[state]->name(); 00878 } 00879 00880 00882 AlphabetPtr GeneralizedHiddenMarkovModel::getStateNames() const { 00883 return _state_names; 00884 } 00885 00886 ProbabilisticModelCreatorPtr GeneralizedHiddenMarkovModel::getFactory() const { 00887 return GeneralizedHiddenMarkovModelCreatorPtr( 00888 new GeneralizedHiddenMarkovModelCreator()); 00889 } 00890 00891 00892 00893 00894 std::string GeneralizedHiddenMarkovModel::str() const { 00895 int nstates = getStateNames()->size(); 00896 std::stringstream out; 00897 out << "model_name = \"" << model_name() << "\"" << std::endl; 00898 out << "state_names = ("; 00899 int i = 0; 00900 if (i < nstates) { 00901 out << "\"" << getStateName(i) << "\""; 00902 for (i = 1; i < nstates; i++) 00903 out << ",\"" << getStateName(i) << "\""; 00904 out << ")" << std::endl; 00905 } 00906 00907 out << "observation_symbols = ("; 00908 out << "\"" << alphabet()->getSymbol(0)->name() << "\""; 00909 for (int i = 1; i < (int) alphabet()->size(); i++) 00910 out << ",\"" << alphabet()->getSymbol(i)->name() << "\""; 00911 out << ")" << std::endl; 00912 00913 out << "transitions = ("; 00914 if(!close(exp(_all_states[0]->transition()->log_probability_of(0)), 0, 1e-10)) 00915 out << "\"" << getStateName(0) << "\" | \"" << getStateName(0) << "\": " 00916 << exp(_all_states[0]->transition()->log_probability_of(0)); 00917 for (int i = 0; i < nstates; i++) 00918 for (int j = 0; j < nstates; j++) 00919 if (((i != 0) || (j != 0)) && (!close(exp( _all_states[i]->transition()->log_probability_of(j)), 0, 1e-10) )) 00920 out << ";\n \"" << getStateName(j) << "\" | \"" 00921 << getStateName(i) << "\": " << exp( _all_states[i]->transition()->log_probability_of(j)); 00922 out << ")" << std::endl; 00923 00924 out << "initial_probabilities = ("; 00925 if(!close(exp(_initial_probabilities->log_probability_of(0)), 0, 1e-10)) 00926 out << "\"" << getStateName(0) << "\": " << exp(_initial_probabilities->log_probability_of(0)); 00927 for (int i = 1; i < nstates; i++) 00928 { 00929 if(!close(exp(_initial_probabilities->log_probability_of(i)), 0.0, 1e-10)) 00930 out << ";\n \"" << getStateName(i) << "\": " << exp( 00931 _initial_probabilities->log_probability_of(i)); 00932 } 00933 out << ")" << std::endl; 00934 00935 if(_terminal_probabilities != NULL) { 00936 out << "terminal_probabilities = ("; 00937 if(!close(exp(_terminal_probabilities->log_probability_of(0)), 0, 1e-10)) 00938 out << "\"" << getStateName(0) << "\": " << exp(_terminal_probabilities->log_probability_of(0)); 00939 for (int i = 1; i < nstates; i++) 00940 { 00941 if(!close(exp(_terminal_probabilities->log_probability_of(i)), 0.0, 1e-10)) 00942 out << ";\n \"" << getStateName(i) << "\": " << exp(_terminal_probabilities->log_probability_of(i)); 00943 } 00944 out << ")" << std::endl; 00945 } 00946 00947 std::map <std::string, ProbabilisticModelPtr> ::const_iterator it; 00948 00949 for(it = _models.begin(); it != _models.end(); it++) 00950 { 00951 out << it->first << " = [ " << (it->second)->str() << " ] " << std::endl; 00952 } 00953 00954 for (int i = 0; i < nstates; i++) 00955 out << _all_states[i]->str(); 00956 00957 return out.str(); 00958 } 00959 00960 void GeneralizedHiddenMarkovModel::buildDoubleParameterValue(DiscreteIIDModelPtr distr, ProbabilisticModelParameters & answer, const char * name) const 00961 { 00962 int nstates = getStateNames()->size(); 00963 double sum = 0.0; 00964 std::vector <double> probs; 00965 probs.resize(nstates); 00966 for(int i = 0; i < nstates; i++){ 00967 probs[i] = exp(distr->log_probability_of(i)); 00968 sum += probs[i]; 00969 } 00970 std::map <std::string, double> aux; 00971 std::stringstream out5; 00972 out5 << getStateName(0); 00973 aux[out5.str()] = probs[0]/sum; 00974 for(int i = 0; i < nstates; i++) 00975 for(int j = 0; j < (int)alphabet()->size(); j++) 00976 if((i != 0) || (j != 0)){ 00977 std::stringstream out6; 00978 out6 << getStateName(i) ; 00979 aux[out6.str()] = probs[i]/sum; 00980 } 00981 answer.add(name, DoubleMapParameterValuePtr(new DoubleMapParameterValue(aux))); 00982 00983 } 00984 00985 ProbabilisticModelParameters GeneralizedHiddenMarkovModel::parameters() const 00986 { 00987 int nstates = getStateNames()->size(); 00988 ProbabilisticModelParameters answer; 00989 00990 answer.add("model_name", StringParameterValuePtr(new StringParameterValue(model_name()))); 00991 answer.add("state_names", _state_names->getParameterValue()); 00992 answer.add("observation_symbols", alphabet()->getParameterValue()); 00993 00994 std::map <std::string, double> trans; 00995 std::stringstream out; 00996 out << getStateName(0) << "|" << getStateName(0); 00997 trans[out.str()] = exp(_all_states[0]->transition()->log_probability_of(0)) ; 00998 for(int i = 0; i < nstates; i++) 00999 for(int j = 0; j < nstates; j++) 01000 if((i != 0) || (j != 0)) { 01001 std::stringstream out2; 01002 out2 << getStateName(j) << "|" << getStateName(i) ; 01003 trans[out2.str()] = exp(_all_states[i]->transition()->log_probability_of(j)); 01004 } 01005 01006 answer.add("transitions", DoubleMapParameterValuePtr(new DoubleMapParameterValue(trans))); 01007 01008 buildDoubleParameterValue(_initial_probabilities, answer, "initial_probabilities"); 01009 if(_terminal_probabilities != NULL) 01010 buildDoubleParameterValue(_terminal_probabilities, answer, "terminal_probabilities"); 01011 std::map <std::string, ProbabilisticModelPtr> ::const_iterator it; 01012 01013 for(it = _models.begin(); it != _models.end(); it++) 01014 { 01015 answer.add(it->first, ProbabilisticModelParameterListValuePtr(new ProbabilisticModelParameterListValue((it->second)->parameters()))); 01016 } 01017 01018 for (int i = 0; i < nstates; i++) 01019 { 01020 answer.add(_all_states[i]->name(), ProbabilisticModelParameterListValuePtr(new ProbabilisticModelParameterListValue(_all_states[i]->parameters()))); 01021 } 01022 01023 return answer; 01024 } 01025 01026 void GeneralizedHiddenMarkovModel::initialize(const ProbabilisticModelParameters & parameters) 01027 { 01028 01029 ProbabilisticModelParameterValuePtr state_names_par = 01030 parameters.getMandatoryParameterValue("state_names"); 01031 ProbabilisticModelParameterValuePtr initial_probabilities_par = 01032 parameters.getMandatoryParameterValue("initial_probabilities"); 01033 ProbabilisticModelParameterValuePtr transitions_par = 01034 parameters.getMandatoryParameterValue("transitions"); 01035 ProbabilisticModelParameterValuePtr observation_symbols_par = 01036 parameters.getMandatoryParameterValue("observation_symbols"); 01037 ProbabilisticModelParameterValuePtr terminal_probabilities_par = 01038 parameters.getOptionalParameterValue("terminal_probabilities"); 01039 01040 01041 std::vector<std::string> state_names = state_names_par->getStringVector(); 01042 AlphabetPtr states = AlphabetPtr(new Alphabet()); 01043 states->initializeFromVector(state_names); 01044 setStateNames(states); 01045 AlphabetPtr observation_symbols = AlphabetPtr(new Alphabet()); 01046 observation_symbols->initializeFromVector(observation_symbols_par->getStringVector()); 01047 01048 DiscreteIIDModelPtr pi = DiscreteIIDModelPtr(new DiscreteIIDModel()); 01049 pi->initializeFromMap(initial_probabilities_par->getDoubleMap(), states); 01050 01051 if(terminal_probabilities_par != NULL){ 01052 DiscreteIIDModelPtr terminalprob = DiscreteIIDModelPtr(new DiscreteIIDModel()); 01053 terminalprob->initializeFromMap(terminal_probabilities_par->getDoubleMap(), states); 01054 setTerminalProbability(terminalprob); 01055 } 01056 01057 01058 std::map<std::string, double> transpar = transitions_par->getDoubleMap(); 01059 std::map<std::string, DoubleVector> trans; 01060 std::map<std::string, double>::const_iterator it; 01061 boost::regex separator("\\|"); 01062 01063 for (it = transpar.begin(); it != transpar.end(); it++) { 01064 std::vector<std::string> splited; 01065 split_regex(it->first, splited, separator); 01066 if(splited.size() == 1) { 01067 splited.push_back(""); 01068 } 01069 std::string from (splited[1]); 01070 std::string to ( splited[0]); 01071 if(!states->has(from) ) { 01072 std::cerr << "ERROR: The state " << from << " is not in state list !\n" << std::endl; 01073 exit(-1); 01074 } 01075 if(!states->has(to) ) { 01076 std::cerr << "ERROR: The state " << to << " is not in state list !\n" << std::endl; 01077 exit(-1); 01078 } 01079 01080 if (trans.find(from) == trans.end()) { 01081 int id = states->getSymbol(to)->id(); 01082 DoubleVector probs; 01083 probs.resize(states->size() ); 01084 trans[from] = probs; 01085 if (id < (int) trans[from].size()) 01086 trans[from][id] = it->second; 01087 } else { 01088 int id = states->getSymbol(to)->id(); 01089 if (id < (int) trans[from].size()) 01090 trans[from][id] = it->second; 01091 } 01092 } 01093 01094 setAlphabet(observation_symbols); 01095 int nclasses = -1; 01096 for (int i = 0; i < (int) state_names.size(); i++) { 01097 trim_spaces(state_names[i]); 01098 int id; 01099 ProbabilisticModelParameterValuePtr statepar = 01100 parameters.getOptionalParameterValue(state_names[i]); 01101 if (statepar == NULL) { 01102 std::cerr << "ERROR: Missing configuration for the state: " 01103 << state_names[i] << std::endl; 01104 return; 01105 } 01106 ConfigurationReader reader; 01107 std::string config = statepar->getString(); 01108 if(config[0] == '[') 01109 config = config.substr(1, config.size() - 2); 01110 if (!reader.load(config)) { 01111 std::cerr << "ERROR: Configuring " << state_names[i] << "!" << std::endl; 01112 std::cerr << config << std::endl; 01113 return; 01114 } else { 01115 ProbabilisticModelParametersPtr statepars = reader.parameters(); 01116 ProbabilisticModelParameterValuePtr observationpar = 01117 statepars->getOptionalParameterValue("observation"); 01118 ProbabilisticModelParameterValuePtr durationpar = 01119 statepars->getOptionalParameterValue("duration"); 01120 ProbabilisticModelParameterValuePtr lengthpar = 01121 statepars->getOptionalParameterValue("sequence_length"); 01122 ProbabilisticModelParameterValuePtr outputphasepar = 01123 statepars->getOptionalParameterValue("output_phase"); 01124 ProbabilisticModelParameterValuePtr inputphasepar = 01125 statepars->getOptionalParameterValue("input_phase"); 01126 01127 ProbabilisticModelParameterValuePtr extendEmissionpar = 01128 statepars->getOptionalParameterValue("extend_emission"); 01129 ProbabilisticModelParameterValuePtr classespar = 01130 statepars->getOptionalParameterValue("classes"); 01131 01132 01133 if (observationpar == NULL) { 01134 std::cerr << "ERROR: Missing observation model for state " 01135 << state_names[i] << std::endl; 01136 return; 01137 } 01138 01139 std::string model_name = observationpar->getString(); 01140 restore_model(model_name, parameters); 01141 01142 DiscreteIIDModelPtr transition = 01143 DiscreteIIDModelPtr( 01144 new DiscreteIIDModel( 01145 trans[state_names[i]])); 01146 01147 int iphase = -1; 01148 int ophase = -1; 01149 std::vector<int> classes; 01150 if(classespar != NULL){ 01151 std::vector<double> cl = classespar->getDoubleVector(); 01152 classes.resize(cl.size()); 01153 for(int c = 0; c < (int)cl.size(); c++){ 01154 classes[c] = (int)cl[c]; 01155 if(classes[c] > nclasses) 01156 nclasses = classes[c]; 01157 } 01158 } 01159 if(inputphasepar != NULL) 01160 iphase = inputphasepar->getInt(); 01161 if(outputphasepar != NULL) 01162 ophase = outputphasepar->getInt(); 01163 if (durationpar == NULL) { 01164 if (lengthpar == NULL) { 01165 id = configureGeometricDurationState(model_name, transition, state_names[i], iphase, ophase, classes); 01166 } else { 01167 id = configureSignalState(model_name, 01168 transition, 01169 lengthpar->getDouble(), 01170 state_names[i], iphase, ophase, classes); 01171 } 01172 01173 } else { 01174 01175 // Explicit duration state 01176 std::string duration_model_name = durationpar->getString(); 01177 restore_model(duration_model_name, parameters); 01178 id = configureExplicitDurationState(model_name, transition, duration_model_name, state_names[i], iphase, ophase, classes); 01179 if(extendEmissionpar != NULL) { 01180 int startEmissionOffset = (int)(extendEmissionpar->getDoubleVector())[0]; 01181 int endEmissionOffset = (int)(extendEmissionpar->getDoubleVector())[1]; 01182 _all_states[id]->setStart(startEmissionOffset); 01183 _all_states[id]->setStop(endEmissionOffset); 01184 } 01185 } 01186 } 01187 } 01188 nclasses++; 01189 setNumClasses(nclasses); 01190 setObservationSymbols(observation_symbols); 01191 setInitialProbability(pi); 01192 01193 fixStatesPredecessorSuccessor(); 01194 01195 for (int i = 0; i < (int)_all_states.size(); i++) 01196 if (_all_states[i]->transition()->size() <= 0) 01197 { 01198 std::cerr << "ERROR: GHMM initialization, state " << _all_states[i]->name() << " has outdegree equal a zero !\n"; 01199 exit(-1); 01200 } 01201 } 01202 01203 01204 } 01205