ToPS
HiddenMarkovModel.hpp
00001 /*
00002  *       HiddenMarkovModel.hpp
00003  *
00004  *       Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br>
00005  *                      Ígor Bonádio <ibonadio@ime.usp.br>
00006  *                      Vitor Onuchic <vitoronuchic@gmail.com>
00007  *                      Alan Mitchell Durham <aland@usp.br>
00008  *
00009  *       This program is free software; you can redistribute it and/or modify
00010  *       it under the terms of the GNU  General Public License as published by
00011  *       the Free Software Foundation; either version 3 of the License, or
00012  *       (at your option) any later version.
00013  *
00014  *       This program is distributed in the hope that it will be useful,
00015  *       but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  *       GNU General Public License for more details.
00018  *
00019  *       You should have received a copy of the GNU General Public License
00020  *       along with this program; if not, write to the Free Software
00021  *       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00022  *       MA 02110-1301, USA.
00023  */
00024 
00025 #ifndef HIDDEN_MARKOV_MODEL_HPP
00026 #define HIDDEN_MARKOV_MODEL_HPP
00027 
00028 #include "crossplatform.hpp"
00029 
00030 #include "ProbabilisticModel.hpp"
00031 #include "DecodableModel.hpp"
00032 #include "Sequence.hpp"
00033 #include "Alphabet.hpp"
00034 #include "ContextTree.hpp"
00035 
00036 #include "HiddenMarkovModelCreator.hpp"
00037 #include "util.hpp"
00038 #include <cstdarg>
00039 #include <vector>
00040 
00041 namespace tops {
00042 
00043   class DLLEXPORT HMMState {
00044   protected:
00045     int _id;
00046     SymbolPtr _name;
00047     DiscreteIIDModelPtr _emission;
00048     DiscreteIIDModelPtr _transitions;
00049   public:
00050     HMMState(){}
00051     HMMState (int id, SymbolPtr name, DiscreteIIDModelPtr emission,  DiscreteIIDModelPtr transitions) : _id(id), _name(name), _emission(emission), _transitions(transitions) {}
00052     void setName (SymbolPtr name) {
00053       _name = name;
00054     }
00055     void setEmissions (DiscreteIIDModelPtr e)
00056     {
00057       _emission = e;
00058     }
00059     void setTransition (DiscreteIIDModelPtr t)
00060     {
00061       _transitions = t;
00062     }
00063     DiscreteIIDModelPtr &emission() {
00064       return _emission;
00065     }
00066     DiscreteIIDModelPtr &transitions() {
00067       return _transitions;
00068     }
00069     bool isSilent() {
00070       return (_emission == NULL);
00071     }
00072     SymbolPtr getName() const {
00073       return _name;
00074     }
00075     int getId() {
00076       return _id;
00077     }
00078     void setId(int i) {
00079       _id = i;
00080     }
00081 
00082 
00083   };
00084   typedef boost::shared_ptr <HMMState> HMMStatePtr;
00085 
00087   class DLLEXPORT HiddenMarkovModel :   public DecodableModel
00088   {
00089   public:
00090 
00091     HiddenMarkovModel() {
00092     };
00093 
00094     HiddenMarkovModel( std::vector <HMMStatePtr> states, DiscreteIIDModelPtr initial_probability, AlphabetPtr state_names, AlphabetPtr observation_symbols) :    _states(states) , _initial_probability(initial_probability), _state_names (state_names) {
00095       tops::ProbabilisticModel::setAlphabet(observation_symbols);
00096 
00097 
00098     }
00099 
00100 
00101     void setStates(std::vector<HMMStatePtr> states) {
00102       _states = states;
00103     }
00104 
00105 
00106     virtual ~HiddenMarkovModel(){}
00107 
00109     virtual Sequence &  chooseObservation ( Sequence & h,int i,  int state) const ;
00111     virtual int chooseState(int state ) const ;
00113     virtual int chooseFirstState() const ;
00114     virtual AlphabetPtr getStateNames() const {
00115       return _state_names;
00116     }
00117     virtual std::string getStateName(int state) const;
00118 
00119     virtual std::string str () const ;
00120 
00121     virtual void setState (int id, HMMStatePtr state)
00122     {
00123       if(_states.size() < _state_names->size())
00124         _states.resize(_state_names->size());
00125       _states[id] = state;
00126       state->setId(id);
00127     }
00128 
00129     virtual HMMStatePtr getState(int id) const
00130     {
00131       return _states[id];
00132     }
00134     virtual double forward(const Sequence & s, Matrix &alpha) const;
00135 
00137     virtual double backward(const Sequence & s, Matrix &beta) const;
00138 
00140     virtual double viterbi (const Sequence &s, Sequence &path, Matrix & gamma) const ;
00141 
00142     virtual std::string model_name() const {
00143       return "HiddenMarkovModel";
00144     }
00145     virtual ProbabilisticModelCreatorPtr getFactory() const {
00146       return HiddenMarkovModelCreatorPtr(new HiddenMarkovModelCreator());
00147     }
00148     virtual DecodableModel * decodable()  {
00149       return this;
00150     }
00151     virtual void trainBaumWelch (SequenceList & training_set, int maxiterations, double diff) ;
00152 
00153     virtual void initialize(const ProbabilisticModelParameters & par) ;
00154 
00155     virtual ProbabilisticModelParameters parameters() const ;
00156 
00157     void setInitialProbability(DiscreteIIDModelPtr initial) ;
00158     void setObservationSymbols(AlphabetPtr obs) ;
00159     void setStates(std::vector<HMMStatePtr> states, AlphabetPtr state_names) ;
00160 
00161 
00162 
00163   private:
00164     std::vector <HMMStatePtr> _states;
00165     DiscreteIIDModelPtr _initial_probability;
00166     std::vector<double> _ctFactors;
00167     AlphabetPtr _state_names;
00168     void scale(std::vector<double> & in, int t);
00169     std::vector<double> iterate(Sequence & obs);
00170   };
00171 
00172   typedef boost::shared_ptr<HiddenMarkovModel> HiddenMarkovModelPtr;
00173 }
00174 
00175 
00176 #endif