ToPS
PairHiddenMarkovModel.hpp
00001 /*
00002  *       PairHiddenMarkovModel.hpp
00003  *
00004  *       Copyright 2011 Vitor Onuchic <vitoronuchic@gmail.com>
00005  *                      André Yoshiaki Kashiwabara <akashiwabara@usp.br>
00006  *                      Ígor Bonádio <ibonadio@ime.usp.br>
00007  *                      Vitor Onuchic <vitoronuchic@gmail.com>
00008  *                      Alan Mitchell Durham <aland@usp.br>
00009  *
00010  *       This program is free software; you can redistribute it and/or modify
00011  *       it under the terms of the GNU  General Public License as published by
00012  *       the Free Software Foundation; either version 3 of the License, or
00013  *       (at your option) any later version.
00014  *
00015  *       This program is distributed in the hope that it will be useful,
00016  *       but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018  *       GNU General Public License for more details.
00019  *
00020  *       You should have received a copy of the GNU General Public License
00021  *       along with this program; if not, write to the Free Software
00022  *       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00023  *       MA 02110-1301, USA.
00024  */
00025 
00026 #ifndef PAIR_HIDDEN_MARKOV_MODEL
00027 #define PAIR_HIDDEN_MARKOV_MODEL
00028 
00029 #include "crossplatform.hpp"
00030 
00031 #include "HiddenMarkovModel.hpp"
00032 #include "ProbabilisticModel.hpp"
00033 #include "DecodableModel.hpp"
00034 #include "Sequence.hpp"
00035 #include "Alphabet.hpp"
00036 #include "ContextTree.hpp"
00037 #include "Symbol.hpp"
00038 #include "SparseMatrix.hpp"
00039 #include "PairHiddenMarkovModelCreator.hpp"
00040 #include "util.hpp"
00041 #include "ProbabilisticModelParameter.hpp"
00042 #include "Symbol.hpp"
00043 #include <cstdarg>
00044 #include <vector>
00045 #include <iostream>
00046 #include <cmath>
00047 #include <sstream>
00048 #include <vector>
00049 #include <iterator>
00050 #include <stdio.h>
00051 #include <algorithm>
00052 #include <boost/timer.hpp>
00053 
00054 namespace tops{
00055   class DLLEXPORT PHMMState: public HMMState {
00056 
00057   protected:
00058     IntVector _outgoingTransitions;
00059     IntVector _incomingTransitions;
00060 
00061   public:
00062     typedef boost::shared_ptr <PHMMState> PHMMStatePtr;
00063 
00064     PHMMState(){}
00065 
00066     PHMMState(int id, SymbolPtr name, DiscreteIIDModelPtr emission,  DiscreteIIDModelPtr transitions, IntVector iTransitions, IntVector oTransitions){
00067       _id = id;
00068       _name = name;
00069       _emission = emission;
00070       _transitions = transitions;
00071       _outgoingTransitions = oTransitions;
00072       _incomingTransitions = iTransitions;
00073     }
00074 
00075     virtual void forwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int begin_id){
00076       cerr << "Sub-class responsability: forwardSum()" << endl;
00077       exit(-1);
00078     }
00079 
00080     virtual void backwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int currStateId, double *accumulator){
00081       cerr << "Sub-class responsability: backwardSum()" << endl;
00082       exit(-1);
00083     }
00084 
00085     virtual void postProbSum(fMatrix &ppMatch, fMatrix &ppGap1, fMatrix &ppGap2, Matrix &alpha, Matrix &beta, double full, int i, int j){
00086       return;
00087     }
00088 
00089     virtual bool isSilent(){
00090       return false;
00091     }
00092 
00093     IntVector &outTransitions(){
00094       return _outgoingTransitions;
00095     }
00096 
00097     void removeEndId(int end_id){
00098       for(IntVector::iterator it = _outgoingTransitions.begin(); it != _outgoingTransitions.end(); it++){
00099   if((*it) == end_id){
00100     _outgoingTransitions.erase(it);
00101     break;
00102   }
00103       }
00104     }
00105 
00106     void removeBeginId(int begin_id){
00107       for(IntVector::iterator it = _incomingTransitions.begin(); it != _incomingTransitions.end(); it++){
00108   if((*it) == begin_id){
00109     _incomingTransitions.erase(it);
00110     break;
00111   }
00112       }
00113     }
00114   };
00115 
00116   typedef boost::shared_ptr <PHMMState> PHMMStatePtr;
00117 
00118   class DLLEXPORT MatchState: public PHMMState {
00119   public:
00120     MatchState(int id, SymbolPtr name, DiscreteIIDModelPtr emission,  DiscreteIIDModelPtr transitions, IntVector iTransitions, IntVector oTransitions){
00121       _id = id;
00122       _name = name;
00123       _emission = emission;
00124       _transitions = transitions;
00125       _outgoingTransitions = oTransitions;
00126       _incomingTransitions = iTransitions;
00127     }
00128 
00129     void forwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1,const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int begin_id);
00130 
00131     void backwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int currStateId, double *accumulator);
00132 
00133     void postProbSum(fMatrix &ppMatch, fMatrix &ppGap1, fMatrix &ppGap2, Matrix &alpha, Matrix &beta, double full, int i, int j);
00134   };
00135 
00136   class DLLEXPORT Gap1State: public PHMMState {
00137   public:
00138     Gap1State(int id, SymbolPtr name, DiscreteIIDModelPtr emission,  DiscreteIIDModelPtr transitions, IntVector iTransitions, IntVector oTransitions){
00139       _id = id;
00140       _name = name;
00141       _emission = emission;
00142       _transitions = transitions;
00143       _incomingTransitions = iTransitions;
00144       _outgoingTransitions = oTransitions;
00145     }
00146 
00147     void forwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int begin_id);
00148 
00149     void backwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int currStateId, double *accumulator);
00150 
00151     void postProbSum(fMatrix &ppMatch, fMatrix &ppGap1, fMatrix &ppGap2, Matrix &alpha, Matrix &beta, double full, int i, int j);
00152   };
00153 
00154   class DLLEXPORT Gap2State: public PHMMState {
00155   public:
00156     Gap2State(int id, SymbolPtr name, DiscreteIIDModelPtr emission,  DiscreteIIDModelPtr transitions, IntVector iTransitions, IntVector oTransitions){
00157       _id = id;
00158       _name = name;
00159       _emission = emission;
00160       _transitions = transitions;
00161       _incomingTransitions = iTransitions;
00162       _outgoingTransitions = oTransitions;
00163     }
00164 
00165     void forwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int begin_id);
00166 
00167     void backwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int currStateId, double *accumulator);
00168 
00169     void postProbSum(fMatrix &ppMatch, fMatrix &ppGap1, fMatrix &ppGap2, Matrix &alpha, Matrix &beta, double full, int i, int j);
00170   };
00171 
00172   class DLLEXPORT SilentState: public PHMMState {
00173   public:
00174     SilentState(int id, SymbolPtr name, DiscreteIIDModelPtr emission,  DiscreteIIDModelPtr transitions, IntVector iTransitions, IntVector oTransitions){
00175       _id = id;
00176       _name = name;
00177       _emission = emission;
00178       _transitions = transitions;
00179       _incomingTransitions = iTransitions;
00180       _outgoingTransitions = oTransitions;
00181     }
00182 
00183     void forwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int begin_id);
00184 
00185     bool isSilent(){
00186       return true;
00187     }
00188   };
00189 
00190   class DLLEXPORT PairHiddenMarkovModel : public ProbabilisticModel{
00191   private:
00192     int _gap_id;
00193     int _end_id;
00194     int _begin_id;
00195     std::vector<PHMMStatePtr> _states;
00196     AlphabetPtr _state_names;
00197     std::vector<PHMMStatePtr> _silent_states;
00198 
00199   public:
00200 
00201     PairHiddenMarkovModel() {
00202     };
00203 
00204     PairHiddenMarkovModel( std::vector <PHMMStatePtr> states, AlphabetPtr state_names, AlphabetPtr observation_symbols) {
00205       _states = states;
00206       _state_names = state_names;
00207       tops::ProbabilisticModel::setAlphabet(observation_symbols);
00208     }
00209 
00210     int getGapId(){
00211       return _gap_id;
00212     }
00213 
00214     virtual ~PairHiddenMarkovModel(){}
00215 
00216     //Forward, Backward and Viterbi algorithms. We assume there are no cycles of silent states.
00217     virtual double forward(const Sequence & seq1, const Sequence & seq2, vector<Matrix> &a);
00218 
00219     virtual double backward(const Sequence & seq1, const Sequence & seq2, vector<Matrix> &a);
00220 
00221     //virtual double backwardSum(int k,int i,int j,vector<Matrix> &a,const Sequence &seq1, const Sequence &seq2);
00222 
00223     virtual double viterbi(const Sequence & seq1, const Sequence & seq2, Sequence & path, Sequence & al1, Sequence & al2, vector<Matrix> &a){
00224       return 0.0;
00225     };
00226 
00227     virtual float posteriorProbabilities (const Sequence &seq1, const Sequence &seq2, SparseMatrixPtr &ppMatch,SparseMatrixPtr &ppGap1, SparseMatrixPtr &ppGap2);
00228 
00229     virtual float expectedAccuracy(int size1, int size2, fMatrix &postProbs);
00230 
00231     //virtual float expectedAccuracyWithGaps(SparseMatrixPtr postProbs, SparseMatrixPtr postProbsGap1, SparseMatrixPtr postProbsGap2);
00232 
00233     virtual void generateSequence(Sequence &seq1, Sequence &seq2, Sequence &path){};
00234 
00235     //virtual void trainBaumWelch(SequenceList & sample, int maxiterations, double diff_threshold);
00236 
00237     virtual std::string model_name() const {
00238       return "PairHiddenMarkovModel";
00239     }
00240 
00241     virtual ProbabilisticModelCreatorPtr getFactory() const {
00242       return PairHiddenMarkovModelCreatorPtr(new PairHiddenMarkovModelCreator());
00243     }
00244 
00245     virtual void initialize(const ProbabilisticModelParameters & par) ;
00246 
00247     void setStates(std::vector<PHMMStatePtr> states, AlphabetPtr state_names){
00248       _states = states;
00249       for(vector<PHMMStatePtr>::iterator it = _states.begin(); it != _states.end(); it++){
00250   (*it)->removeEndId(_end_id);
00251   (*it)->removeBeginId(_begin_id);
00252       }
00253       _state_names = state_names;
00254     }
00255 
00256     void setObservationSymbols(AlphabetPtr obs){
00257       tops::ProbabilisticModel::setAlphabet(obs);
00258     }
00259 
00260     void setSilentStates(std::vector<PHMMStatePtr> states){
00261       _silent_states = states;
00262     }
00263 
00264     PHMMStatePtr getPHMMState(int id) const {
00265       return _states[id];
00266     }
00267 
00268     int getSilentId(int k) const {
00269       return _silent_states[k]->getId();
00270     }
00271 
00272     virtual AlphabetPtr getStateNames() const  {
00273       return _state_names;
00274     }
00275 
00276     virtual string getStateName(int state) const{
00277       return _state_names->getSymbol(state)->name();
00278     }
00279 
00280     //std::string str () const;
00281 
00282     //void silentStatesSort(vector<PHMMStatePtr> silStates);
00283 
00284     virtual PairHiddenMarkovModel * pairDecodable() {
00285       return this;
00286     }
00287 
00288   };
00289 
00290   typedef boost::shared_ptr<PairHiddenMarkovModel> PairHiddenMarkovModelPtr;
00291 }
00292 
00293 #endif
00294