ToPS
|
00001 /* 00002 * PairHiddenMarkovModel.hpp 00003 * 00004 * Copyright 2011 Vitor Onuchic <vitoronuchic@gmail.com> 00005 * André Yoshiaki Kashiwabara <akashiwabara@usp.br> 00006 * Ígor Bonádio <ibonadio@ime.usp.br> 00007 * Vitor Onuchic <vitoronuchic@gmail.com> 00008 * Alan Mitchell Durham <aland@usp.br> 00009 * 00010 * This program is free software; you can redistribute it and/or modify 00011 * it under the terms of the GNU General Public License as published by 00012 * the Free Software Foundation; either version 3 of the License, or 00013 * (at your option) any later version. 00014 * 00015 * This program is distributed in the hope that it will be useful, 00016 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00018 * GNU General Public License for more details. 00019 * 00020 * You should have received a copy of the GNU General Public License 00021 * along with this program; if not, write to the Free Software 00022 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00023 * MA 02110-1301, USA. 00024 */ 00025 00026 #ifndef PAIR_HIDDEN_MARKOV_MODEL 00027 #define PAIR_HIDDEN_MARKOV_MODEL 00028 00029 #include "crossplatform.hpp" 00030 00031 #include "HiddenMarkovModel.hpp" 00032 #include "ProbabilisticModel.hpp" 00033 #include "DecodableModel.hpp" 00034 #include "Sequence.hpp" 00035 #include "Alphabet.hpp" 00036 #include "ContextTree.hpp" 00037 #include "Symbol.hpp" 00038 #include "SparseMatrix.hpp" 00039 #include "PairHiddenMarkovModelCreator.hpp" 00040 #include "util.hpp" 00041 #include "ProbabilisticModelParameter.hpp" 00042 #include "Symbol.hpp" 00043 #include <cstdarg> 00044 #include <vector> 00045 #include <iostream> 00046 #include <cmath> 00047 #include <sstream> 00048 #include <vector> 00049 #include <iterator> 00050 #include <stdio.h> 00051 #include <algorithm> 00052 #include <boost/timer.hpp> 00053 00054 namespace tops{ 00055 class DLLEXPORT PHMMState: public HMMState { 00056 00057 protected: 00058 IntVector _outgoingTransitions; 00059 IntVector _incomingTransitions; 00060 00061 public: 00062 typedef boost::shared_ptr <PHMMState> PHMMStatePtr; 00063 00064 PHMMState(){} 00065 00066 PHMMState(int id, SymbolPtr name, DiscreteIIDModelPtr emission, DiscreteIIDModelPtr transitions, IntVector iTransitions, IntVector oTransitions){ 00067 _id = id; 00068 _name = name; 00069 _emission = emission; 00070 _transitions = transitions; 00071 _outgoingTransitions = oTransitions; 00072 _incomingTransitions = iTransitions; 00073 } 00074 00075 virtual void forwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int begin_id){ 00076 cerr << "Sub-class responsability: forwardSum()" << endl; 00077 exit(-1); 00078 } 00079 00080 virtual void backwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int currStateId, double *accumulator){ 00081 cerr << "Sub-class responsability: backwardSum()" << endl; 00082 exit(-1); 00083 } 00084 00085 virtual void postProbSum(fMatrix &ppMatch, fMatrix &ppGap1, fMatrix &ppGap2, Matrix &alpha, Matrix &beta, double full, int i, int j){ 00086 return; 00087 } 00088 00089 virtual bool isSilent(){ 00090 return false; 00091 } 00092 00093 IntVector &outTransitions(){ 00094 return _outgoingTransitions; 00095 } 00096 00097 void removeEndId(int end_id){ 00098 for(IntVector::iterator it = _outgoingTransitions.begin(); it != _outgoingTransitions.end(); it++){ 00099 if((*it) == end_id){ 00100 _outgoingTransitions.erase(it); 00101 break; 00102 } 00103 } 00104 } 00105 00106 void removeBeginId(int begin_id){ 00107 for(IntVector::iterator it = _incomingTransitions.begin(); it != _incomingTransitions.end(); it++){ 00108 if((*it) == begin_id){ 00109 _incomingTransitions.erase(it); 00110 break; 00111 } 00112 } 00113 } 00114 }; 00115 00116 typedef boost::shared_ptr <PHMMState> PHMMStatePtr; 00117 00118 class DLLEXPORT MatchState: public PHMMState { 00119 public: 00120 MatchState(int id, SymbolPtr name, DiscreteIIDModelPtr emission, DiscreteIIDModelPtr transitions, IntVector iTransitions, IntVector oTransitions){ 00121 _id = id; 00122 _name = name; 00123 _emission = emission; 00124 _transitions = transitions; 00125 _outgoingTransitions = oTransitions; 00126 _incomingTransitions = iTransitions; 00127 } 00128 00129 void forwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1,const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int begin_id); 00130 00131 void backwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int currStateId, double *accumulator); 00132 00133 void postProbSum(fMatrix &ppMatch, fMatrix &ppGap1, fMatrix &ppGap2, Matrix &alpha, Matrix &beta, double full, int i, int j); 00134 }; 00135 00136 class DLLEXPORT Gap1State: public PHMMState { 00137 public: 00138 Gap1State(int id, SymbolPtr name, DiscreteIIDModelPtr emission, DiscreteIIDModelPtr transitions, IntVector iTransitions, IntVector oTransitions){ 00139 _id = id; 00140 _name = name; 00141 _emission = emission; 00142 _transitions = transitions; 00143 _incomingTransitions = iTransitions; 00144 _outgoingTransitions = oTransitions; 00145 } 00146 00147 void forwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int begin_id); 00148 00149 void backwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int currStateId, double *accumulator); 00150 00151 void postProbSum(fMatrix &ppMatch, fMatrix &ppGap1, fMatrix &ppGap2, Matrix &alpha, Matrix &beta, double full, int i, int j); 00152 }; 00153 00154 class DLLEXPORT Gap2State: public PHMMState { 00155 public: 00156 Gap2State(int id, SymbolPtr name, DiscreteIIDModelPtr emission, DiscreteIIDModelPtr transitions, IntVector iTransitions, IntVector oTransitions){ 00157 _id = id; 00158 _name = name; 00159 _emission = emission; 00160 _transitions = transitions; 00161 _incomingTransitions = iTransitions; 00162 _outgoingTransitions = oTransitions; 00163 } 00164 00165 void forwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int begin_id); 00166 00167 void backwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int currStateId, double *accumulator); 00168 00169 void postProbSum(fMatrix &ppMatch, fMatrix &ppGap1, fMatrix &ppGap2, Matrix &alpha, Matrix &beta, double full, int i, int j); 00170 }; 00171 00172 class DLLEXPORT SilentState: public PHMMState { 00173 public: 00174 SilentState(int id, SymbolPtr name, DiscreteIIDModelPtr emission, DiscreteIIDModelPtr transitions, IntVector iTransitions, IntVector oTransitions){ 00175 _id = id; 00176 _name = name; 00177 _emission = emission; 00178 _transitions = transitions; 00179 _incomingTransitions = iTransitions; 00180 _outgoingTransitions = oTransitions; 00181 } 00182 00183 void forwardSum(vector<PHMMStatePtr> &states, const Sequence &seq1, const Sequence &seq2, vector<Matrix> &a, int i, int j, int gap_id, int begin_id); 00184 00185 bool isSilent(){ 00186 return true; 00187 } 00188 }; 00189 00190 class DLLEXPORT PairHiddenMarkovModel : public ProbabilisticModel{ 00191 private: 00192 int _gap_id; 00193 int _end_id; 00194 int _begin_id; 00195 std::vector<PHMMStatePtr> _states; 00196 AlphabetPtr _state_names; 00197 std::vector<PHMMStatePtr> _silent_states; 00198 00199 public: 00200 00201 PairHiddenMarkovModel() { 00202 }; 00203 00204 PairHiddenMarkovModel( std::vector <PHMMStatePtr> states, AlphabetPtr state_names, AlphabetPtr observation_symbols) { 00205 _states = states; 00206 _state_names = state_names; 00207 tops::ProbabilisticModel::setAlphabet(observation_symbols); 00208 } 00209 00210 int getGapId(){ 00211 return _gap_id; 00212 } 00213 00214 virtual ~PairHiddenMarkovModel(){} 00215 00216 //Forward, Backward and Viterbi algorithms. We assume there are no cycles of silent states. 00217 virtual double forward(const Sequence & seq1, const Sequence & seq2, vector<Matrix> &a); 00218 00219 virtual double backward(const Sequence & seq1, const Sequence & seq2, vector<Matrix> &a); 00220 00221 //virtual double backwardSum(int k,int i,int j,vector<Matrix> &a,const Sequence &seq1, const Sequence &seq2); 00222 00223 virtual double viterbi(const Sequence & seq1, const Sequence & seq2, Sequence & path, Sequence & al1, Sequence & al2, vector<Matrix> &a){ 00224 return 0.0; 00225 }; 00226 00227 virtual float posteriorProbabilities (const Sequence &seq1, const Sequence &seq2, SparseMatrixPtr &ppMatch,SparseMatrixPtr &ppGap1, SparseMatrixPtr &ppGap2); 00228 00229 virtual float expectedAccuracy(int size1, int size2, fMatrix &postProbs); 00230 00231 //virtual float expectedAccuracyWithGaps(SparseMatrixPtr postProbs, SparseMatrixPtr postProbsGap1, SparseMatrixPtr postProbsGap2); 00232 00233 virtual void generateSequence(Sequence &seq1, Sequence &seq2, Sequence &path){}; 00234 00235 //virtual void trainBaumWelch(SequenceList & sample, int maxiterations, double diff_threshold); 00236 00237 virtual std::string model_name() const { 00238 return "PairHiddenMarkovModel"; 00239 } 00240 00241 virtual ProbabilisticModelCreatorPtr getFactory() const { 00242 return PairHiddenMarkovModelCreatorPtr(new PairHiddenMarkovModelCreator()); 00243 } 00244 00245 virtual void initialize(const ProbabilisticModelParameters & par) ; 00246 00247 void setStates(std::vector<PHMMStatePtr> states, AlphabetPtr state_names){ 00248 _states = states; 00249 for(vector<PHMMStatePtr>::iterator it = _states.begin(); it != _states.end(); it++){ 00250 (*it)->removeEndId(_end_id); 00251 (*it)->removeBeginId(_begin_id); 00252 } 00253 _state_names = state_names; 00254 } 00255 00256 void setObservationSymbols(AlphabetPtr obs){ 00257 tops::ProbabilisticModel::setAlphabet(obs); 00258 } 00259 00260 void setSilentStates(std::vector<PHMMStatePtr> states){ 00261 _silent_states = states; 00262 } 00263 00264 PHMMStatePtr getPHMMState(int id) const { 00265 return _states[id]; 00266 } 00267 00268 int getSilentId(int k) const { 00269 return _silent_states[k]->getId(); 00270 } 00271 00272 virtual AlphabetPtr getStateNames() const { 00273 return _state_names; 00274 } 00275 00276 virtual string getStateName(int state) const{ 00277 return _state_names->getSymbol(state)->name(); 00278 } 00279 00280 //std::string str () const; 00281 00282 //void silentStatesSort(vector<PHMMStatePtr> silStates); 00283 00284 virtual PairHiddenMarkovModel * pairDecodable() { 00285 return this; 00286 } 00287 00288 }; 00289 00290 typedef boost::shared_ptr<PairHiddenMarkovModel> PairHiddenMarkovModelPtr; 00291 } 00292 00293 #endif 00294