// ToPS
00001 /* 00002 * ContextTree.hpp 00003 * 00004 * Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br> 00005 * Ígor Bonádio <ibonadio@ime.usp.br> 00006 * Vitor Onuchic <vitoronuchic@gmail.com> 00007 * Alan Mitchell Durham <aland@usp.br> 00008 * 00009 * This program is free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 3 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00022 * MA 02110-1301, USA. 
00023 */ 00024 00025 #ifndef CONTEXT_TREE_HPP 00026 #define CONTEXT_TREE_HPP 00027 00028 #include "crossplatform.hpp" 00029 00030 #include "DiscreteIIDModel.hpp" 00031 #include "ProbabilisticModelParameter.hpp" 00032 #include <boost/shared_ptr.hpp> 00033 #include <set> 00034 namespace tops { 00035 class DLLEXPORT ContextTreeNode; 00036 typedef boost::shared_ptr <ContextTreeNode> ContextTreeNodePtr; 00037 typedef std::vector< boost::shared_ptr<tops::ContextTreeNode> > ContextTreeNodeVector; 00038 class DLLEXPORT ContextTree; 00039 typedef boost::shared_ptr<ContextTree> ContextTreePtr; 00041 class DLLEXPORT ContextTreeNode { 00042 private: 00043 DiscreteIIDModelPtr _distribution; 00044 ContextTreeNodeVector _child; 00045 int _alphabet_size; 00046 int _symbol; 00047 bool _leaf; 00048 std::vector<double> _counter; 00049 int _id; 00050 int _parent_id; 00051 public: 00052 ~ContextTreeNode(){ } 00053 00055 ContextTreeNode(int alphabet_size); 00056 00058 ContextTreeNode() ; 00059 00061 void addCount (int s); 00062 00064 void addCount (int s, double w); 00065 00066 00068 void setCount (int s, double v); 00069 00071 std::vector<double> & getCounter (); 00072 00074 int alphabet_size(); 00075 00076 00078 void setParent(int parent) ; 00079 00081 int getParent() ; 00082 00083 int id (); 00085 void setId(int id); 00086 00088 void setChild(ContextTreeNodePtr child, int symbol); 00089 00091 int symbol(); 00092 00094 void setSymbol(int symbol); 00095 00096 00098 void setDistribution(DiscreteIIDModelPtr distribution); 00099 00101 ContextTreeNodePtr getChild(int symbol); 00102 00104 DiscreteIIDModelPtr getDistribution(); 00105 00107 void deleteChildren() ; 00108 00110 ContextTreeNodeVector getChildren() ; 00111 00113 bool isLeaf(); 00114 00115 00116 std::string str() const ; 00117 00118 00119 }; 00120 00121 00123 class DLLEXPORT ContextTree { 00124 public: 00125 ~ContextTree() { 00126 } 00127 ContextTree(){} 00128 ContextTree(AlphabetPtr alphabet); 00129 00130 ContextTreeNodeVector & 
all_context() { 00131 return _all_context; 00132 } 00133 00135 ContextTreeNodePtr getRoot() const ; 00136 00137 00139 ContextTreeNodePtr createContext() ; 00140 00141 00142 ContextTreeNodePtr getContext (int id) ; 00143 00145 ContextTreeNodePtr getContext(const Sequence & s, int i); 00146 00147 std::set <int> getLevelOneNodes(); 00148 00149 void removeContextNotUsed(); 00150 00151 void normalize(); 00152 00154 void normalize(ProbabilisticModelPtr old, double pseudocount, int i); 00155 00157 void normalize(ProbabilisticModelPtr old, double pseudocount){ 00158 normalize(old,pseudocount,0); 00159 } 00160 00161 std::string str() const; 00162 00163 void initializeCounter(const SequenceEntryList & sequences, int order, const std::map<std::string, double> & weights); 00164 00165 void initializeCounter(const SequenceEntryList & sequences, int order, double pseudocounts, const std::map<std::string, double> & weights); 00166 00168 void pruneTree(double delta) ; 00169 00170 00172 void pruneTreeSmallSampleSize(int small_) ; 00173 00175 void initializeContextTreeRissanen(const SequenceEntryList & sequences); 00176 00177 DoubleMapParameterValuePtr getParameterValue () const; 00178 00179 int getNumberOfNodes() const 00180 { 00181 return _all_context.size(); 00182 } 00183 00184 private: 00185 void printTree(ContextTreeNodePtr node, std::stringstream & out) const; 00186 void buildParameters(ContextTreeNodePtr node, std::map<std::string, double> & parameters) const; 00187 ContextTreeNodeVector _all_context; 00188 AlphabetPtr _alphabet; 00189 00190 }; 00191 typedef boost::shared_ptr<ContextTree> ContextTreePtr; 00192 00193 00194 } 00195 00196 #endif