ToPS
ContextTree.hpp
00001 /*
00002  *       ContextTree.hpp
00003  *
00004  *       Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br>
00005  *                      Ígor Bonádio <ibonadio@ime.usp.br>
00006  *                      Vitor Onuchic <vitoronuchic@gmail.com>
00007  *                      Alan Mitchell Durham <aland@usp.br>
00008  *
00009  *       This program is free software; you can redistribute it and/or modify
00010  *       it under the terms of the GNU  General Public License as published by
00011  *       the Free Software Foundation; either version 3 of the License, or
00012  *       (at your option) any later version.
00013  *
00014  *       This program is distributed in the hope that it will be useful,
00015  *       but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  *       GNU General Public License for more details.
00018  *
00019  *       You should have received a copy of the GNU General Public License
00020  *       along with this program; if not, write to the Free Software
00021  *       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00022  *       MA 02110-1301, USA.
00023  */
00024 
00025 #ifndef CONTEXT_TREE_HPP
00026 #define CONTEXT_TREE_HPP
00027 
00028 #include "crossplatform.hpp"
00029 
00030 #include "DiscreteIIDModel.hpp"
00031 #include "ProbabilisticModelParameter.hpp"
00032 #include <boost/shared_ptr.hpp>
00033 #include <set>
00034 namespace tops {
00035   class DLLEXPORT ContextTreeNode; // Forward declaration so the typedefs below can name the class before it is defined.
00036   typedef boost::shared_ptr <ContextTreeNode> ContextTreeNodePtr; //!< Shared-ownership pointer to a ContextTreeNode.
00037   typedef std::vector< boost::shared_ptr<tops::ContextTreeNode> > ContextTreeNodeVector; //!< Vector of shared pointers to ContextTreeNode objects.
00038     class DLLEXPORT ContextTree; // Forward declaration so ContextTreePtr can be declared before the class body.
00039    typedef boost::shared_ptr<ContextTree> ContextTreePtr; //!< Shared-ownership pointer to a ContextTree.
00041   class DLLEXPORT ContextTreeNode { // One node of a ContextTree. Only the empty destructor is defined inline; every other member function is declared here and defined elsewhere.
00042   private:
00043     DiscreteIIDModelPtr _distribution; //!< Distribution handle held by this node.
00044     ContextTreeNodeVector _child; //!< Child nodes, held as boost::shared_ptr<ContextTreeNode>.
00045     int _alphabet_size; //!< Alphabet size; presumably also the number of child slots -- TODO confirm in the implementation file.
00046     int _symbol; //!< Integer symbol tied to this node; presumably read/written by symbol()/setSymbol().
00047     bool _leaf; //!< Leaf flag; presumably what isLeaf() reports -- TODO confirm.
00048     std::vector<double> _counter; //!< Counts stored as doubles; presumably one entry per alphabet symbol -- TODO confirm.
00049     int _id; //!< Integer id of this node.
00050     int _parent_id; //!< The parent is recorded as an integer id, not as a pointer.
00051   public:
00052     ~ContextTreeNode(){ } //!< Empty destructor body; the members are destroyed by their own destructors.
00053     //! Constructs a node from an alphabet size (defined elsewhere; initial member values are not visible here).
00055     ContextTreeNode(int alphabet_size);
00056     //! Default constructor (defined elsewhere; initial member values are not visible here).
00058     ContextTreeNode() ;
00059     //! Presumably adds one occurrence of symbol s to the counter -- TODO confirm in the implementation file.
00061     void addCount (int s);
00062     //! Weighted overload; presumably adds w to the count of symbol s -- TODO confirm.
00064     void addCount (int s, double w);
00065 
00066     //! Presumably overwrites the count of symbol s with v -- TODO confirm.
00068     void setCount (int s, double v);
00069     //! Returns a non-const reference to a std::vector<double>, so callers can modify it in place (presumably _counter).
00071     std::vector<double> & getCounter ();
00072     //! Returns an int; presumably _alphabet_size.
00074     int alphabet_size();
00075 
00076     //! Takes the parent as an int; presumably stored in _parent_id.
00078     void setParent(int parent) ;
00079     //! Returns an int; presumably _parent_id.
00081     int getParent() ;
00082     //! Returns an int; presumably _id.
00083     int id ();
00085     void setId(int id); //!< Takes an int; presumably stored in _id.
00086     //! Presumably installs child as the child reached through the given symbol -- TODO confirm.
00088     void setChild(ContextTreeNodePtr child, int symbol);
00089     //! Returns an int; presumably _symbol.
00091     int symbol();
00092     //! Takes an int; presumably stored in _symbol.
00094     void setSymbol(int symbol);
00095 
00096     //! Takes a DiscreteIIDModelPtr by value; presumably stored in _distribution.
00098     void setDistribution(DiscreteIIDModelPtr distribution);
00099     //! Returns a node pointer for the given symbol; behaviour for a missing child is not visible here -- TODO confirm.
00101     ContextTreeNodePtr getChild(int symbol);
00102     //! Returns a DiscreteIIDModelPtr by value; presumably _distribution.
00104     DiscreteIIDModelPtr getDistribution();
00105     //! Presumably drops this node's children; whether _leaf is updated is not visible here -- TODO confirm.
00107       void deleteChildren() ;
00108     //! Returns a ContextTreeNodeVector by value, i.e. a copy of a vector of shared pointers (presumably _child).
00110     ContextTreeNodeVector getChildren() ;
00111     //! Returns a bool; presumably _leaf.
00113     bool isLeaf();
00114 
00115     //! Const member returning a std::string; the text format is defined elsewhere.
00116     std::string str() const ;
00117 
00118 
00119   };
00120 
00121 
00123   class DLLEXPORT ContextTree { // Tree of ContextTreeNode objects: keeps a vector of node pointers (_all_context) and an AlphabetPtr (_alphabet).
00124   public:
00125     ~ContextTree() { // Empty destructor body; the members are destroyed by their own destructors.
00126     }
00127     ContextTree(){} //!< Empty default constructor: both members are default-constructed.
00128     ContextTree(AlphabetPtr alphabet); //!< Constructs from an alphabet handle (defined elsewhere; presumably stored in _alphabet).
00129     //! Returns a non-const reference to the internal node vector, so callers can modify it directly.
00130     ContextTreeNodeVector & all_context() {
00131       return _all_context;
00132     }
00133     //! Const member returning a node pointer; which node counts as the root is decided in the implementation file.
00135     ContextTreeNodePtr getRoot() const ;
00136 
00137     //! Presumably creates a new node and registers it in _all_context -- TODO confirm.
00139     ContextTreeNodePtr createContext() ;
00140 
00141     //! Looks a node up by integer id; behaviour for an unknown id is not visible here -- TODO confirm.
00142     ContextTreeNodePtr getContext (int id) ;
00143     //! Presumably returns the context node that applies to position i of sequence s -- TODO confirm.
00145     ContextTreeNodePtr getContext(const Sequence & s, int i);
00146     //! Returns a std::set<int>; presumably the ids of the nodes one level above the leaves -- TODO confirm.
00147     std::set <int> getLevelOneNodes();
00148     //! Presumably discards nodes that are no longer reachable or used -- TODO confirm.
00149     void removeContextNotUsed();
00150     //! Parameterless overload (defined elsewhere); presumably turns the stored counts into distributions -- TODO confirm.
00151     void normalize();
00152     //! Three-argument overload (defined elsewhere); the meaning of i is not visible here -- TODO confirm.
00154       void normalize(ProbabilisticModelPtr old, double pseudocount, int i);
00155     //! Convenience overload: forwards to the three-argument normalize with i = 0.
00157       void normalize(ProbabilisticModelPtr old, double pseudocount){
00158           normalize(old,pseudocount,0);
00159       }
00160     //! Const member returning a std::string; the text format is defined elsewhere.
00161     std::string str() const;
00162     //! Presumably fills the node counters from the sequences up to the given order, using a weight map keyed by std::string -- TODO confirm.
00163     void initializeCounter(const SequenceEntryList & sequences, int order, const std::map<std::string, double> & weights);
00164     //! Same parameters as the overload above plus a pseudocounts value; how it is applied is not visible here -- TODO confirm.
00165     void initializeCounter(const SequenceEntryList & sequences, int order, double pseudocounts, const std::map<std::string, double> & weights);
00166     //! Prunes the tree using the threshold delta; the pruning criterion is defined elsewhere.
00168     void pruneTree(double delta) ;
00169 
00170     //! Presumably prunes nodes whose sample size is below small_ -- TODO confirm.
00172     void pruneTreeSmallSampleSize(int small_) ;
00173     //! Presumably builds the tree from the sequences with Rissanen's context algorithm, going by the name -- TODO confirm.
00175     void initializeContextTreeRissanen(const SequenceEntryList & sequences);
00176     //! Const member returning a DoubleMapParameterValuePtr; presumably filled through buildParameters() -- TODO confirm.
00177     DoubleMapParameterValuePtr getParameterValue () const;
00178     //! Returns the number of entries in _all_context; the size() result is implicitly converted to int.
00179     int getNumberOfNodes() const
00180     {
00181       return _all_context.size();
00182     }
00183 
00184   private:
00185     void printTree(ContextTreeNodePtr node, std::stringstream & out) const; //!< Private helper writing into out; presumably used by str().
00186     void buildParameters(ContextTreeNodePtr node, std::map<std::string, double> & parameters) const; //!< Private helper filling the parameters map passed by reference.
00187     ContextTreeNodeVector _all_context; //!< Node pointers exposed by all_context() and counted by getNumberOfNodes().
00188     AlphabetPtr _alphabet; //!< Alphabet handle; presumably set by ContextTree(AlphabetPtr).
00189 
00190   };
00191   typedef boost::shared_ptr<ContextTree> ContextTreePtr; // Repeats the ContextTreePtr typedef declared before the class definitions (listing line 00039); redeclaring a typedef to the same type is allowed, so this is redundant but harmless.
00192 
00193 
00194 }
00195 
00196 #endif