ToPS
AkaikeInformationCriteria.cpp
00001 /*
00002  *       AkaikeInformationCriteria.cpp
00003  *
00004  *       Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br>
00005  *                      Ígor Bonádio <ibonadio@ime.usp.br>
00006  *                      Vitor Onuchic <vitoronuchic@gmail.com>
00007  *                      Alan Mitchell Durham <aland@usp.br>
00008  *
00009  *       This program is free software; you can redistribute it and/or modify
00010  *       it under the terms of the GNU  General Public License as published by
00011  *       the Free Software Foundation; either version 3 of the License, or
00012  *       (at your option) any later version.
00013  *
00014  *       This program is distributed in the hope that it will be useful,
00015  *       but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  *       GNU General Public License for more details.
00018  *
00019  *       You should have received a copy of the GNU General Public License
00020  *       along with this program; if not, write to the Free Software
00021  *       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00022  *       MA 02110-1301, USA.
00023  */
00024 
00025 #include "AkaikeInformationCriteria.hpp"
00026 #include <map>
00027 #include <vector>
00028 
00029 namespace tops
00030 {
00031 
00032   std::string AkaikeInformationCriteria::help() const {
00033     std::stringstream out;
00034     out << "\nUSAGE: " << std::endl;
00035     out << "Mandatory parameters: " << std::endl;
00036     out << "\nbegin" << std::endl;
00037     out << "\tend" << std::endl;
00038     out << "\tstep" << std::endl;
00039     out << "Example: " << std::endl;
00040     out << "The configuration file below specify the AIC to select the WAM with the best order" << std::endl;
00041     out << "training_algorithm=\"WeightArrayModel\"" << std::endl;
00042     out << "training_set=\"dataset/sequences.txt\"" << std::endl;
00043     out << "alphabet=(\"A\", \"C\", \"G\", \"T\")" << std::endl;
00044     out << "length=31" << std::endl;
00045     out << "vicinity_length = 0" << std::endl;
00046     out << "pseudo_counts=0" << std::endl;
00047     out << "model_selection_criteria = \"AIC\"" << std::endl;
00048     out << "begin = (\"order\": 0)" << std::endl;
00049     out << "end = (\"order\": 3)" << std::endl;
00050     out << "step = (\"order\": 1)" << std::endl;
00051     return out.str();
00052   }
00053 
00054 
00055   ProbabilisticModelPtr AkaikeInformationCriteria::create( ProbabilisticModelParameters & parameters) const
00056   {
00057     ProbabilisticModelParameterValuePtr beginpar = parameters.getMandatoryParameterValue("begin");
00058     ProbabilisticModelParameterValuePtr endpar = parameters.getMandatoryParameterValue("end");
00059     ProbabilisticModelParameterValuePtr steppar = parameters.getMandatoryParameterValue("step");
00060     ProbabilisticModelParameterValuePtr trainpar = parameters.getMandatoryParameterValue("training_algorithm");
00061 
00062     if((beginpar == NULL) ||
00063        (endpar == NULL) ||
00064        (steppar == NULL))
00065       {
00066         std::cerr << help() << std::endl;
00067         exit(-1);
00068       }
00069     std::map<std::string, double> beginmap = beginpar->getDoubleMap();
00070     std::map<std::string, double> endmap = endpar->getDoubleMap();
00071     std::map<std::string, double> stepmap = steppar->getDoubleMap();
00072     std::vector <std::string> parnames;
00073     std::map <std::string, double>::const_iterator it;
00074     for(it = beginmap.begin(); it!=beginmap.end(); it++)
00075       parnames.push_back(it->first);
00076 
00077     // generates all possible combinations of parameters
00078     std::vector <std::string>::const_iterator p;
00079     std::vector <std::vector <double> > combinations;
00080     combinations.resize(1);
00081     for(p = parnames.begin(); p != parnames.end(); p++)
00082       {
00083         std::vector <std::vector <double> > new_combinations;
00084         for (double i = beginmap[*p] ; i <= endmap[*p]; i += stepmap[*p])
00085           {
00086             for(int j = 0; j < (int) combinations.size() ; j++)
00087               {
00088                 std::vector <double> comb;
00089                 for(int k = 0; k < (int)combinations[j].size(); k++)
00090                   comb.push_back(combinations[j][k]);
00091                 comb.push_back(i);
00092                 new_combinations.push_back(comb);
00093               }
00094           }
00095         combinations = new_combinations;
00096       }
00097     double aic = HUGE;
00098     ProbabilisticModelPtr result;
00099     for(int i = 0; i < (int) combinations.size(); i++)
00100       {
00101         for(int j = 0; j < (int) combinations[i].size(); j++)
00102           {
00103             DoubleParameterValuePtr value = DoubleParameterValuePtr(new DoubleParameterValue(combinations[i][j]));
00104 
00105             parameters.set(parnames[j], value);
00106           }
00107         double loglikelihood;
00108         int sample_size;
00109         ProbabilisticModelPtr m = _creator->create(parameters, loglikelihood, sample_size);
00110         double model_aic = 2 * m->size()  - 2* loglikelihood ;
00111         if(aic > model_aic)
00112           {
00113             aic = model_aic;
00114             result = m;
00115           }
00116 
00117         int k = 0;
00118         if( k < (int) combinations[i].size())
00119           {
00120             std::cerr << combinations[i][k];
00121             for(k = 1; k < (int) combinations[i].size(); k++)
00122               std::cerr << "\t" << combinations[i][k];
00123           }
00124         std::cerr << "\t" << model_aic  << std::endl;
00125 
00126       }
00127     std::cout << "# Model AIC: " << aic  << std::endl;
00128     return result;
00129   }
00130 }