ToPS
|
00001 /* 00002 * AkaikeInformationCriteria.cpp 00003 * 00004 * Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br> 00005 * Ígor Bonádio <ibonadio@ime.usp.br> 00006 * Vitor Onuchic <vitoronuchic@gmail.com> 00007 * Alan Mitchell Durham <aland@usp.br> 00008 * 00009 * This program is free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 3 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00022 * MA 02110-1301, USA. 00023 */ 00024 00025 #include "AkaikeInformationCriteria.hpp" 00026 #include <map> 00027 #include <vector> 00028 00029 namespace tops 00030 { 00031 00032 std::string AkaikeInformationCriteria::help() const { 00033 std::stringstream out; 00034 out << "\nUSAGE: " << std::endl; 00035 out << "Mandatory parameters: " << std::endl; 00036 out << "\nbegin" << std::endl; 00037 out << "\tend" << std::endl; 00038 out << "\tstep" << std::endl; 00039 out << "Example: " << std::endl; 00040 out << "The configuration file below specify the AIC to select the WAM with the best order" << std::endl; 00041 out << "training_algorithm=\"WeightArrayModel\"" << std::endl; 00042 out << "training_set=\"dataset/sequences.txt\"" << std::endl; 00043 out << "alphabet=(\"A\", \"C\", \"G\", \"T\")" << std::endl; 00044 out << "length=31" << std::endl; 00045 out << "vicinity_length = 0" << std::endl; 00046 out << "pseudo_counts=0" << std::endl; 00047 out << "model_selection_criteria = \"AIC\"" << std::endl; 00048 out << "begin = (\"order\": 0)" << std::endl; 00049 out << "end = (\"order\": 3)" << std::endl; 00050 out << "step = (\"order\": 1)" << std::endl; 00051 return out.str(); 00052 } 00053 00054 00055 ProbabilisticModelPtr AkaikeInformationCriteria::create( ProbabilisticModelParameters & parameters) const 00056 { 00057 ProbabilisticModelParameterValuePtr beginpar = parameters.getMandatoryParameterValue("begin"); 00058 ProbabilisticModelParameterValuePtr endpar = parameters.getMandatoryParameterValue("end"); 00059 ProbabilisticModelParameterValuePtr steppar = parameters.getMandatoryParameterValue("step"); 00060 ProbabilisticModelParameterValuePtr trainpar = parameters.getMandatoryParameterValue("training_algorithm"); 00061 00062 if((beginpar == NULL) || 00063 (endpar == NULL) || 00064 (steppar == NULL)) 00065 { 00066 std::cerr << help() << std::endl; 00067 exit(-1); 00068 } 00069 std::map<std::string, double> beginmap = beginpar->getDoubleMap(); 00070 std::map<std::string, double> endmap = endpar->getDoubleMap(); 00071 std::map<std::string, double> stepmap = steppar->getDoubleMap(); 00072 std::vector <std::string> parnames; 00073 std::map <std::string, double>::const_iterator it; 00074 for(it = beginmap.begin(); it!=beginmap.end(); it++) 00075 parnames.push_back(it->first); 00076 00077 // generates all possible combinations of parameters 00078 std::vector <std::string>::const_iterator p; 00079 std::vector <std::vector <double> > combinations; 00080 combinations.resize(1); 00081 for(p = parnames.begin(); p != parnames.end(); p++) 00082 { 00083 std::vector <std::vector <double> > new_combinations; 00084 for (double i = beginmap[*p] ; i <= endmap[*p]; i += stepmap[*p]) 00085 { 00086 for(int j = 0; j < (int) combinations.size() ; j++) 00087 { 00088 std::vector <double> comb; 00089 for(int k = 0; k < (int)combinations[j].size(); k++) 00090 comb.push_back(combinations[j][k]); 00091 comb.push_back(i); 00092 new_combinations.push_back(comb); 00093 } 00094 } 00095 combinations = new_combinations; 00096 } 00097 double aic = HUGE; 00098 ProbabilisticModelPtr result; 00099 for(int i = 0; i < (int) combinations.size(); i++) 00100 { 00101 for(int j = 0; j < (int) combinations[i].size(); j++) 00102 { 00103 DoubleParameterValuePtr value = DoubleParameterValuePtr(new DoubleParameterValue(combinations[i][j])); 00104 00105 parameters.set(parnames[j], value); 00106 } 00107 double loglikelihood; 00108 int sample_size; 00109 ProbabilisticModelPtr m = _creator->create(parameters, loglikelihood, sample_size); 00110 double model_aic = 2 * m->size() - 2* loglikelihood ; 00111 if(aic > model_aic) 00112 { 00113 aic = model_aic; 00114 result = m; 00115 } 00116 00117 int k = 0; 00118 if( k < (int) combinations[i].size()) 00119 { 00120 std::cerr << combinations[i][k]; 00121 for(k = 1; k < (int) combinations[i].size(); k++) 00122 std::cerr << "\t" << combinations[i][k]; 00123 } 00124 std::cerr << "\t" << model_aic << std::endl; 00125 00126 } 00127 std::cout << "# Model AIC: " << aic << std::endl; 00128 return result; 00129 } 00130 }