ToPS
|
00001 /* 00002 * BayesianInformationCriteria.cpp 00003 * 00004 * Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br> 00005 * Ígor Bonádio <ibonadio@ime.usp.br> 00006 * Vitor Onuchic <vitoronuchic@gmail.com> 00007 * Alan Mitchell Durham <aland@usp.br> 00008 * 00009 * This program is free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 3 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00022 * MA 02110-1301, USA. 00023 */ 00024 00025 #include "BayesianInformationCriteria.hpp" 00026 #include <map> 00027 #include <vector> 00028 00029 namespace tops 00030 { 00031 std::string BayesianInformationCriteria::help() const { 00032 std::stringstream out; 00033 out << "\nUSAGE: " << std::endl; 00034 out << "Mandatory parameters: " << std::endl; 00035 out << "\nbegin" << std::endl; 00036 out << "\tend" << std::endl; 00037 out << "\tstep" << std::endl; 00038 out << "Example: " << std::endl; 00039 out << "The configuration file below specify the BIC to select the WAM with the best order" << std::endl; 00040 out << "training_algorithm=\"WeightArrayModel\"" << std::endl; 00041 out << "training_set=\"dataset/sequences.txt\"" << std::endl; 00042 out << "alphabet=(\"A\", \"C\", \"G\", \"T\")" << std::endl; 00043 out << "length=31" << std::endl; 00044 out << "vicinity_length = 0" << std::endl; 00045 out << "pseudo_counts=0" << std::endl; 00046 out << "model_selection_criteria = \"BIC\"" << std::endl; 00047 out << "begin = (\"order\": 0)" << std::endl; 00048 out << "end = (\"order\": 3)" << std::endl; 00049 out << "step = (\"order\": 1)" << std::endl; 00050 return out.str(); 00051 } 00052 00053 ProbabilisticModelPtr BayesianInformationCriteria::create( ProbabilisticModelParameters & parameters) const 00054 { 00055 ProbabilisticModelParameterValuePtr beginpar = parameters.getMandatoryParameterValue("begin"); 00056 ProbabilisticModelParameterValuePtr endpar = parameters.getMandatoryParameterValue("end"); 00057 ProbabilisticModelParameterValuePtr steppar = parameters.getMandatoryParameterValue("step"); 00058 ProbabilisticModelParameterValuePtr trainpar = parameters.getMandatoryParameterValue("training_algorithm"); 00059 00060 if((beginpar == NULL) || 00061 (endpar == NULL) || 00062 (steppar == NULL)) 00063 { 00064 std::cerr << help() << std::endl; 00065 exit(-1); 00066 } 00067 std::map<std::string, double> beginmap = beginpar->getDoubleMap(); 00068 std::map<std::string, double> endmap = endpar->getDoubleMap(); 00069 std::map<std::string, double> stepmap = steppar->getDoubleMap(); 00070 std::vector <std::string> parnames; 00071 std::map <std::string, double>::const_iterator it; 00072 for(it = beginmap.begin(); it!=beginmap.end(); it++) 00073 parnames.push_back(it->first); 00074 00075 // generates all possible combinations of parameters 00076 std::vector <std::string>::const_iterator p; 00077 std::vector <std::vector <double> > combinations; 00078 combinations.resize(1); 00079 for(p = parnames.begin(); p != parnames.end(); p++) 00080 { 00081 std::vector <std::vector <double> > new_combinations; 00082 for (double i = beginmap[*p] ; i <= endmap[*p]; i += stepmap[*p]) 00083 { 00084 for(int j = 0; j < (int) combinations.size() ; j++) 00085 { 00086 std::vector <double> comb; 00087 for(int k = 0; k < (int)combinations[j].size(); k++) 00088 comb.push_back(combinations[j][k]); 00089 comb.push_back(i); 00090 new_combinations.push_back(comb); 00091 } 00092 } 00093 combinations = new_combinations; 00094 } 00095 double bic = -HUGE; 00096 ProbabilisticModelPtr result; 00097 for(int i = 0; i < (int) combinations.size(); i++) 00098 { 00099 for(int j = 0; j < (int) combinations[i].size(); j++) 00100 { 00101 DoubleParameterValuePtr value = DoubleParameterValuePtr(new DoubleParameterValue(combinations[i][j])); 00102 00103 parameters.set(parnames[j], value); 00104 } 00105 double loglikelihood; 00106 int sample_size; 00107 ProbabilisticModelPtr m = _creator->create(parameters, loglikelihood, sample_size); 00108 if(m == NULL) 00109 { 00110 std::cerr << "ERROR (BIC): could not estimate the model size using BIC " << std::endl; 00111 exit(-1); 00112 } 00113 double model_bic = loglikelihood - 0.5* m->size() * log((double) sample_size); 00114 if((bic < model_bic) || (result == NULL)) 00115 { 00116 bic = model_bic; 00117 result = m; 00118 } 00119 00120 int k = 0; 00121 if( k < (int) combinations[i].size()) 00122 { 00123 std::cerr << combinations[i][k]; 00124 for(k = 1; k < (int) combinations[i].size(); k++) 00125 std::cerr << "\t" << combinations[i][k]; 00126 } 00127 std::cerr << "\t" << model_bic << std::endl; 00128 00129 } 00130 std::cout << "# Model BIC: " << bic << std::endl; 00131 return result; 00132 } 00133 }