ToPS
|
00001 /* 00002 * SmoothedHistogramKernelDensity.cpp 00003 * 00004 * Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br> 00005 * Ígor Bonádio <ibonadio@ime.usp.br> 00006 * Vitor Onuchic <vitoronuchic@gmail.com> 00007 * Alan Mitchell Durham <aland@usp.br> 00008 * 00009 * This program is free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 3 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00022 * MA 02110-1301, USA. 00023 */ 00024 00025 #include "SmoothedHistogramKernelDensity.hpp" 00026 #include "DiscreteIIDModel.hpp" 00027 #include "util.hpp" 00028 00029 namespace tops { 00030 00031 00032 00033 ProbabilisticModelPtr SmoothedHistogramKernelDensity::create( ProbabilisticModelParameters & parameters) const 00034 { 00035 ProbabilisticModelParameterValuePtr training_set_parameter = 00036 parameters.getMandatoryParameterValue("training_set"); 00037 ProbabilisticModelParameterValuePtr maxlengthp = 00038 parameters.getOptionalParameterValue("max_length"); 00039 long max = 15000; 00040 if(maxlengthp != NULL) 00041 max = maxlengthp->getInt(); 00042 00043 00044 if(training_set_parameter == NULL) { 00045 std::cerr << help () << std::endl; 00046 ProbabilisticModelPtr nullmodel; 00047 exit(-1); 00048 return nullmodel; 00049 } 00050 00051 DoubleVector data; 00052 AlphabetPtr alpha = AlphabetPtr(new Alphabet());; 00053 SequenceEntryList sample_set; 00054 readSequencesFromFile(sample_set, alpha, training_set_parameter->getString()); 00055 for(int i = 0; i < (int)sample_set.size();i++) 00056 for(int j = 0; j < (int) (sample_set[i]->getSequence()).size(); j++) 00057 data.push_back((sample_set[i]->getSequence())[j]); 00058 std::map<long,double> sum; 00059 double total = 0.0; 00060 00061 if(data.size() > 0) 00062 { 00063 double bandwidth = sj_bandwidth(data); 00064 00065 for (int pos = 0; pos <= max; pos++) { 00066 sum[pos] = 0.0; 00067 double integral = 0.0; 00068 double min = kernel_density_estimation(pos-0.5, bandwidth, data); 00069 double max2 = kernel_density_estimation(pos+0.5, bandwidth, data); 00070 if(max2 < min) { 00071 double aux = min; 00072 min = max2; 00073 max2 = aux; 00074 } 00075 integral += min + (max2 - min)/2; 00076 sum[pos] = integral; 00077 total += integral; 00078 } 00079 } 00080 00081 DoubleVector prob; 00082 prob.resize(max+2); 00083 for (int k = 0; k <= max; k++){ 00084 prob[k] = sum[k]/total; 00085 } 00086 00087 00088 ProbabilisticModelParameters pars; 00089 pars.add("probabilities", ProbabilisticModelParameterValuePtr (new DoubleVectorParameterValue(prob))); 00090 pars.add("alphabet", alpha->getParameterValue()); 00091 DiscreteIIDModelPtr result = 00092 DiscreteIIDModelPtr(new DiscreteIIDModel()); 00093 result->initialize(pars); 00094 00095 return result; 00096 00097 } 00098 00099 }