ToPS
|
00001 /* 00002 * DiscreteIIDModelCreateModel.cpp 00003 * 00004 * Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br> 00005 * Ígor Bonádio <ibonadio@ime.usp.br> 00006 * Vitor Onuchic <vitoronuchic@gmail.com> 00007 * Alan Mitchell Durham <aland@usp.br> 00008 * 00009 * This program is free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 3 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00022 * MA 02110-1301, USA. 00023 */ 00024 00025 #include "Alphabet.h" 00026 #include "DiscreteIIDModelFactory.h" 00027 #include "DiscreteIIDModelCreateModel.h" 00028 #include "ReadConfigurationFile.h" 00029 #include "SequenceFactory.h" 00030 #include "FASTAReader.h" 00031 #include <string> 00032 00033 00034 namespace myop { 00035 00036 ProbabilisticModelPtr DiscreteIIDModelCreateModel::create (const std::string & config) { 00037 std::string TYPE("distribution"); 00038 std::vector <std::string> mandatory; 00039 mandatory.push_back(TYPE); 00040 ReadConfigurationFile readConf; 00041 Configuration conf = readConf.load_configuration_file(config, mandatory); 00042 00043 std::map <std::string, DiscreteIIDModelCreateModelPtr> commands; 00044 std::string BERNOULLI("Bernoulli"); 00045 std::string UNIFORM_ALPHABET("Uniform"); 00046 std::string SMOOTHED_HISTOGRAM_BURGE("SmoothedHistogramBurge"); 00047 std::string SMOOTHED_HISTOGRAM_STANKE("SmoothedHistogramStanke"); 00048 std::string SMOOTHED_HISTOGRAM_SHEATHER_JONES("SmoothedHistogramSheaterJones"); 00049 std::string SMOOTHED_HISTOGRAM_MYOP("SmoothedHistogramMYOP"); 00050 std::string SMOOTHED_HISTOGRAM_MAJOROS("SmoothedHistogramMajoros"); 00051 00052 commands[BERNOULLI] = DiscreteIIDModelCreateModelPtr(new BernoulliCreateModel()); 00053 commands[UNIFORM_ALPHABET] = DiscreteIIDModelCreateModelPtr(new UniformAlphabetCreateModel()); 00054 commands[SMOOTHED_HISTOGRAM_BURGE] = DiscreteIIDModelCreateModelPtr(new SmoothedHistogramBurgeCreateModel()); 00055 commands[SMOOTHED_HISTOGRAM_STANKE] = DiscreteIIDModelCreateModelPtr(new SmoothedHistogramStankeCreateModel()); 00056 commands[SMOOTHED_HISTOGRAM_SHEATHER_JONES] = DiscreteIIDModelCreateModelPtr(new SmoothedHistogramKernelDensityCreateModel()); 00057 commands[SMOOTHED_HISTOGRAM_MYOP] = DiscreteIIDModelCreateModelPtr(new SmoothedHistogramMYOPCreateModel()); 00058 commands[SMOOTHED_HISTOGRAM_MAJOROS] = DiscreteIIDModelCreateModelPtr(new SmoothedHistogramMajorosCreateModel()); 00059 00060 if(commands.find(conf[TYPE]) == commands.end()) 00061 { 00062 std::cerr << "Invalid value for \"distribution\" parameter" << std::endl; 00063 std::cerr << "Valid values are: " << std::endl; 00064 std::map<std::string,DiscreteIIDModelCreateModelPtr> :: iterator it; 00065 for (it = commands.begin(); it != commands.end(); it++) 00066 std::cerr << "\t" << it->first << std::endl; 00067 exit(-1); 00068 } 00069 return commands[conf[TYPE]]->create(config); 00070 } 00071 00072 ProbabilisticModelPtr BernoulliCreateModel::create (const std::string & config) { 00073 std::string PROBABILITY ("probability"); 00074 std::vector <std::string> mandatory; 00075 mandatory.push_back(PROBABILITY); 00076 ReadConfigurationFile readConf; 00077 Configuration conf = readConf.load_configuration_file(config, mandatory); 00078 DiscreteIIDModelFactory factory; 00079 return factory.bernoulli(atof(conf[PROBABILITY].c_str())); 00080 } 00081 00082 ProbabilisticModelPtr UniformAlphabetCreateModel::create (const std::string & config) { 00083 std::string ALPHABET("alphabet"); 00084 std::vector <std::string> mandatory; 00085 mandatory.push_back(ALPHABET); 00086 ReadConfigurationFile readConf; 00087 Configuration conf = readConf.load_configuration_file(config, mandatory); 00088 AlphabetPtr alphabet = AlphabetPtr(new Alphabet()); 00089 alphabet->initializeFromString(conf[ALPHABET]); 00090 DiscreteIIDModelFactory factory; 00091 return factory.uniform(alphabet); 00092 } 00093 00094 ProbabilisticModelPtr SmoothedHistogramBurgeCreateModel::create (const std::string & config) { 00095 std::string TRAINING_SET("trainig_set"); 00096 std::string C("C"); 00097 std::string ALPHABET("alphabet"); 00098 std::vector <std::string> mandatory; 00099 mandatory.push_back(TRAINING_SET); 00100 mandatory.push_back(C); 00101 mandatory.push_back(ALPHABET); 00102 ReadConfigurationFile readConf; 00103 Configuration conf = readConf.load_configuration_file(config, mandatory); 00104 DoubleVector lengths; 00105 00106 FASTAReader reader; 00107 if(!reader.open(conf[TRAINING_SET])) 00108 { 00109 std::cerr << "Can't open file: " << conf[TRAINING_SET] << std::endl; 00110 exit(-1); 00111 } 00112 std::string sequence; 00113 AlphabetPtr alphabet = AlphabetPtr(new Alphabet()); 00114 alphabet->initializeFromString(conf[ALPHABET]); 00115 SequenceFactory seqFactory(alphabet); 00116 while(reader.nextSequence(sequence)) 00117 { 00118 Sequence sample; 00119 seqFactory.createSequence(sample, sequence); 00120 lengths.push_back((double)sequence.size()); 00121 } 00122 reader.close(); 00123 DiscreteIIDModelFactory factory; 00124 return factory.smoothedDistributionBurge(lengths, atof(conf[C].c_str())); 00125 } 00126 00127 ProbabilisticModelPtr SmoothedHistogramMajorosCreateModel::create (const std::string & config) { 00128 std::string TRAINING_SET("trainig_set"); 00129 std::string WINDOW_SIZE("window_size"); 00130 std::string INTERACTIONS("interactions"); 00131 std::string N("N"); 00132 std::string ALPHABET("alphabet"); 00133 std::vector <std::string> mandatory; 00134 mandatory.push_back(TRAINING_SET); 00135 mandatory.push_back(WINDOW_SIZE); 00136 mandatory.push_back(INTERACTIONS); 00137 mandatory.push_back(N); 00138 mandatory.push_back(ALPHABET); 00139 ReadConfigurationFile readConf; 00140 Configuration conf = readConf.load_configuration_file(config, mandatory); 00141 DoubleVector lengths; 00142 00143 FASTAReader reader; 00144 00145 if(!reader.open(conf[TRAINING_SET])) 00146 { 00147 std::cerr << "Can't open file: " << conf[TRAINING_SET] << std::endl; 00148 exit(-1); 00149 } 00150 std::string sequence; 00151 AlphabetPtr alphabet = AlphabetPtr(new Alphabet()); 00152 alphabet->initializeFromString(conf[ALPHABET]); 00153 SequenceFactory seqFactory(alphabet); 00154 while(reader.nextSequence(sequence)) 00155 { 00156 Sequence sample; 00157 seqFactory.createSequence(sample, sequence); 00158 lengths.push_back((double)sequence.size()); 00159 } 00160 reader.close(); 00161 DiscreteIIDModelFactory factory; 00162 return factory.smoothedDistributionMajoros(lengths, atoi(conf[WINDOW_SIZE].c_str()), atoi(conf[INTERACTIONS].c_str()), atoi(conf[N].c_str())); 00163 } 00164 00165 00166 ProbabilisticModelPtr SmoothedHistogramStankeCreateModel::create (const std::string & config) 00167 { 00168 std::string TRAINING_SET("trainig_set"); 00169 std::string ALPHABET("alphabet"); 00170 std::vector <std::string> mandatory; 00171 mandatory.push_back(TRAINING_SET); 00172 mandatory.push_back(ALPHABET); 00173 ReadConfigurationFile readConf; 00174 Configuration conf = readConf.load_configuration_file(config, mandatory); 00175 DoubleVector lengths; 00176 00177 FASTAReader reader ; 00178 00179 if(!reader.open(conf[TRAINING_SET])) 00180 { 00181 std::cerr << "Can't open file: " << conf[TRAINING_SET] << std::endl; 00182 exit(-1); 00183 } 00184 std::string sequence; 00185 AlphabetPtr alphabet = AlphabetPtr(new Alphabet()); 00186 alphabet->initializeFromString(conf[ALPHABET]); 00187 SequenceFactory seqFactory(alphabet); 00188 while(reader.nextSequence(sequence)) 00189 { 00190 Sequence sample; 00191 seqFactory.createSequence(sample, sequence); 00192 lengths.push_back(sequence.size()); 00193 } 00194 reader.close(); 00195 DiscreteIIDModelFactory factory; 00196 return factory.smoothedDistributionKernelDensityStanke(lengths); 00197 } 00198 00199 00200 ProbabilisticModelPtr SmoothedHistogramKernelDensityCreateModel::create (const std::string & config) { 00201 std::string TRAINING_SET("trainig_set"); 00202 std::string ALPHABET("alphabet"); 00203 std::vector <std::string> mandatory; 00204 mandatory.push_back(TRAINING_SET); 00205 mandatory.push_back(ALPHABET); 00206 ReadConfigurationFile readConf; 00207 Configuration conf = readConf.load_configuration_file(config, mandatory); 00208 DoubleVector lengths; 00209 std::ifstream is; 00210 FASTAReader reader; 00211 00212 if(!reader.open(conf[TRAINING_SET])) 00213 { 00214 std::cerr << "Can't open file: " << conf[TRAINING_SET] << std::endl; 00215 exit(-1); 00216 } 00217 std::string sequence; 00218 AlphabetPtr alphabet = AlphabetPtr(new Alphabet()); 00219 alphabet->initializeFromString(conf[ALPHABET]); 00220 SequenceFactory seqFactory(alphabet); 00221 while(reader.nextSequence(sequence)) 00222 { 00223 Sequence sample; 00224 seqFactory.createSequence(sample, sequence); 00225 lengths.push_back(sequence.size()); 00226 } 00227 reader.close(); 00228 DiscreteIIDModelFactory factory; 00229 return factory.smoothedDistributionKernelDensity(lengths); 00230 } 00231 00232 00233 ProbabilisticModelPtr SmoothedHistogramMYOPCreateModel::create (const std::string & config) { 00234 std::string TRAINING_SET("trainig_set"); 00235 std::string ALPHABET("alphabet"); 00236 std::vector <std::string> mandatory; 00237 mandatory.push_back(TRAINING_SET); 00238 mandatory.push_back(ALPHABET); 00239 ReadConfigurationFile readConf; 00240 Configuration conf = readConf.load_configuration_file(config, mandatory); 00241 DoubleVector lengths; 00242 FASTAReader reader; 00243 00244 if(!reader.open(conf[TRAINING_SET])) 00245 { 00246 std::cerr << "Can't open file: " << conf[TRAINING_SET] << std::endl; 00247 exit(-1); 00248 } 00249 std::string sequence; 00250 AlphabetPtr alphabet = AlphabetPtr(new Alphabet()); 00251 alphabet->initializeFromString(conf[ALPHABET]); 00252 SequenceFactory seqFactory(alphabet); 00253 while(reader.nextSequence(sequence)) 00254 { 00255 Sequence sample; 00256 seqFactory.createSequence(sample, sequence); 00257 lengths.push_back(sequence.size()); 00258 } 00259 reader.close(); 00260 DiscreteIIDModelFactory factory; 00261 return factory.smoothedDistributionKernelDensityMYOP(lengths); 00262 00263 } 00264 } 00265