ToPS
|
00001 /* 00002 * ProbabilisticModelCreatorClient.cpp 00003 * 00004 * Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br> 00005 * Ígor Bonádio <ibonadio@ime.usp.br> 00006 * Vitor Onuchic <vitoronuchic@gmail.com> 00007 * Alan Mitchell Durham <aland@usp.br> 00008 * 00009 * This program is free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 3 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00022 * MA 02110-1301, USA. 00023 */ 00024 00025 #include "ProbabilisticModelCreatorClient.hpp" 00026 #include "StoreLoadedModel.hpp" 00027 00028 #include "ProbabilisticModelCreator.hpp" 00029 #include "DiscreteIIDModelCreator.hpp" 00030 #include "BernoulliModelCreator.hpp" 00031 #include "ConfigurationReader.hpp" 00032 #include "VariableLengthMarkovChainCreator.hpp" 00033 #include "InhomogeneousMarkovChainCreator.hpp" 00034 #include "HiddenMarkovModelCreator.hpp" 00035 #include "PairHiddenMarkovModelCreator.hpp" 00036 #include "GeneralizedHiddenMarkovModelCreator.hpp" 00037 #include "TargetModelCreator.hpp" 00038 #include "SmoothedHistogramKernelDensity.hpp" 00039 #include "FixedSequenceAtPositionCreator.hpp" 00040 #include "ReverseComplementDNACreator.hpp" 00041 #include "PhasedRunLengthDistributionCreator.hpp" 00042 #include "ProbabilisticModelParameter.hpp" 00043 #include "SimilarityBasedSequenceWeightingCreator.hpp" 00044 #include "MultipleSequentialModelCreator.hpp" 00045 #include "util.hpp" 00046 00047 00048 #include "TrainHMMBaumWelch.hpp" 00049 #include "TrainVariableLengthMarkovChain.hpp" 00050 #include "TrainDiscreteIIDModel.hpp" 00051 #include "TrainFixedLengthMarkovChain.hpp" 00052 #include "TrainWeightArrayModel.hpp" 00053 #include "BayesianInformationCriteria.hpp" 00054 #include "AkaikeInformationCriteria.hpp" 00055 #include "TrainVariableLengthInhomogeneousMarkovChain.hpp" 00056 #include "SmoothedHistogramKernelDensity.hpp" 00057 #include "SmoothedHistogramStanke.hpp" 00058 #include "SmoothedHistogramBurge.hpp" 00059 #include "TrainPhasedMarkovChain.hpp" 00060 #include "TrainInterpolatedPhasedMarkovChain.hpp" 00061 #include "TrainPhasedMarkovChainContextAlgorithm.hpp" 00062 #include "RemoveSequenceFromModel.hpp" 00063 #include "SequenceFormat.hpp" 00064 00065 00066 namespace tops 00067 { 00068 00069 ProbabilisticModelPtr ProbabilisticModelCreatorClient::create(ProbabilisticModelParameters & parameters, const std::map<std::string,ProbabilisticModelPtr> & models) 00070 { 00071 ProbabilisticModelParameterValuePtr modelnamepar = parameters.getMandatoryParameterValue("model_name"); 00072 if(modelnamepar == NULL) 00073 { 00074 std::cerr << "Cant create model with no name !" << std::endl; 00075 exit(-1); 00076 } 00077 string command = modelnamepar->getString(); 00078 if(_createModelCommand.find(command) == _createModelCommand.end()) 00079 { 00080 cerr << "ERROR: invalid model: " << command << endl; 00081 cerr << "Implemented model are: " << endl; 00082 map<string, ProbabilisticModelCreatorPtr>::iterator it; 00083 for(it = _createModelCommand.begin(); it != _createModelCommand.end(); it++) 00084 cerr << "\t" << it->first << endl; 00085 exit(-1); 00086 } 00087 ProbabilisticModelPtr m = _createModelCommand[command]->create(parameters, models); 00088 return m; 00089 } 00090 00091 00092 00093 00094 ProbabilisticModelPtr ProbabilisticModelCreatorClient::create(ProbabilisticModelParameters & parameters) 00095 { 00096 ProbabilisticModelPtr null; 00097 ProbabilisticModelParameterValuePtr modelnamepar = parameters.getMandatoryParameterValue("model_name"); 00098 if(modelnamepar == NULL) 00099 { 00100 std::cerr << "Cant create model with no name !" << std::endl; 00101 return null; 00102 } 00103 string command = modelnamepar->getString(); 00104 if(_createModelCommand.find(command) == _createModelCommand.end()) 00105 { 00106 cerr << "ERROR: invalid model: " << command << endl; 00107 cerr << "Implemented model are: " << endl; 00108 map<string, ProbabilisticModelCreatorPtr>::iterator it; 00109 for(it = _createModelCommand.begin(); it != _createModelCommand.end(); it++) 00110 cerr << "\t" << it->first << endl; 00111 00112 return null; 00113 } 00114 ProbabilisticModelPtr m = _createModelCommand[command]->create(parameters); 00115 return m; 00116 } 00117 00118 ProbabilisticModelPtr ProbabilisticModelCreatorClient::train (const std::string & input_file_name) 00119 { 00120 return train(readConfigurationFromFile(input_file_name)); 00121 } 00122 00123 ProbabilisticModelPtr ProbabilisticModelCreatorClient::create(const std::string & input_file_name) 00124 { 00125 ProbabilisticModelPtr m = StoreLoadedModel::instance()->get(input_file_name); 00126 if(m != NULL) 00127 return m; 00128 m = create(readConfigurationFromFile(input_file_name)); 00129 return StoreLoadedModel::instance()->add(input_file_name, m); 00130 } 00131 00132 ProbabilisticModelParameters & ProbabilisticModelCreatorClient::readConfigurationFromFile(const std::string & filename) 00133 { 00134 ConfigurationReader readConfig; 00135 std::ifstream input; 00136 std::string line; 00137 ProbabilisticModelPtr null; 00138 input.open(filename.c_str()); 00139 if(!input.is_open()) 00140 { 00141 std::cerr << "Cant open file " << filename << std::endl; 00142 return _p; 00143 } 00144 string conf; 00145 while(!input.eof()) 00146 { 00147 getline(input,line,'\n'); 00148 line += "\n"; 00149 conf.append(line); 00150 } 00151 input.close(); 00152 if(readConfig.load(conf)){ 00153 _p = *(readConfig.parameters()); 00154 return _p; 00155 } 00156 00157 std::cerr << "Error reading configuration file ! " << std::endl; 00158 return _p; 00159 } 00160 00161 ProbabilisticModelPtr ProbabilisticModelCreatorClient::train (ProbabilisticModelParameters & parameters) 00162 { 00163 00164 ProbabilisticModelParameterValuePtr create_model = 00165 parameters.getMandatoryParameterValue("training_algorithm"); 00166 ProbabilisticModelParameterValuePtr bic = parameters.getOptionalParameterValue("model_selection_criteria"); 00167 ProbabilisticModelParameterValuePtr decorator = parameters.getOptionalParameterValue("decorator"); 00168 00169 if (create_model == NULL) { 00170 exit(-1); 00171 } 00172 00173 string command = create_model->getString(); 00174 00175 ProbabilisticModelCreatorPtr creator; 00176 if (_trainingCommand.find(command) == _trainingCommand.end()) { 00177 cerr << "ERROR: invalid training algorithm: " << command 00178 << endl; 00179 cerr << "Implemented training algorithms are: " << endl; 00180 map<string, ProbabilisticModelCreatorPtr>::iterator it; 00181 for (it = _trainingCommand.begin(); it 00182 != _trainingCommand.end(); it++) 00183 cerr << "\t" << it->first << endl; 00184 exit(-1); 00185 } else 00186 creator = _trainingCommand[command]; 00187 00188 if (bic != NULL) { 00189 if (_modelSelectionCommand.find(bic->getString()) 00190 != _modelSelectionCommand.end()) { 00191 creator = _modelSelectionCommand[bic->getString()]; 00192 creator->setCreator(_trainingCommand[command]); 00193 } else { 00194 cerr << "ERROR: invalid model selection criteria: " 00195 << command << endl; 00196 cerr << "Implemented model selection are: " << endl; 00197 map<string, ProbabilisticModelCreatorPtr>::iterator it; 00198 for (it = _modelSelectionCommand.begin(); it 00199 != _modelSelectionCommand.end(); it++) 00200 cerr << "\t" << it->first << endl; 00201 exit(-1); 00202 } 00203 00204 } 00205 00206 if(decorator != NULL) { 00207 if (_decoratorCommand.find(decorator->getString()) 00208 != _decoratorCommand.end()) { 00209 _decoratorCommand[decorator->getString()]->setCreator(creator); 00210 creator = _decoratorCommand[decorator->getString()]; 00211 } else { 00212 cerr << "ERROR: invalid decorator: " 00213 << command << endl; 00214 cerr << "Implemented decorators are: " << endl; 00215 map<string, ProbabilisticModelCreatorPtr>::iterator it; 00216 for (it = _decoratorCommand.begin(); it 00217 != _decoratorCommand.end(); it++) 00218 cerr << "\t" << it->first << endl; 00219 exit(-1); 00220 } 00221 00222 00223 } 00224 ProbabilisticModelPtr model = creator->create(parameters); 00225 #if 0 00226 struct timeval start, stop; 00227 gettimeofday(&start, (struct timezone *) NULL); 00228 ProbabilisticModelPtr model = creator->create(parameters); 00229 gettimeofday(&stop, (struct timezone *)NULL); 00230 stop.tv_sec -= start.tv_sec; 00231 stop.tv_usec -= start.tv_usec; 00232 if(stop.tv_usec < 0){ 00233 stop.tv_sec --; 00234 stop.tv_usec += 1000000; 00235 } 00236 fprintf(stderr, "Elapsed time %ld%c%02d seconds\n", stop.tv_sec, '.', stop.tv_usec/1000); 00237 #endif 00238 return model; 00239 00240 } 00241 00242 00243 void ProbabilisticModelCreatorClient::registry_new_creator(std::string name, ProbabilisticModelCreatorPtr creator){ 00244 if (_createModelCommand.find(name) == _createModelCommand.end() ) 00245 _createModelCommand[name] = creator; 00246 } 00247 void ProbabilisticModelCreatorClient::registry_new_training(std::string name, ProbabilisticModelCreatorPtr creator){ 00248 if(_trainingCommand.find(name) == _trainingCommand.end()) 00249 _trainingCommand[name] = creator; 00250 } 00251 void ProbabilisticModelCreatorClient::registry_new_model_selector(std::string name, ProbabilisticModelCreatorPtr creator){ 00252 if(_modelSelectionCommand.find(name) == _modelSelectionCommand.end()) 00253 _modelSelectionCommand[name] = creator; 00254 } 00255 void ProbabilisticModelCreatorClient::registry_new_decorator(std::string name, ProbabilisticModelCreatorPtr creator){ 00256 if(_decoratorCommand.find(name) == _decoratorCommand.end()) 00257 _decoratorCommand[name] = creator; 00258 } 00259 00260 00261 ProbabilisticModelCreatorClient:: ProbabilisticModelCreatorClient() { 00262 _trainingCommand["ContextAlgorithm"] = TrainVariableLengthMarkovChainPtr(new TrainVariableLengthMarkovChain()); 00263 _trainingCommand["FixedLengthMarkovChain"]= TrainFixedLengthMarkovChainPtr(new TrainFixedLengthMarkovChain()); 00264 _trainingCommand["BaumWelch"] = TrainHMMBaumWelchPtr(new TrainHMMBaumWelch()); 00265 _trainingCommand["WeightArrayModel"] = TrainWeightArrayModelPtr(new TrainWeightArrayModel()); 00266 _trainingCommand["VariableLengthInhomogeneousMarkovChain"] 00267 = TrainVariableLengthInhomogeneousMarkovChainPtr(new TrainVariableLengthInhomogeneousMarkovChain()); 00268 _trainingCommand["PhasedMarkovChain"] = TrainPhasedMarkovChainPtr(new TrainPhasedMarkovChain()); 00269 _trainingCommand["InterpolatedPhasedMarkovChain"] = TrainInterpolatedPhasedMarkovChainPtr(new TrainInterpolatedPhasedMarkovChain()); 00270 _trainingCommand["PhasedMarkovChainContextAlgorithm"] = TrainPhasedMarkovChainContextAlgorithmPtr(new TrainPhasedMarkovChainContextAlgorithm()); 00271 _trainingCommand["SmoothedHistogramKernelDensity"] = SmoothedHistogramKernelDensityPtr(new SmoothedHistogramKernelDensity()); 00272 _trainingCommand["SmoothedHistogramStanke"] = SmoothedHistogramStankePtr(new SmoothedHistogramStanke()); 00273 _trainingCommand["SmoothedHistogramBurge"] = SmoothedHistogramBurgePtr(new SmoothedHistogramBurge()); 00274 _trainingCommand["DiscreteIIDModel"] = TrainDiscreteIIDModelPtr(new TrainDiscreteIIDModel()); 00275 _trainingCommand["MultinomialDistribution"] = TrainDiscreteIIDModelPtr(new TrainDiscreteIIDModel()); 00276 _modelSelectionCommand["BIC"] = BayesianInformationCriteriaPtr(new BayesianInformationCriteria()); 00277 _modelSelectionCommand["AIC"] = AkaikeInformationCriteriaPtr(new AkaikeInformationCriteria()); 00278 _decoratorCommand["RemoveSequence"] = RemoveSequenceFromModelPtr(new RemoveSequenceFromModel()); 00279 _createModelCommand["DiscreteIIDModel"] = 00280 DiscreteIIDModelCreatorPtr(new DiscreteIIDModelCreator()); 00281 _createModelCommand["MultinomialDistribution"] = 00282 DiscreteIIDModelCreatorPtr(new DiscreteIIDModelCreator()); 00283 _createModelCommand["VariableLengthMarkovChain"] = 00284 VariableLengthMarkovChainCreatorPtr(new VariableLengthMarkovChainCreator()); 00285 _createModelCommand["InhomogeneousMarkovChain"] = 00286 InhomogeneousMarkovChainCreatorPtr(new InhomogeneousMarkovChainCreator()); 00287 _createModelCommand["HiddenMarkovModel"] = 00288 HiddenMarkovModelCreatorPtr(new HiddenMarkovModelCreator()); 00289 _createModelCommand["PairHiddenMarkovModel"] = 00290 PairHiddenMarkovModelCreatorPtr(new PairHiddenMarkovModelCreator()); 00291 _createModelCommand["GeneralizedHiddenMarkovModel"] = 00292 GeneralizedHiddenMarkovModelCreatorPtr(new GeneralizedHiddenMarkovModelCreator()); 00293 _createModelCommand["Bernoulli"] = 00294 BernoulliModelCreatorPtr(new BernoulliModelCreator()); 00295 _createModelCommand["TargetModel"] = 00296 TargetModelCreatorPtr(new TargetModelCreator()); 00297 _createModelCommand["FixedSequenceAtPosition"] = 00298 FixedSequenceAtPositionCreatorPtr(new FixedSequenceAtPositionCreator()); 00299 _createModelCommand["PhasedRunLengthDistribution"] = 00300 PhasedRunLengthDistributionCreatorPtr(new PhasedRunLengthDistributionCreator()); 00301 _createModelCommand["ReverseComplementDNA"] = 00302 ReverseComplementDNACreatorPtr(new ReverseComplementDNACreator()); 00303 _createModelCommand["SimilarityBasedSequenceWeighting"] = 00304 SimilarityBasedSequenceWeightingCreatorPtr(new SimilarityBasedSequenceWeightingCreator()); 00305 _createModelCommand["MultipleSequentialModels"] = MultipleSequentialModelCreatorPtr( new MultipleSequentialModelCreator()); 00306 00307 } 00308 } 00309