ToPS
ProbabilisticModelCreatorClient.cpp
00001 /*
00002  *       ProbabilisticModelCreatorClient.cpp
00003  *
00004  *       Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br>
00005  *                      Ígor Bonádio <ibonadio@ime.usp.br>
00006  *                      Vitor Onuchic <vitoronuchic@gmail.com>
00007  *                      Alan Mitchell Durham <aland@usp.br>
00008  *
00009  *       This program is free software; you can redistribute it and/or modify
00010  *       it under the terms of the GNU  General Public License as published by
00011  *       the Free Software Foundation; either version 3 of the License, or
00012  *       (at your option) any later version.
00013  *
00014  *       This program is distributed in the hope that it will be useful,
00015  *       but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  *       GNU General Public License for more details.
00018  *
00019  *       You should have received a copy of the GNU General Public License
00020  *       along with this program; if not, write to the Free Software
00021  *       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00022  *       MA 02110-1301, USA.
00023  */
00024 
00025 #include "ProbabilisticModelCreatorClient.hpp"
00026 #include "StoreLoadedModel.hpp"
00027 
00028 #include "ProbabilisticModelCreator.hpp"
00029 #include "DiscreteIIDModelCreator.hpp"
00030 #include "BernoulliModelCreator.hpp"
00031 #include "ConfigurationReader.hpp"
00032 #include "VariableLengthMarkovChainCreator.hpp"
00033 #include "InhomogeneousMarkovChainCreator.hpp"
00034 #include "HiddenMarkovModelCreator.hpp"
00035 #include "PairHiddenMarkovModelCreator.hpp"
00036 #include "GeneralizedHiddenMarkovModelCreator.hpp"
00037 #include "TargetModelCreator.hpp"
00038 #include "SmoothedHistogramKernelDensity.hpp"
00039 #include "FixedSequenceAtPositionCreator.hpp"
00040 #include "ReverseComplementDNACreator.hpp"
00041 #include "PhasedRunLengthDistributionCreator.hpp"
00042 #include "ProbabilisticModelParameter.hpp"
00043 #include "SimilarityBasedSequenceWeightingCreator.hpp"
00044 #include "MultipleSequentialModelCreator.hpp"
00045 #include "util.hpp"
00046 
00047 
00048 #include "TrainHMMBaumWelch.hpp"
00049 #include "TrainVariableLengthMarkovChain.hpp"
00050 #include "TrainDiscreteIIDModel.hpp"
00051 #include "TrainFixedLengthMarkovChain.hpp"
00052 #include "TrainWeightArrayModel.hpp"
00053 #include "BayesianInformationCriteria.hpp"
00054 #include "AkaikeInformationCriteria.hpp"
00055 #include "TrainVariableLengthInhomogeneousMarkovChain.hpp"
00056 #include "SmoothedHistogramKernelDensity.hpp"
00057 #include "SmoothedHistogramStanke.hpp"
00058 #include "SmoothedHistogramBurge.hpp"
00059 #include "TrainPhasedMarkovChain.hpp"
00060 #include "TrainInterpolatedPhasedMarkovChain.hpp"
00061 #include "TrainPhasedMarkovChainContextAlgorithm.hpp"
00062 #include "RemoveSequenceFromModel.hpp"
00063 #include "SequenceFormat.hpp"
00064 
00065 
00066 namespace tops
00067 {
00068 
00069   ProbabilisticModelPtr ProbabilisticModelCreatorClient::create(ProbabilisticModelParameters & parameters, const std::map<std::string,ProbabilisticModelPtr> & models)
00070   {
00071     ProbabilisticModelParameterValuePtr modelnamepar = parameters.getMandatoryParameterValue("model_name");
00072     if(modelnamepar == NULL)
00073       {
00074         std::cerr << "Cant create model with no name !" << std::endl;
00075         exit(-1);
00076       }
00077     string command = modelnamepar->getString();
00078     if(_createModelCommand.find(command) == _createModelCommand.end())
00079       {
00080         cerr << "ERROR: invalid  model: " << command << endl;
00081         cerr << "Implemented model are: " << endl;
00082         map<string, ProbabilisticModelCreatorPtr>::iterator it;
00083         for(it = _createModelCommand.begin(); it != _createModelCommand.end(); it++)
00084           cerr << "\t" << it->first << endl;
00085         exit(-1);
00086       }
00087     ProbabilisticModelPtr m = _createModelCommand[command]->create(parameters, models);
00088     return m;
00089   }
00090 
00091 
00092 
00093 
00094   ProbabilisticModelPtr ProbabilisticModelCreatorClient::create(ProbabilisticModelParameters & parameters)
00095   {
00096     ProbabilisticModelPtr null;
00097     ProbabilisticModelParameterValuePtr modelnamepar = parameters.getMandatoryParameterValue("model_name");
00098     if(modelnamepar == NULL)
00099       {
00100         std::cerr << "Cant create model with no name !" << std::endl;
00101         return null;
00102       }
00103     string command = modelnamepar->getString();
00104     if(_createModelCommand.find(command) == _createModelCommand.end())
00105       {
00106         cerr << "ERROR: invalid  model: " << command << endl;
00107         cerr << "Implemented model are: " << endl;
00108         map<string, ProbabilisticModelCreatorPtr>::iterator it;
00109         for(it = _createModelCommand.begin(); it != _createModelCommand.end(); it++)
00110           cerr << "\t" << it->first << endl;
00111 
00112         return null;
00113       }
00114     ProbabilisticModelPtr m = _createModelCommand[command]->create(parameters);
00115     return m;
00116   }
00117 
00118   ProbabilisticModelPtr ProbabilisticModelCreatorClient::train (const std::string & input_file_name)
00119   {
00120     return train(readConfigurationFromFile(input_file_name));
00121   }
00122 
00123     ProbabilisticModelPtr ProbabilisticModelCreatorClient::create(const std::string & input_file_name)
00124     {
00125         ProbabilisticModelPtr m = StoreLoadedModel::instance()->get(input_file_name);
00126         if(m != NULL)
00127             return m;
00128         m = create(readConfigurationFromFile(input_file_name));
00129         return StoreLoadedModel::instance()->add(input_file_name, m);
00130     }
00131 
00132   ProbabilisticModelParameters & ProbabilisticModelCreatorClient::readConfigurationFromFile(const std::string  & filename)
00133   {
00134     ConfigurationReader readConfig;
00135     std::ifstream input;
00136     std::string line;
00137     ProbabilisticModelPtr null;
00138     input.open(filename.c_str());
00139     if(!input.is_open())
00140       {
00141         std::cerr << "Cant open file "  << filename << std::endl;
00142         return _p;
00143       }
00144     string conf;
00145     while(!input.eof())
00146       {
00147         getline(input,line,'\n');
00148         line += "\n";
00149         conf.append(line);
00150       }
00151     input.close();
00152     if(readConfig.load(conf)){
00153       _p = *(readConfig.parameters());
00154       return _p;
00155     }
00156 
00157     std::cerr << "Error reading configuration file ! " << std::endl;
00158     return _p;
00159   }
00160 
00161   ProbabilisticModelPtr ProbabilisticModelCreatorClient::train (ProbabilisticModelParameters & parameters)
00162   {
00163 
00164     ProbabilisticModelParameterValuePtr create_model =
00165       parameters.getMandatoryParameterValue("training_algorithm");
00166     ProbabilisticModelParameterValuePtr bic =  parameters.getOptionalParameterValue("model_selection_criteria");
00167     ProbabilisticModelParameterValuePtr decorator =  parameters.getOptionalParameterValue("decorator");
00168 
00169     if (create_model == NULL) {
00170       exit(-1);
00171     }
00172 
00173     string command = create_model->getString();
00174 
00175     ProbabilisticModelCreatorPtr creator;
00176     if (_trainingCommand.find(command) == _trainingCommand.end()) {
00177       cerr << "ERROR: invalid  training algorithm: " << command
00178            << endl;
00179       cerr << "Implemented training algorithms are: " << endl;
00180       map<string, ProbabilisticModelCreatorPtr>::iterator it;
00181       for (it = _trainingCommand.begin(); it
00182              != _trainingCommand.end(); it++)
00183         cerr << "\t" << it->first << endl;
00184       exit(-1);
00185     } else
00186       creator = _trainingCommand[command];
00187 
00188     if (bic != NULL) {
00189       if (_modelSelectionCommand.find(bic->getString())
00190           != _modelSelectionCommand.end()) {
00191         creator = _modelSelectionCommand[bic->getString()];
00192         creator->setCreator(_trainingCommand[command]);
00193       } else {
00194         cerr << "ERROR: invalid  model selection criteria: "
00195              << command << endl;
00196         cerr << "Implemented model selection are: " << endl;
00197         map<string, ProbabilisticModelCreatorPtr>::iterator it;
00198         for (it = _modelSelectionCommand.begin(); it
00199                != _modelSelectionCommand.end(); it++)
00200           cerr << "\t" << it->first << endl;
00201         exit(-1);
00202       }
00203 
00204     }
00205 
00206     if(decorator != NULL) {
00207       if (_decoratorCommand.find(decorator->getString())
00208           != _decoratorCommand.end()) {
00209         _decoratorCommand[decorator->getString()]->setCreator(creator);
00210         creator = _decoratorCommand[decorator->getString()];
00211       } else {
00212         cerr << "ERROR: invalid  decorator: "
00213              << command << endl;
00214         cerr << "Implemented decorators are: " << endl;
00215         map<string, ProbabilisticModelCreatorPtr>::iterator it;
00216         for (it = _decoratorCommand.begin(); it
00217                != _decoratorCommand.end(); it++)
00218           cerr << "\t" << it->first << endl;
00219         exit(-1);
00220       }
00221 
00222 
00223     }
00224     ProbabilisticModelPtr model = creator->create(parameters);
00225 #if 0
00226     struct timeval start, stop;
00227     gettimeofday(&start, (struct timezone *) NULL);
00228     ProbabilisticModelPtr model = creator->create(parameters);
00229     gettimeofday(&stop, (struct timezone *)NULL);
00230     stop.tv_sec -= start.tv_sec;
00231     stop.tv_usec -= start.tv_usec;
00232     if(stop.tv_usec  < 0){
00233       stop.tv_sec --;
00234       stop.tv_usec += 1000000;
00235     }
00236     fprintf(stderr, "Elapsed time %ld%c%02d seconds\n", stop.tv_sec, '.', stop.tv_usec/1000);
00237 #endif
00238     return model;
00239 
00240   }
00241 
00242 
00243   void ProbabilisticModelCreatorClient::registry_new_creator(std::string name, ProbabilisticModelCreatorPtr creator){
00244     if (_createModelCommand.find(name) == _createModelCommand.end() )
00245       _createModelCommand[name] = creator;
00246   }
00247   void ProbabilisticModelCreatorClient::registry_new_training(std::string name, ProbabilisticModelCreatorPtr creator){
00248     if(_trainingCommand.find(name) == _trainingCommand.end())
00249       _trainingCommand[name] = creator;
00250   }
00251   void ProbabilisticModelCreatorClient::registry_new_model_selector(std::string name, ProbabilisticModelCreatorPtr creator){
00252     if(_modelSelectionCommand.find(name) == _modelSelectionCommand.end())
00253       _modelSelectionCommand[name] = creator;
00254   }
00255   void ProbabilisticModelCreatorClient::registry_new_decorator(std::string name, ProbabilisticModelCreatorPtr creator){
00256     if(_decoratorCommand.find(name) == _decoratorCommand.end())
00257       _decoratorCommand[name] = creator;
00258   }
00259 
00260 
00261   ProbabilisticModelCreatorClient::  ProbabilisticModelCreatorClient() {
00262     _trainingCommand["ContextAlgorithm"] = TrainVariableLengthMarkovChainPtr(new TrainVariableLengthMarkovChain());
00263     _trainingCommand["FixedLengthMarkovChain"]= TrainFixedLengthMarkovChainPtr(new TrainFixedLengthMarkovChain());
00264     _trainingCommand["BaumWelch"] = TrainHMMBaumWelchPtr(new TrainHMMBaumWelch());
00265     _trainingCommand["WeightArrayModel"] = TrainWeightArrayModelPtr(new TrainWeightArrayModel());
00266     _trainingCommand["VariableLengthInhomogeneousMarkovChain"]
00267       = TrainVariableLengthInhomogeneousMarkovChainPtr(new TrainVariableLengthInhomogeneousMarkovChain());
00268     _trainingCommand["PhasedMarkovChain"] = TrainPhasedMarkovChainPtr(new TrainPhasedMarkovChain());
00269     _trainingCommand["InterpolatedPhasedMarkovChain"] = TrainInterpolatedPhasedMarkovChainPtr(new TrainInterpolatedPhasedMarkovChain());
00270     _trainingCommand["PhasedMarkovChainContextAlgorithm"] = TrainPhasedMarkovChainContextAlgorithmPtr(new TrainPhasedMarkovChainContextAlgorithm());
00271     _trainingCommand["SmoothedHistogramKernelDensity"] = SmoothedHistogramKernelDensityPtr(new SmoothedHistogramKernelDensity());
00272     _trainingCommand["SmoothedHistogramStanke"] = SmoothedHistogramStankePtr(new SmoothedHistogramStanke());
00273     _trainingCommand["SmoothedHistogramBurge"] = SmoothedHistogramBurgePtr(new SmoothedHistogramBurge());
00274     _trainingCommand["DiscreteIIDModel"] = TrainDiscreteIIDModelPtr(new TrainDiscreteIIDModel());
00275     _trainingCommand["MultinomialDistribution"] = TrainDiscreteIIDModelPtr(new TrainDiscreteIIDModel());
00276     _modelSelectionCommand["BIC"] = BayesianInformationCriteriaPtr(new BayesianInformationCriteria());
00277     _modelSelectionCommand["AIC"] = AkaikeInformationCriteriaPtr(new AkaikeInformationCriteria());
00278     _decoratorCommand["RemoveSequence"] = RemoveSequenceFromModelPtr(new RemoveSequenceFromModel());
00279     _createModelCommand["DiscreteIIDModel"] =
00280       DiscreteIIDModelCreatorPtr(new DiscreteIIDModelCreator());
00281     _createModelCommand["MultinomialDistribution"] =
00282       DiscreteIIDModelCreatorPtr(new DiscreteIIDModelCreator());
00283     _createModelCommand["VariableLengthMarkovChain"] =
00284       VariableLengthMarkovChainCreatorPtr(new VariableLengthMarkovChainCreator());
00285     _createModelCommand["InhomogeneousMarkovChain"] =
00286       InhomogeneousMarkovChainCreatorPtr(new InhomogeneousMarkovChainCreator());
00287     _createModelCommand["HiddenMarkovModel"] =
00288       HiddenMarkovModelCreatorPtr(new HiddenMarkovModelCreator());
00289     _createModelCommand["PairHiddenMarkovModel"] =
00290       PairHiddenMarkovModelCreatorPtr(new PairHiddenMarkovModelCreator());
00291     _createModelCommand["GeneralizedHiddenMarkovModel"] =
00292       GeneralizedHiddenMarkovModelCreatorPtr(new GeneralizedHiddenMarkovModelCreator());
00293     _createModelCommand["Bernoulli"] =
00294       BernoulliModelCreatorPtr(new BernoulliModelCreator());
00295     _createModelCommand["TargetModel"] =
00296       TargetModelCreatorPtr(new TargetModelCreator());
00297     _createModelCommand["FixedSequenceAtPosition"] =
00298       FixedSequenceAtPositionCreatorPtr(new FixedSequenceAtPositionCreator());
00299     _createModelCommand["PhasedRunLengthDistribution"] =
00300       PhasedRunLengthDistributionCreatorPtr(new PhasedRunLengthDistributionCreator());
00301     _createModelCommand["ReverseComplementDNA"] =
00302       ReverseComplementDNACreatorPtr(new ReverseComplementDNACreator());
00303     _createModelCommand["SimilarityBasedSequenceWeighting"] =
00304       SimilarityBasedSequenceWeightingCreatorPtr(new SimilarityBasedSequenceWeightingCreator());
00305     _createModelCommand["MultipleSequentialModels"] = MultipleSequentialModelCreatorPtr( new MultipleSequentialModelCreator());
00306 
00307   }
00308 }
00309