ToPS
DiscreteIIDModelCreateModel.cpp
00001 /*
00002  *       DiscreteIIDModelCreateModel.cpp
00003  *
00004  *       Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br>
00005  *                      Ígor Bonádio <ibonadio@ime.usp.br>
00006  *                      Vitor Onuchic <vitoronuchic@gmail.com>
00007  *                      Alan Mitchell Durham <aland@usp.br>
00008  *
00009  *       This program is free software; you can redistribute it and/or modify
00010  *       it under the terms of the GNU  General Public License as published by
00011  *       the Free Software Foundation; either version 3 of the License, or
00012  *       (at your option) any later version.
00013  *
00014  *       This program is distributed in the hope that it will be useful,
00015  *       but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  *       GNU General Public License for more details.
00018  *
00019  *       You should have received a copy of the GNU General Public License
00020  *       along with this program; if not, write to the Free Software
00021  *       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00022  *       MA 02110-1301, USA.
00023  */
00024 
00025 #include "Alphabet.h"
00026 #include "DiscreteIIDModelFactory.h"
00027 #include "DiscreteIIDModelCreateModel.h"
00028 #include "ReadConfigurationFile.h"
00029 #include "SequenceFactory.h"
00030 #include "FASTAReader.h"
00031 #include <string>
00032 
00033 
00034 namespace myop {
00035 
00036   ProbabilisticModelPtr DiscreteIIDModelCreateModel::create (const std::string & config) {
00037     std::string TYPE("distribution");
00038     std::vector <std::string> mandatory;
00039     mandatory.push_back(TYPE);
00040     ReadConfigurationFile readConf;
00041     Configuration conf = readConf.load_configuration_file(config, mandatory);
00042 
00043     std::map <std::string, DiscreteIIDModelCreateModelPtr> commands;
00044     std::string BERNOULLI("Bernoulli");
00045     std::string UNIFORM_ALPHABET("Uniform");
00046     std::string SMOOTHED_HISTOGRAM_BURGE("SmoothedHistogramBurge");
00047     std::string SMOOTHED_HISTOGRAM_STANKE("SmoothedHistogramStanke");
00048     std::string SMOOTHED_HISTOGRAM_SHEATHER_JONES("SmoothedHistogramSheaterJones");
00049     std::string SMOOTHED_HISTOGRAM_MYOP("SmoothedHistogramMYOP");
00050     std::string SMOOTHED_HISTOGRAM_MAJOROS("SmoothedHistogramMajoros");
00051 
00052     commands[BERNOULLI] = DiscreteIIDModelCreateModelPtr(new BernoulliCreateModel());
00053     commands[UNIFORM_ALPHABET] = DiscreteIIDModelCreateModelPtr(new UniformAlphabetCreateModel());
00054     commands[SMOOTHED_HISTOGRAM_BURGE] = DiscreteIIDModelCreateModelPtr(new SmoothedHistogramBurgeCreateModel());
00055     commands[SMOOTHED_HISTOGRAM_STANKE] = DiscreteIIDModelCreateModelPtr(new SmoothedHistogramStankeCreateModel());
00056     commands[SMOOTHED_HISTOGRAM_SHEATHER_JONES] = DiscreteIIDModelCreateModelPtr(new SmoothedHistogramKernelDensityCreateModel());
00057     commands[SMOOTHED_HISTOGRAM_MYOP] = DiscreteIIDModelCreateModelPtr(new SmoothedHistogramMYOPCreateModel());
00058     commands[SMOOTHED_HISTOGRAM_MAJOROS] = DiscreteIIDModelCreateModelPtr(new SmoothedHistogramMajorosCreateModel());
00059 
00060     if(commands.find(conf[TYPE]) == commands.end())
00061       {
00062         std::cerr << "Invalid value for \"distribution\" parameter" << std::endl;
00063         std::cerr << "Valid values are: " << std::endl;
00064         std::map<std::string,DiscreteIIDModelCreateModelPtr> :: iterator it;
00065         for (it = commands.begin(); it != commands.end(); it++)
00066           std::cerr << "\t" << it->first << std::endl;
00067         exit(-1);
00068       }
00069     return commands[conf[TYPE]]->create(config);
00070   }
00071 
00072   ProbabilisticModelPtr BernoulliCreateModel::create (const std::string & config) {
00073     std::string PROBABILITY ("probability");
00074     std::vector <std::string> mandatory;
00075     mandatory.push_back(PROBABILITY);
00076     ReadConfigurationFile readConf;
00077     Configuration conf = readConf.load_configuration_file(config, mandatory);
00078     DiscreteIIDModelFactory factory;
00079     return factory.bernoulli(atof(conf[PROBABILITY].c_str()));
00080   }
00081 
00082   ProbabilisticModelPtr UniformAlphabetCreateModel::create (const std::string & config) {
00083     std::string ALPHABET("alphabet");
00084     std::vector <std::string> mandatory;
00085     mandatory.push_back(ALPHABET);
00086     ReadConfigurationFile readConf;
00087     Configuration conf = readConf.load_configuration_file(config, mandatory);
00088     AlphabetPtr alphabet = AlphabetPtr(new Alphabet());
00089     alphabet->initializeFromString(conf[ALPHABET]);
00090     DiscreteIIDModelFactory factory;
00091     return factory.uniform(alphabet);
00092   }
00093 
00094   ProbabilisticModelPtr SmoothedHistogramBurgeCreateModel::create (const std::string & config) {
00095     std::string TRAINING_SET("trainig_set");
00096     std::string C("C");
00097     std::string ALPHABET("alphabet");
00098     std::vector <std::string> mandatory;
00099     mandatory.push_back(TRAINING_SET);
00100     mandatory.push_back(C);
00101     mandatory.push_back(ALPHABET);
00102     ReadConfigurationFile readConf;
00103     Configuration conf = readConf.load_configuration_file(config, mandatory);
00104     DoubleVector lengths;
00105 
00106     FASTAReader reader;
00107     if(!reader.open(conf[TRAINING_SET]))
00108       {
00109         std::cerr << "Can't open file: " << conf[TRAINING_SET] << std::endl;
00110         exit(-1);
00111       }
00112     std::string sequence;
00113     AlphabetPtr alphabet = AlphabetPtr(new Alphabet());
00114     alphabet->initializeFromString(conf[ALPHABET]);
00115     SequenceFactory seqFactory(alphabet);
00116     while(reader.nextSequence(sequence))
00117       {
00118         Sequence sample;
00119         seqFactory.createSequence(sample, sequence);
00120         lengths.push_back((double)sequence.size());
00121       }
00122     reader.close();
00123     DiscreteIIDModelFactory factory;
00124     return factory.smoothedDistributionBurge(lengths, atof(conf[C].c_str()));
00125   }
00126 
00127   ProbabilisticModelPtr SmoothedHistogramMajorosCreateModel::create (const std::string & config) {
00128     std::string TRAINING_SET("trainig_set");
00129     std::string WINDOW_SIZE("window_size");
00130     std::string INTERACTIONS("interactions");
00131     std::string N("N");
00132     std::string ALPHABET("alphabet");
00133     std::vector <std::string> mandatory;
00134     mandatory.push_back(TRAINING_SET);
00135     mandatory.push_back(WINDOW_SIZE);
00136     mandatory.push_back(INTERACTIONS);
00137     mandatory.push_back(N);
00138     mandatory.push_back(ALPHABET);
00139     ReadConfigurationFile readConf;
00140     Configuration conf = readConf.load_configuration_file(config, mandatory);
00141     DoubleVector lengths;
00142 
00143     FASTAReader reader;
00144 
00145     if(!reader.open(conf[TRAINING_SET]))
00146       {
00147         std::cerr << "Can't open file: " << conf[TRAINING_SET] << std::endl;
00148         exit(-1);
00149       }
00150     std::string sequence;
00151     AlphabetPtr alphabet = AlphabetPtr(new Alphabet());
00152     alphabet->initializeFromString(conf[ALPHABET]);
00153     SequenceFactory seqFactory(alphabet);
00154     while(reader.nextSequence(sequence))
00155       {
00156         Sequence sample;
00157         seqFactory.createSequence(sample, sequence);
00158         lengths.push_back((double)sequence.size());
00159       }
00160     reader.close();
00161     DiscreteIIDModelFactory factory;
00162     return factory.smoothedDistributionMajoros(lengths, atoi(conf[WINDOW_SIZE].c_str()), atoi(conf[INTERACTIONS].c_str()), atoi(conf[N].c_str()));
00163   }
00164 
00165 
00166   ProbabilisticModelPtr SmoothedHistogramStankeCreateModel::create (const std::string & config)
00167   {
00168     std::string TRAINING_SET("trainig_set");
00169     std::string ALPHABET("alphabet");
00170     std::vector <std::string> mandatory;
00171     mandatory.push_back(TRAINING_SET);
00172     mandatory.push_back(ALPHABET);
00173     ReadConfigurationFile readConf;
00174     Configuration conf = readConf.load_configuration_file(config, mandatory);
00175     DoubleVector lengths;
00176 
00177     FASTAReader reader ;
00178 
00179     if(!reader.open(conf[TRAINING_SET]))
00180       {
00181         std::cerr << "Can't open file: " << conf[TRAINING_SET] << std::endl;
00182         exit(-1);
00183       }
00184     std::string sequence;
00185     AlphabetPtr alphabet = AlphabetPtr(new Alphabet());
00186     alphabet->initializeFromString(conf[ALPHABET]);
00187     SequenceFactory seqFactory(alphabet);
00188     while(reader.nextSequence(sequence))
00189       {
00190         Sequence sample;
00191         seqFactory.createSequence(sample, sequence);
00192         lengths.push_back(sequence.size());
00193       }
00194     reader.close();
00195     DiscreteIIDModelFactory factory;
00196     return factory.smoothedDistributionKernelDensityStanke(lengths);
00197   }
00198 
00199 
00200   ProbabilisticModelPtr SmoothedHistogramKernelDensityCreateModel::create (const std::string & config) {
00201     std::string TRAINING_SET("trainig_set");
00202     std::string ALPHABET("alphabet");
00203     std::vector <std::string> mandatory;
00204     mandatory.push_back(TRAINING_SET);
00205     mandatory.push_back(ALPHABET);
00206     ReadConfigurationFile readConf;
00207     Configuration conf = readConf.load_configuration_file(config, mandatory);
00208     DoubleVector lengths;
00209     std::ifstream is;
00210     FASTAReader reader;
00211 
00212     if(!reader.open(conf[TRAINING_SET]))
00213       {
00214         std::cerr << "Can't open file: " << conf[TRAINING_SET] << std::endl;
00215         exit(-1);
00216       }
00217     std::string sequence;
00218     AlphabetPtr alphabet = AlphabetPtr(new Alphabet());
00219     alphabet->initializeFromString(conf[ALPHABET]);
00220     SequenceFactory seqFactory(alphabet);
00221     while(reader.nextSequence(sequence))
00222       {
00223         Sequence sample;
00224         seqFactory.createSequence(sample, sequence);
00225         lengths.push_back(sequence.size());
00226       }
00227     reader.close();
00228     DiscreteIIDModelFactory factory;
00229     return factory.smoothedDistributionKernelDensity(lengths);
00230   }
00231 
00232 
00233   ProbabilisticModelPtr SmoothedHistogramMYOPCreateModel::create (const std::string & config) {
00234     std::string TRAINING_SET("trainig_set");
00235     std::string ALPHABET("alphabet");
00236     std::vector <std::string> mandatory;
00237     mandatory.push_back(TRAINING_SET);
00238     mandatory.push_back(ALPHABET);
00239     ReadConfigurationFile readConf;
00240     Configuration conf = readConf.load_configuration_file(config, mandatory);
00241     DoubleVector lengths;
00242     FASTAReader reader;
00243 
00244     if(!reader.open(conf[TRAINING_SET]))
00245       {
00246         std::cerr << "Can't open file: " << conf[TRAINING_SET] << std::endl;
00247         exit(-1);
00248       }
00249     std::string sequence;
00250     AlphabetPtr alphabet = AlphabetPtr(new Alphabet());
00251     alphabet->initializeFromString(conf[ALPHABET]);
00252     SequenceFactory seqFactory(alphabet);
00253     while(reader.nextSequence(sequence))
00254       {
00255         Sequence sample;
00256         seqFactory.createSequence(sample, sequence);
00257         lengths.push_back(sequence.size());
00258       }
00259     reader.close();
00260     DiscreteIIDModelFactory factory;
00261     return factory.smoothedDistributionKernelDensityMYOP(lengths);
00262 
00263   }
00264 }
00265