ToPS
SequenceFactory.cpp
00001 /*
00002  *       SequenceFactory.cpp
00003  *
00004  *       Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br>
00005  *                      Ígor Bonádio <ibonadio@ime.usp.br>
00006  *                      Vitor Onuchic <vitoronuchic@gmail.com>
00007  *                      Alan Mitchell Durham <aland@usp.br>
00008  *
00009  *       This program is free software; you can redistribute it and/or modify
00010  *       it under the terms of the GNU  General Public License as published by
00011  *       the Free Software Foundation; either version 3 of the License, or
00012  *       (at your option) any later version.
00013  *
00014  *       This program is distributed in the hope that it will be useful,
00015  *       but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  *       GNU General Public License for more details.
00018  *
00019  *       You should have received a copy of the GNU General Public License
00020  *       along with this program; if not, write to the Free Software
00021  *       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00022  *       MA 02110-1301, USA.
00023  */
00024 
00025 #include "Alphabet.hpp"
00026 #include "Symbol.hpp"
00027 #include "SequenceFactory.hpp"
00028 
00029 
00030 #include <iostream>
00031 #include <fstream>
00032 #include <sstream>
00033 
00034 namespace tops {
00035 
00036 
00037     Sequence SequenceFactory::createSequence(const std::string & seq){
00038         std::vector<int> inv;
00039         return createSequence(seq, inv);
00040     }
00041     Sequence SequenceFactory::createSequenceRemovedSpaces(const std::string & seq){
00042         std::vector<int> inv;
00043         return createSequenceRemovedSpaces(seq, inv);
00044     }
00045     Sequence SequenceFactory::createSequence(const std::vector<std::string> & seq){
00046         std::vector<int> inv;
00047         return createSequence(seq, inv);
00048     }
00049     Sequence  SequenceFactory::createSequenceRemovedSpaces(const std::string & seq, std::vector<int> & invalidPositions)
00050     {
00051         Sequence result;
00052     int i;
00053     int start = 0;
00054     //  transform(seq.begin(), seq.end(), seq.begin(), my_tolower());
00055     if(_alphabet->size() == 0)
00056       {
00057         std::istringstream instream;
00058         instream.str(seq);
00059         while(instream.good()) {
00060           double v;
00061           instream >> v;
00062           result.push_back (v);
00063         }
00064         return result;
00065       }
00066     if(seq.size() > 0)
00067       for(i = 0 ; i < (int)seq.size(); i++)
00068       {
00069             std::string substr = seq.substr(start, i-start+1);
00070             if((substr.size() > 0) && _alphabet->has(substr))
00071               {
00072                 result.push_back (_alphabet->getSymbol(substr)->id());
00073               }
00074             else
00075               {
00076                 if(substr.size() >0) {
00077                   result.push_back (0);
00078                   std::cerr << "WARNING: Invalid symbol at position " <<  start <<": '" << substr << "'" << std::endl;
00079                   invalidPositions.push_back(result.size()-1);
00080                 }
00081               }
00082             start = i+1;
00083       }
00084       std::string substr = seq.substr(start, seq.size()-start);
00085       if((substr.size() > 0) && _alphabet->has(substr))
00086         {
00087           result.push_back (_alphabet->getSymbol(substr)->id());
00088         }
00089 
00090     return result;
00091   }
00092 
00093   Sequence  SequenceFactory::createSequence(const std::string & seq, std::vector<int> & invalidPositions)
00094   {
00095     Sequence result;
00096     int i;
00097     int start = 0;
00098     //  transform(seq.begin(), seq.end(), seq.begin(), my_tolower());
00099     if(_alphabet->size() == 0)
00100       {
00101         std::istringstream instream;
00102         instream.str(seq);
00103         while(instream.good()) {
00104           double v;
00105           instream >> v;
00106           result.push_back (v);
00107         }
00108         return result;
00109       }
00110     if(seq.size() > 0)
00111       for(i = 0 ; i < (int)seq.size(); i++)
00112       {
00113         if(seq[i] == ' ')
00114           {
00115             std::string substr = seq.substr(start, i-start);
00116             if((substr.size() > 0) && _alphabet->has(substr))
00117               {
00118                 result.push_back (_alphabet->getSymbol(substr)->id());
00119               }
00120             else
00121               {
00122                 if(substr.size() >0) {
00123                   result.push_back (0);
00124                   std::cerr << "WARNING: Invalid symbol at position " <<  start <<": '" << substr << "'" << std::endl;
00125                   invalidPositions.push_back(result.size()-1);
00126                 }
00127               }
00128             start = i+1;
00129           }
00130       }
00131     std::string substr = seq.substr(start, seq.size()-start);
00132     if((substr.size() > 0) && _alphabet->has(substr))
00133       {
00134         result.push_back (_alphabet->getSymbol(substr)->id());
00135       }
00136 
00137     return result;
00138   }
00139 
00140   Sequence SequenceFactory::createSequence(const std::vector<std::string> & seq, std::vector<int> & invalidPositions)
00141   {
00142     Sequence result;
00143 
00144     if(_alphabet->size() == 0)
00145       {
00146         std::istringstream instream;
00147         for(int i = 0 ; i < (int)seq.size(); i++)
00148           {
00149             instream.clear();
00150             instream.str(seq[i]);
00151             double v;
00152             instream >> v;
00153             result.push_back (v);
00154           }
00155       }
00156     else
00157       {
00158         for(int i = 0 ; i < (int)seq.size(); i++){
00159           if(_alphabet->getSymbol(seq[i])->id()<0){
00160             result.push_back (0);
00161             std::cerr << "WARNING: Invalid symbol at position " <<  i  <<": '" << seq[i] << "'" << std::endl;
00162           }
00163           else{
00164             result.push_back (_alphabet->getSymbol(seq[i])->id());
00165             invalidPositions.push_back(result.size()-1);
00166           }
00167         }
00168       }
00169     return result;
00170   }
00171 }