ToPS
|
00001 /* 00002 * SequenceFactory.cpp 00003 * 00004 * Copyright 2011 Andre Yoshiaki Kashiwabara <akashiwabara@usp.br> 00005 * Ígor Bonádio <ibonadio@ime.usp.br> 00006 * Vitor Onuchic <vitoronuchic@gmail.com> 00007 * Alan Mitchell Durham <aland@usp.br> 00008 * 00009 * This program is free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 3 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00022 * MA 02110-1301, USA. 00023 */ 00024 00025 #include "Alphabet.hpp" 00026 #include "Symbol.hpp" 00027 #include "SequenceFactory.hpp" 00028 00029 00030 #include <iostream> 00031 #include <fstream> 00032 #include <sstream> 00033 00034 namespace tops { 00035 00036 00037 Sequence SequenceFactory::createSequence(const std::string & seq){ 00038 std::vector<int> inv; 00039 return createSequence(seq, inv); 00040 } 00041 Sequence SequenceFactory::createSequenceRemovedSpaces(const std::string & seq){ 00042 std::vector<int> inv; 00043 return createSequenceRemovedSpaces(seq, inv); 00044 } 00045 Sequence SequenceFactory::createSequence(const std::vector<std::string> & seq){ 00046 std::vector<int> inv; 00047 return createSequence(seq, inv); 00048 } 00049 Sequence SequenceFactory::createSequenceRemovedSpaces(const std::string & seq, std::vector<int> & invalidPositions) 00050 { 00051 Sequence result; 00052 int i; 00053 int start = 0; 00054 // transform(seq.begin(), seq.end(), seq.begin(), my_tolower()); 00055 if(_alphabet->size() == 0) 00056 { 00057 std::istringstream instream; 00058 instream.str(seq); 00059 while(instream.good()) { 00060 double v; 00061 instream >> v; 00062 result.push_back (v); 00063 } 00064 return result; 00065 } 00066 if(seq.size() > 0) 00067 for(i = 0 ; i < (int)seq.size(); i++) 00068 { 00069 std::string substr = seq.substr(start, i-start+1); 00070 if((substr.size() > 0) && _alphabet->has(substr)) 00071 { 00072 result.push_back (_alphabet->getSymbol(substr)->id()); 00073 } 00074 else 00075 { 00076 if(substr.size() >0) { 00077 result.push_back (0); 00078 std::cerr << "WARNING: Invalid symbol at position " << start <<": '" << substr << "'" << std::endl; 00079 invalidPositions.push_back(result.size()-1); 00080 } 00081 } 00082 start = i+1; 00083 } 00084 std::string substr = seq.substr(start, seq.size()-start); 00085 if((substr.size() > 0) && _alphabet->has(substr)) 00086 { 00087 result.push_back (_alphabet->getSymbol(substr)->id()); 00088 } 00089 00090 return result; 00091 } 00092 00093 Sequence SequenceFactory::createSequence(const std::string & seq, std::vector<int> & invalidPositions) 00094 { 00095 Sequence result; 00096 int i; 00097 int start = 0; 00098 // transform(seq.begin(), seq.end(), seq.begin(), my_tolower()); 00099 if(_alphabet->size() == 0) 00100 { 00101 std::istringstream instream; 00102 instream.str(seq); 00103 while(instream.good()) { 00104 double v; 00105 instream >> v; 00106 result.push_back (v); 00107 } 00108 return result; 00109 } 00110 if(seq.size() > 0) 00111 for(i = 0 ; i < (int)seq.size(); i++) 00112 { 00113 if(seq[i] == ' ') 00114 { 00115 std::string substr = seq.substr(start, i-start); 00116 if((substr.size() > 0) && _alphabet->has(substr)) 00117 { 00118 result.push_back (_alphabet->getSymbol(substr)->id()); 00119 } 00120 else 00121 { 00122 if(substr.size() >0) { 00123 result.push_back (0); 00124 std::cerr << "WARNING: Invalid symbol at position " << start <<": '" << substr << "'" << std::endl; 00125 invalidPositions.push_back(result.size()-1); 00126 } 00127 } 00128 start = i+1; 00129 } 00130 } 00131 std::string substr = seq.substr(start, seq.size()-start); 00132 if((substr.size() > 0) && _alphabet->has(substr)) 00133 { 00134 result.push_back (_alphabet->getSymbol(substr)->id()); 00135 } 00136 00137 return result; 00138 } 00139 00140 Sequence SequenceFactory::createSequence(const std::vector<std::string> & seq, std::vector<int> & invalidPositions) 00141 { 00142 Sequence result; 00143 00144 if(_alphabet->size() == 0) 00145 { 00146 std::istringstream instream; 00147 for(int i = 0 ; i < (int)seq.size(); i++) 00148 { 00149 instream.clear(); 00150 instream.str(seq[i]); 00151 double v; 00152 instream >> v; 00153 result.push_back (v); 00154 } 00155 } 00156 else 00157 { 00158 for(int i = 0 ; i < (int)seq.size(); i++){ 00159 if(_alphabet->getSymbol(seq[i])->id()<0){ 00160 result.push_back (0); 00161 std::cerr << "WARNING: Invalid symbol at position " << i <<": '" << seq[i] << "'" << std::endl; 00162 } 00163 else{ 00164 result.push_back (_alphabet->getSymbol(seq[i])->id()); 00165 invalidPositions.push_back(result.size()-1); 00166 } 00167 } 00168 } 00169 return result; 00170 } 00171 }