guillaume

Add new single-strand option (-f: translate the sense strand only)

...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
17 vector<int> multiFASTA_seqlen; 17 vector<int> multiFASTA_seqlen;
18 map<string, string> genetic_code; 18 map<string, string> genetic_code;
19 vector<string> sCodons; 19 vector<string> sCodons;
20 -int minimum_length = 0; 20 +int minimum_length = 2; // dipeptides
21 21
22 22
23 // XXX PROTOTYPES 23 // XXX PROTOTYPES
...@@ -200,6 +200,14 @@ int main(int argc, char** argv){ ...@@ -200,6 +200,14 @@ int main(int argc, char** argv){
200 string optlist = 200 string optlist =
201 " Usage:\n" 201 " Usage:\n"
202 "\n" 202 "\n"
203 + " -i Input file\n"
204 + " -o Output filenames (default: input filename with .pro and .dna/.rna extensions)\n"
205 + " -s Start codon\n"
206 + " -m Minimum ORF length (default: 2)\n"
207 + " -a Alternative genetic code\n"
208 + " -r RNA input\n"
209 + " -c Check FASTA format\n"
210 + " -f Translate sense strand only\n" // f as in forward
203 " -h Help\n"; 211 " -h Help\n";
204 212
205 string outFileName; 213 string outFileName;
...@@ -208,10 +216,11 @@ int main(int argc, char** argv){ ...@@ -208,10 +216,11 @@ int main(int argc, char** argv){
208 bool checkFASTA; 216 bool checkFASTA;
209 string alt_gene_code; 217 string alt_gene_code;
210 string startCodon; 218 string startCodon;
219 + bool senseStrand;
211 220
212 221
213 int opt; 222 int opt;
214 - while ((opt = getopt(argc,argv,"hrci:o:s:m:a:")) != EOF){ 223 + while ((opt = getopt(argc,argv,"hrcfi:o:s:m:a:")) != EOF){
215 switch(opt){ 224 switch(opt){
216 case 'i': inFileName = optarg; break; 225 case 'i': inFileName = optarg; break;
217 case 'o': outFileName = optarg; break; 226 case 'o': outFileName = optarg; break;
...@@ -220,6 +229,7 @@ int main(int argc, char** argv){ ...@@ -220,6 +229,7 @@ int main(int argc, char** argv){
220 case 'a': alt_gene_code = optarg; break; 229 case 'a': alt_gene_code = optarg; break;
221 case 'r': rna = true; break; 230 case 'r': rna = true; break;
222 case 'c': checkFASTA = true; break; 231 case 'c': checkFASTA = true; break;
232 + case 'f': senseStrand = true; break;
223 case 'h': fprintf(stderr, "%s", optlist.c_str()); return 0; 233 case 'h': fprintf(stderr, "%s", optlist.c_str()); return 0;
224 } 234 }
225 } 235 }
...@@ -290,9 +300,20 @@ int main(int argc, char** argv){ ...@@ -290,9 +300,20 @@ int main(int argc, char** argv){
290 puts("5'-3' translation..."); 300 puts("5'-3' translation...");
291 (rna) ? genetic_code = gc_std : genetic_code = gc_std_DNA; 301 (rna) ? genetic_code = gc_std : genetic_code = gc_std_DNA;
292 translate_3_frames(inFileName, tmp_53_pro_file, tmp_53_dna_file, false); 302 translate_3_frames(inFileName, tmp_53_pro_file, tmp_53_dna_file, false);
303 +
304 + // XXX DIRTY CLEANING
305 + // awk '/^>/ {if (seq != "") {print head""seq;} seq=""; head=$0"\n";} /^[^>]/ {seq=seq""$0"\n";} END {if (seq != "") print head""seq;}' input.fasta > output.fasta
306 + string command = "awk '/^>/ {if (seq != \"\") {print head\"\"seq;} seq=\"\"; head=$0\"\\n\";} /^[^>]/ {seq=seq\"\"$0\"\\n\";} END {if (seq != \"\") print head\"\"seq;}' "+ tmp_53_pro_file + " > " + output_pro + " && rm *.53pro *.53dna";
307 + int status = std::system(command.c_str());
308 + if (status != 0) {
309 + std::cerr << "Error: system call failed with status " << status << std::endl;
310 + return 1;
311 + }
293 puts("Done"); 312 puts("Done");
294 313
295 314
315 + if (not senseStrand){
316 + // TODO Warning for RNA sequence
296 /* Reversion of the input sequence by system call: rev + tac */ 317 /* Reversion of the input sequence by system call: rev + tac */
297 puts("Genome reversion..."); 318 puts("Genome reversion...");
298 if ( system( ("rev "+inFileName+" | tac > " + tmp_reversed_input).c_str() ) != 0 ){ 319 if ( system( ("rev "+inFileName+" | tac > " + tmp_reversed_input).c_str() ) != 0 ){
...@@ -335,8 +356,7 @@ int main(int argc, char** argv){ ...@@ -335,8 +356,7 @@ int main(int argc, char** argv){
335 else{ 356 else{
336 puts("Done"); 357 puts("Done");
337 } 358 }
338 - 359 + }
339 -
340 360
341 361
342 362
......