guillaume

new single strand option

...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
17 vector<int> multiFASTA_seqlen; 17 vector<int> multiFASTA_seqlen;
18 map<string, string> genetic_code; 18 map<string, string> genetic_code;
19 vector<string> sCodons; 19 vector<string> sCodons;
20 -int minimum_length = 0; 20 +int minimum_length = 2; // dipeptides
21 21
22 22
23 // XXX PROTOTYPES 23 // XXX PROTOTYPES
...@@ -200,6 +200,14 @@ int main(int argc, char** argv){ ...@@ -200,6 +200,14 @@ int main(int argc, char** argv){
200 string optlist = 200 string optlist =
201 " Usage:\n" 201 " Usage:\n"
202 "\n" 202 "\n"
203 + " -i Input file\n"
204 + " -o Output filenames (default: input filename with .pro and .dna/.rna extensions)\n"
205 + " -s Start codon\n"
206 + " -m Minimum ORF length (default: 2)\n"
207 + " -a Alternative genetic code\n"
208 + " -r RNA input\n"
209 + " -c Check FASTA format\n"
210 + " -f Translate sense strand only\n" // f as in forward
203 " -h Help\n"; 211 " -h Help\n";
204 212
205 string outFileName; 213 string outFileName;
...@@ -208,10 +216,11 @@ int main(int argc, char** argv){ ...@@ -208,10 +216,11 @@ int main(int argc, char** argv){
208 bool checkFASTA; 216 bool checkFASTA;
209 string alt_gene_code; 217 string alt_gene_code;
210 string startCodon; 218 string startCodon;
219 + bool senseStrand;
211 220
212 221
213 int opt; 222 int opt;
214 - while ((opt = getopt(argc,argv,"hrci:o:s:m:a:")) != EOF){ 223 + while ((opt = getopt(argc,argv,"hrcfi:o:s:m:a:")) != EOF){
215 switch(opt){ 224 switch(opt){
216 case 'i': inFileName = optarg; break; 225 case 'i': inFileName = optarg; break;
217 case 'o': outFileName = optarg; break; 226 case 'o': outFileName = optarg; break;
...@@ -220,6 +229,7 @@ int main(int argc, char** argv){ ...@@ -220,6 +229,7 @@ int main(int argc, char** argv){
220 case 'a': alt_gene_code = optarg; break; 229 case 'a': alt_gene_code = optarg; break;
221 case 'r': rna = true; break; 230 case 'r': rna = true; break;
222 case 'c': checkFASTA = true; break; 231 case 'c': checkFASTA = true; break;
232 + case 'f': senseStrand = true; break;
223 case 'h': fprintf(stderr, "%s", optlist.c_str()); return 0; 233 case 'h': fprintf(stderr, "%s", optlist.c_str()); return 0;
224 } 234 }
225 } 235 }
...@@ -290,56 +300,66 @@ int main(int argc, char** argv){ ...@@ -290,56 +300,66 @@ int main(int argc, char** argv){
290 puts("5'-3' translation..."); 300 puts("5'-3' translation...");
291 (rna) ? genetic_code = gc_std : genetic_code = gc_std_DNA; 301 (rna) ? genetic_code = gc_std : genetic_code = gc_std_DNA;
292 translate_3_frames(inFileName, tmp_53_pro_file, tmp_53_dna_file, false); 302 translate_3_frames(inFileName, tmp_53_pro_file, tmp_53_dna_file, false);
293 - puts("Done");
294 -
295 303
296 - /* Reversion of the input sequence by system call: rev + tac */ 304 + // XXX DIRTY CLEANING
297 - puts("Genome reversion..."); 305 + // awk '/^>/ {if (seq != "") {print head""seq;} seq=""; head=$0"\n";} /^[^>]/ {seq=seq""$0"\n";} END {if (seq != "") print head""seq;}' input.fasta > output.fasta
298 - if ( system( ("rev "+inFileName+" | tac > " + tmp_reversed_input).c_str() ) != 0 ){ 306 + string command = "awk '/^>/ {if (seq != \"\") {print head\"\"seq;} seq=\"\"; head=$0\"\\n\";} /^[^>]/ {seq=seq\"\"$0\"\\n\";} END {if (seq != \"\") print head\"\"seq;}' "+ tmp_53_pro_file + " > " + output_pro + " && rm *.53pro *.53dna";
299 - perror("Error while creating reversed genome file"); 307 + int status = std::system(command.c_str());
300 - return(-1); 308 + if (status != 0) {
301 - } 309 + std::cerr << "Error: system call failed with status " << status << std::endl;
302 - else{ 310 + return 1;
303 - puts ("Done");
304 } 311 }
305 -
306 - /* Reversion of the (global) vector of lengths */
307 - reverse(multiFASTA_seqlen.begin(), multiFASTA_seqlen.end());
308 -
309 -
310 - /* Translation 3'-5' */
311 - puts("3'-5' translation...");
312 - (rna) ? genetic_code = gc_std_rev : genetic_code = gc_std_DNA_rev;
313 - sCodons = complement_sCodons(sCodons, rna);
314 - translate_3_frames(tmp_reversed_input, tmp_35_pro_file, tmp_35_dna_file, true);
315 puts("Done"); 312 puts("Done");
316 313
317 314
318 - /* Reversion (tac) of the 3'-5' translation, then concatenation */ 315 + if (not senseStrand){
319 - puts("Creating result files..."); 316 + // TODO Warning for RNA sequence
320 - if ( system( ("cat "+tmp_53_pro_file+" > "+output_pro+" && tac "+tmp_35_pro_file+" >> "+output_pro+ 317 + /* Reversion of the input sequence by system call: rev + tac */
321 - " && cat "+tmp_53_dna_file+" > "+output_dna+" && tac "+tmp_35_dna_file+" >> "+output_dna).c_str() ) != 0 ){ 318 + puts("Genome reversion...");
322 - perror("Error while line-reversing and concatenating output files"); 319 + if ( system( ("rev "+inFileName+" | tac > " + tmp_reversed_input).c_str() ) != 0 ){
323 - return(-1); 320 + perror("Error while creating reversed genome file");
324 - } 321 + return(-1);
325 - else{ 322 + }
326 - puts("Done"); 323 + else{
327 - } 324 + puts ("Done");
328 - 325 + }
329 - /* Delete the temporary files */ 326 +
330 - puts("Cleaning temporary files..."); 327 + /* Reversion of the (global) vector of lengths */
331 - if ( system( ("rm "+tmp_reversed_input+" "+tmp_35_pro_file+" "+tmp_35_dna_file+" "+tmp_53_pro_file+" "+tmp_53_dna_file).c_str() ) ){ 328 + reverse(multiFASTA_seqlen.begin(), multiFASTA_seqlen.end());
332 - perror("Error while deleting temporary files"); 329 +
333 - return(-1); 330 +
334 - } 331 + /* Translation 3'-5' */
335 - else{ 332 + puts("3'-5' translation...");
333 + (rna) ? genetic_code = gc_std_rev : genetic_code = gc_std_DNA_rev;
334 + sCodons = complement_sCodons(sCodons, rna);
335 + translate_3_frames(tmp_reversed_input, tmp_35_pro_file, tmp_35_dna_file, true);
336 puts("Done"); 336 puts("Done");
337 +
338 +
339 + /* Reversion (tac) of the 3'-5' translation, then concatenation */
340 + puts("Creating result files...");
341 + if ( system( ("cat "+tmp_53_pro_file+" > "+output_pro+" && tac "+tmp_35_pro_file+" >> "+output_pro+
342 + " && cat "+tmp_53_dna_file+" > "+output_dna+" && tac "+tmp_35_dna_file+" >> "+output_dna).c_str() ) != 0 ){
343 + perror("Error while line-reversing and concatenating output files");
344 + return(-1);
345 + }
346 + else{
347 + puts("Done");
348 + }
349 +
350 + /* Delete the temporary files */
351 + puts("Cleaning temporary files...");
352 + if ( system( ("rm "+tmp_reversed_input+" "+tmp_35_pro_file+" "+tmp_35_dna_file+" "+tmp_53_pro_file+" "+tmp_53_dna_file).c_str() ) ){
353 + perror("Error while deleting temporary files");
354 + return(-1);
355 + }
356 + else{
357 + puts("Done");
358 + }
337 } 359 }
338 360
339 361
340 362
341 -
342 -
343 // XXX XXX DETECT SEQUENCE 363 // XXX XXX DETECT SEQUENCE
344 // => output sequence and surrounding 364 // => output sequence and surrounding
345 365
......