Showing
1 changed file
with
62 additions
and
42 deletions
... | @@ -17,7 +17,7 @@ | ... | @@ -17,7 +17,7 @@ |
17 | vector<int> multiFASTA_seqlen; | 17 | vector<int> multiFASTA_seqlen; |
18 | map<string, string> genetic_code; | 18 | map<string, string> genetic_code; |
19 | vector<string> sCodons; | 19 | vector<string> sCodons; |
20 | -int minimum_length = 0; | 20 | +int minimum_length = 2; // dipeptides |
21 | 21 | ||
22 | 22 | ||
23 | // XXX PROTOTYPES | 23 | // XXX PROTOTYPES |
... | @@ -200,6 +200,14 @@ int main(int argc, char** argv){ | ... | @@ -200,6 +200,14 @@ int main(int argc, char** argv){ |
200 | string optlist = | 200 | string optlist = |
201 | " Usage:\n" | 201 | " Usage:\n" |
202 | "\n" | 202 | "\n" |
203 | + " -i Input file\n" | ||
204 | + " -o Output filenames (default: input filename with .pro and .dna/.rna extensions)\n" | ||
205 | + " -s Start codon\n" | ||
206 | + " -m Minimum ORF length (default: 2)\n" | ||
207 | + " -a Alternative genetic code\n" | ||
208 | + " -r RNA input\n" | ||
209 | + " -c Check FASTA format\n" | ||
210 | + " -f Translate sense strand only\n" // f as in forward | ||
203 | " -h Help\n"; | 211 | " -h Help\n"; |
204 | 212 | ||
205 | string outFileName; | 213 | string outFileName; |
... | @@ -208,10 +216,11 @@ int main(int argc, char** argv){ | ... | @@ -208,10 +216,11 @@ int main(int argc, char** argv){ |
208 | bool checkFASTA; | 216 | bool checkFASTA; |
209 | string alt_gene_code; | 217 | string alt_gene_code; |
210 | string startCodon; | 218 | string startCodon; |
219 | + bool senseStrand; | ||
211 | 220 | ||
212 | 221 | ||
213 | int opt; | 222 | int opt; |
214 | - while ((opt = getopt(argc,argv,"hrci:o:s:m:a:")) != EOF){ | 223 | + while ((opt = getopt(argc,argv,"hrcfi:o:s:m:a:")) != EOF){ |
215 | switch(opt){ | 224 | switch(opt){ |
216 | case 'i': inFileName = optarg; break; | 225 | case 'i': inFileName = optarg; break; |
217 | case 'o': outFileName = optarg; break; | 226 | case 'o': outFileName = optarg; break; |
... | @@ -220,6 +229,7 @@ int main(int argc, char** argv){ | ... | @@ -220,6 +229,7 @@ int main(int argc, char** argv){ |
220 | case 'a': alt_gene_code = optarg; break; | 229 | case 'a': alt_gene_code = optarg; break; |
221 | case 'r': rna = true; break; | 230 | case 'r': rna = true; break; |
222 | case 'c': checkFASTA = true; break; | 231 | case 'c': checkFASTA = true; break; |
232 | + case 'f': senseStrand = true; break; | ||
223 | case 'h': fprintf(stderr, "%s", optlist.c_str()); return 0; | 233 | case 'h': fprintf(stderr, "%s", optlist.c_str()); return 0; |
224 | } | 234 | } |
225 | } | 235 | } |
... | @@ -290,56 +300,66 @@ int main(int argc, char** argv){ | ... | @@ -290,56 +300,66 @@ int main(int argc, char** argv){ |
290 | puts("5'-3' translation..."); | 300 | puts("5'-3' translation..."); |
291 | (rna) ? genetic_code = gc_std : genetic_code = gc_std_DNA; | 301 | (rna) ? genetic_code = gc_std : genetic_code = gc_std_DNA; |
292 | translate_3_frames(inFileName, tmp_53_pro_file, tmp_53_dna_file, false); | 302 | translate_3_frames(inFileName, tmp_53_pro_file, tmp_53_dna_file, false); |
293 | - puts("Done"); | ||
294 | - | ||
295 | 303 | ||
296 | - /* Reversion of the input sequence by system call: rev + tac */ | 304 | + // XXX DIRTY CLEANING |
297 | - puts("Genome reversion..."); | 305 | + // awk '/^>/ {if (seq != "") {print head""seq;} seq=""; head=$0"\n";} /^[^>]/ {seq=seq""$0"\n";} END {if (seq != "") print head""seq;}' input.fasta > output.fasta |
298 | - if ( system( ("rev "+inFileName+" | tac > " + tmp_reversed_input).c_str() ) != 0 ){ | 306 | + string command = "awk '/^>/ {if (seq != \"\") {print head\"\"seq;} seq=\"\"; head=$0\"\\n\";} /^[^>]/ {seq=seq\"\"$0\"\\n\";} END {if (seq != \"\") print head\"\"seq;}' "+ tmp_53_pro_file + " > " + output_pro + " && rm *.53pro *.53dna"; |
299 | - perror("Error while creating reversed genome file"); | 307 | + int status = std::system(command.c_str()); |
300 | - return(-1); | 308 | + if (status != 0) { |
301 | - } | 309 | + std::cerr << "Error: system call failed with status " << status << std::endl; |
302 | - else{ | 310 | + return 1; |
303 | - puts ("Done"); | ||
304 | } | 311 | } |
305 | - | ||
306 | - /* Reversion of the (global) vector of lengths */ | ||
307 | - reverse(multiFASTA_seqlen.begin(), multiFASTA_seqlen.end()); | ||
308 | - | ||
309 | - | ||
310 | - /* Translation 3'-5' */ | ||
311 | - puts("3'-5' translation..."); | ||
312 | - (rna) ? genetic_code = gc_std_rev : genetic_code = gc_std_DNA_rev; | ||
313 | - sCodons = complement_sCodons(sCodons, rna); | ||
314 | - translate_3_frames(tmp_reversed_input, tmp_35_pro_file, tmp_35_dna_file, true); | ||
315 | puts("Done"); | 312 | puts("Done"); |
316 | 313 | ||
317 | 314 | ||
318 | - /* Reversion (tac) of the 3'-5' translation, then concatenation */ | 315 | + if (not senseStrand){ |
319 | - puts("Creating result files..."); | 316 | + // TODO Warning for RNA sequence |
320 | - if ( system( ("cat "+tmp_53_pro_file+" > "+output_pro+" && tac "+tmp_35_pro_file+" >> "+output_pro+ | 317 | + /* Reversion of the input sequence by system call: rev + tac */ |
321 | - " && cat "+tmp_53_dna_file+" > "+output_dna+" && tac "+tmp_35_dna_file+" >> "+output_dna).c_str() ) != 0 ){ | 318 | + puts("Genome reversion..."); |
322 | - perror("Error while line-reversing and concatenating output files"); | 319 | + if ( system( ("rev "+inFileName+" | tac > " + tmp_reversed_input).c_str() ) != 0 ){ |
323 | - return(-1); | 320 | + perror("Error while creating reversed genome file"); |
324 | - } | 321 | + return(-1); |
325 | - else{ | 322 | + } |
326 | - puts("Done"); | 323 | + else{ |
327 | - } | 324 | + puts ("Done"); |
328 | - | 325 | + } |
329 | - /* Delete the temporary files */ | 326 | + |
330 | - puts("Cleaning temporary files..."); | 327 | + /* Reversion of the (global) vector of lengths */ |
331 | - if ( system( ("rm "+tmp_reversed_input+" "+tmp_35_pro_file+" "+tmp_35_dna_file+" "+tmp_53_pro_file+" "+tmp_53_dna_file).c_str() ) ){ | 328 | + reverse(multiFASTA_seqlen.begin(), multiFASTA_seqlen.end()); |
332 | - perror("Error while deleting temporary files"); | 329 | + |
333 | - return(-1); | 330 | + |
334 | - } | 331 | + /* Translation 3'-5' */ |
335 | - else{ | 332 | + puts("3'-5' translation..."); |
333 | + (rna) ? genetic_code = gc_std_rev : genetic_code = gc_std_DNA_rev; | ||
334 | + sCodons = complement_sCodons(sCodons, rna); | ||
335 | + translate_3_frames(tmp_reversed_input, tmp_35_pro_file, tmp_35_dna_file, true); | ||
336 | puts("Done"); | 336 | puts("Done"); |
337 | + | ||
338 | + | ||
339 | + /* Reversion (tac) of the 3'-5' translation, then concatenation */ | ||
340 | + puts("Creating result files..."); | ||
341 | + if ( system( ("cat "+tmp_53_pro_file+" > "+output_pro+" && tac "+tmp_35_pro_file+" >> "+output_pro+ | ||
342 | + " && cat "+tmp_53_dna_file+" > "+output_dna+" && tac "+tmp_35_dna_file+" >> "+output_dna).c_str() ) != 0 ){ | ||
343 | + perror("Error while line-reversing and concatenating output files"); | ||
344 | + return(-1); | ||
345 | + } | ||
346 | + else{ | ||
347 | + puts("Done"); | ||
348 | + } | ||
349 | + | ||
350 | + /* Delete the temporary files */ | ||
351 | + puts("Cleaning temporary files..."); | ||
352 | + if ( system( ("rm "+tmp_reversed_input+" "+tmp_35_pro_file+" "+tmp_35_dna_file+" "+tmp_53_pro_file+" "+tmp_53_dna_file).c_str() ) ){ | ||
353 | + perror("Error while deleting temporary files"); | ||
354 | + return(-1); | ||
355 | + } | ||
356 | + else{ | ||
357 | + puts("Done"); | ||
358 | + } | ||
337 | } | 359 | } |
338 | 360 | ||
339 | 361 | ||
340 | 362 | ||
341 | - | ||
342 | - | ||
343 | // XXX XXX DETECT SEQUENCE | 363 | // XXX XXX DETECT SEQUENCE |
344 | // => output sequence and surrounding | 364 | // => output sequence and surrounding |
345 | 365 | ... | ... |
-
Please register or login to post a comment