guillaume

Add single-strand option (-f): translate the sense strand only

......@@ -17,7 +17,7 @@
vector<int> multiFASTA_seqlen;
map<string, string> genetic_code;
vector<string> sCodons;
int minimum_length = 0;
int minimum_length = 2; // dipeptides
// XXX PROTOTYPES
......@@ -200,6 +200,14 @@ int main(int argc, char** argv){
string optlist =
" Usage:\n"
"\n"
" -i Input file\n"
" -o Output filenames (default: input filename with .pro and .dna/.rna extensions)\n"
" -s Start codon\n"
" -m Minimum ORF length (default: 2)\n"
" -a Alternative genetic code\n"
" -r RNA input\n"
" -c Check FASTA format\n"
" -f Translate sense strand only\n" // f as in forward
" -h Help\n";
string outFileName;
......@@ -208,10 +216,11 @@ int main(int argc, char** argv){
bool checkFASTA;
string alt_gene_code;
string startCodon;
bool senseStrand;
int opt;
while ((opt = getopt(argc,argv,"hrci:o:s:m:a:")) != EOF){
while ((opt = getopt(argc,argv,"hrcfi:o:s:m:a:")) != EOF){
switch(opt){
case 'i': inFileName = optarg; break;
case 'o': outFileName = optarg; break;
......@@ -220,6 +229,7 @@ int main(int argc, char** argv){
case 'a': alt_gene_code = optarg; break;
case 'r': rna = true; break;
case 'c': checkFASTA = true; break;
case 'f': senseStrand = true; break;
case 'h': fprintf(stderr, "%s", optlist.c_str()); return 0;
}
}
......@@ -290,56 +300,66 @@ int main(int argc, char** argv){
puts("5'-3' translation...");
(rna) ? genetic_code = gc_std : genetic_code = gc_std_DNA;
translate_3_frames(inFileName, tmp_53_pro_file, tmp_53_dna_file, false);
puts("Done");
/* Reversal of the input sequence by system call: rev + tac */
puts("Genome reversion...");
if ( system( ("rev "+inFileName+" | tac > " + tmp_reversed_input).c_str() ) != 0 ){
perror("Error while creating reversed genome file");
return(-1);
}
else{
puts ("Done");
// XXX DIRTY CLEANING
// awk '/^>/ {if (seq != "") {print head""seq;} seq=""; head=$0"\n";} /^[^>]/ {seq=seq""$0"\n";} END {if (seq != "") print head""seq;}' input.fasta > output.fasta
string command = "awk '/^>/ {if (seq != \"\") {print head\"\"seq;} seq=\"\"; head=$0\"\\n\";} /^[^>]/ {seq=seq\"\"$0\"\\n\";} END {if (seq != \"\") print head\"\"seq;}' "+ tmp_53_pro_file + " > " + output_pro + " && rm *.53pro *.53dna";
int status = std::system(command.c_str());
if (status != 0) {
std::cerr << "Error: system call failed with status " << status << std::endl;
return 1;
}
/* Reversal of the (global) vector of lengths */
reverse(multiFASTA_seqlen.begin(), multiFASTA_seqlen.end());
/* Translation 3'-5' */
puts("3'-5' translation...");
(rna) ? genetic_code = gc_std_rev : genetic_code = gc_std_DNA_rev;
sCodons = complement_sCodons(sCodons, rna);
translate_3_frames(tmp_reversed_input, tmp_35_pro_file, tmp_35_dna_file, true);
puts("Done");
/* Line reversal (tac) of the 3'-5' translation, then concatenation with the 5'-3' output */
puts("Creating result files...");
if ( system( ("cat "+tmp_53_pro_file+" > "+output_pro+" && tac "+tmp_35_pro_file+" >> "+output_pro+
" && cat "+tmp_53_dna_file+" > "+output_dna+" && tac "+tmp_35_dna_file+" >> "+output_dna).c_str() ) != 0 ){
perror("Error while line-reversing and concatenating output files");
return(-1);
}
else{
puts("Done");
}
/* Delete the temporary files */
puts("Cleaning temporary files...");
if ( system( ("rm "+tmp_reversed_input+" "+tmp_35_pro_file+" "+tmp_35_dna_file+" "+tmp_53_pro_file+" "+tmp_53_dna_file).c_str() ) ){
perror("Error while deleting temporary files");
return(-1);
}
else{
if (not senseStrand){
// TODO: emit a warning when the input is an RNA sequence
/* Reversal of the input sequence by system call: rev + tac */
puts("Genome reversion...");
if ( system( ("rev "+inFileName+" | tac > " + tmp_reversed_input).c_str() ) != 0 ){
perror("Error while creating reversed genome file");
return(-1);
}
else{
puts ("Done");
}
/* Reversal of the (global) vector of lengths */
reverse(multiFASTA_seqlen.begin(), multiFASTA_seqlen.end());
/* Translation 3'-5' */
puts("3'-5' translation...");
(rna) ? genetic_code = gc_std_rev : genetic_code = gc_std_DNA_rev;
sCodons = complement_sCodons(sCodons, rna);
translate_3_frames(tmp_reversed_input, tmp_35_pro_file, tmp_35_dna_file, true);
puts("Done");
/* Line reversal (tac) of the 3'-5' translation, then concatenation with the 5'-3' output */
puts("Creating result files...");
if ( system( ("cat "+tmp_53_pro_file+" > "+output_pro+" && tac "+tmp_35_pro_file+" >> "+output_pro+
" && cat "+tmp_53_dna_file+" > "+output_dna+" && tac "+tmp_35_dna_file+" >> "+output_dna).c_str() ) != 0 ){
perror("Error while line-reversing and concatenating output files");
return(-1);
}
else{
puts("Done");
}
/* Delete the temporary files */
puts("Cleaning temporary files...");
if ( system( ("rm "+tmp_reversed_input+" "+tmp_35_pro_file+" "+tmp_35_dna_file+" "+tmp_53_pro_file+" "+tmp_53_dna_file).c_str() ) ){
perror("Error while deleting temporary files");
return(-1);
}
else{
puts("Done");
}
}
// XXX XXX DETECT SEQUENCE
// => output sequence and surrounding
......