Showing
4 changed files
with
187 additions
and
128 deletions
... | @@ -3,10 +3,10 @@ BruteSmORF | ... | @@ -3,10 +3,10 @@ BruteSmORF |
3 | 3 | ||
4 | 4 | ||
5 | 5 | ||
6 | -Exhaustive approach for solving difficult cases of small open reading frame annotation: the PreImplantation Factor case | 6 | +Exhaustive approach for solving difficult cases of small open reading frame annotation: the PreImplantation Factor example |
7 | 7 | ||
8 | 8 | ||
9 | -The PreImplantation Factor: a difficult cases of small open reading frame annotation solved by an exhaustive approach | 9 | +The PreImplantation Factor: a difficult case of small open reading frame annotation solved by an exhaustive approach |
10 | 10 | ||
11 | 11 | ||
12 | 12 | ... | ... |
TODO.txt
0 → 100644
alternate_codes.h
0 → 100644
1 | +void select_genetic_code(string selection){ | ||
2 | + cout << "Genetic code selected: " << selection << endl; | ||
3 | + // The Vertebrate Mitochondrial Code | ||
4 | + if (selection == "2"){ | ||
5 | + gc_std["AGA"] = "*"; gc_std_DNA["AGA"] = "*"; gc_std_rev["UCU"] = "*"; gc_std_DNA_rev["TCT"] = "*"; | ||
6 | + gc_std["AGG"] = "*"; gc_std_DNA["AGG"] = "*"; gc_std_rev["UCC"] = "*"; gc_std_DNA_rev["TCC"] = "*"; | ||
7 | + gc_std["AGR"] = "*"; gc_std_DNA["AGR"] = "*"; gc_std_rev["UCY"] = "*"; gc_std_DNA_rev["TCY"] = "*"; | ||
8 | + gc_std["AUA"] = "M"; gc_std_DNA["ATA"] = "M"; gc_std_rev["UAU"] = "M"; gc_std_DNA_rev["TAT"] = "M"; | ||
9 | + gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
10 | + } | ||
11 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
12 | + else if (selection == "3"){ | ||
13 | + gc_std["AUA"] = "M"; gc_std_DNA["ATA"] = "M"; gc_std_rev["UAU"] = "M"; gc_std_DNA_rev["TAT"] = "M"; | ||
14 | + gc_std["CUU"] = "T"; gc_std_DNA["CTT"] = "T"; gc_std_rev["GAA"] = "T"; gc_std_DNA_rev["GAA"] = "T"; | ||
15 | + gc_std["CUC"] = "T"; gc_std_DNA["CTC"] = "T"; gc_std_rev["GAG"] = "T"; gc_std_DNA_rev["GAG"] = "T"; | ||
16 | + gc_std["CUA"] = "T"; gc_std_DNA["CTA"] = "T"; gc_std_rev["GAU"] = "T"; gc_std_DNA_rev["GAT"] = "T"; | ||
17 | + gc_std["CUG"] = "T"; gc_std_DNA["CTG"] = "T"; gc_std_rev["GAC"] = "T"; gc_std_DNA_rev["GAC"] = "T"; | ||
18 | + gc_std["CUN"] = "T"; gc_std_DNA["CTN"] = "T"; gc_std_rev["GAN"] = "T"; gc_std_DNA_rev["GAN"] = "T"; | ||
19 | + gc_std["CUR"] = "T"; gc_std_DNA["CTR"] = "T"; gc_std_rev["GAR"] = "T"; gc_std_DNA_rev["GAR"] = "T"; | ||
20 | + gc_std["CUY"] = "T"; gc_std_DNA["CTY"] = "T"; gc_std_rev["GAY"] = "T"; gc_std_DNA_rev["GAY"] = "T"; | ||
21 | + gc_std["CUM"] = "T"; gc_std_DNA["CTM"] = "T"; gc_std_rev["GAM"] = "T"; gc_std_DNA_rev["GAM"] = "T"; | ||
22 | + gc_std["CUK"] = "T"; gc_std_DNA["CTK"] = "T"; gc_std_rev["GAK"] = "T"; gc_std_DNA_rev["GAK"] = "T"; | ||
23 | + gc_std["CUS"] = "T"; gc_std_DNA["CTS"] = "T"; gc_std_rev["GAS"] = "T"; gc_std_DNA_rev["GAS"] = "T"; | ||
24 | + gc_std["CUW"] = "T"; gc_std_DNA["CTW"] = "T"; gc_std_rev["GAW"] = "T"; gc_std_DNA_rev["GAW"] = "T"; | ||
25 | + gc_std["CUH"] = "T"; gc_std_DNA["CTH"] = "T"; gc_std_rev["GAH"] = "T"; gc_std_DNA_rev["GAH"] = "T"; | ||
26 | + gc_std["CUB"] = "T"; gc_std_DNA["CTB"] = "T"; gc_std_rev["GAB"] = "T"; gc_std_DNA_rev["GAB"] = "T"; | ||
27 | + gc_std["CUV"] = "T"; gc_std_DNA["CTV"] = "T"; gc_std_rev["GAV"] = "T"; gc_std_DNA_rev["GAV"] = "T"; | ||
28 | + gc_std["CUD"] = "T"; gc_std_DNA["CTD"] = "T"; gc_std_rev["GAD"] = "T"; gc_std_DNA_rev["GAD"] = "T"; | ||
29 | + gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
30 | + } | ||
31 | + // xxxxxxxxxxxxxxxxxxxxxxxx | ||
32 | + else if (selection == "4"){ // Mycoplasma | ||
33 | + gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
34 | + } | ||
35 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
36 | + else if (selection == "5"){ | ||
37 | + gc_std["AGA"] = "S"; gc_std_DNA["AGA"] = "S"; gc_std_rev["UCU"] = "S"; gc_std_DNA_rev["TCT"] = "S"; | ||
38 | + gc_std["AGG"] = "S"; gc_std_DNA["AGG"] = "S"; gc_std_rev["UCC"] = "S"; gc_std_DNA_rev["TCC"] = "S"; | ||
39 | + gc_std["AGR"] = "S"; gc_std_DNA["AGR"] = "S"; gc_std_rev["UCY"] = "S"; gc_std_DNA_rev["TCY"] = "S"; | ||
40 | + gc_std["AUA"] = "M"; gc_std_DNA["ATA"] = "M"; gc_std_rev["UAU"] = "M"; gc_std_DNA_rev["TAT"] = "M"; | ||
41 | + gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
42 | + } | ||
43 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
44 | + else if (selection == "6"){ | ||
45 | + gc_std["UAA"] = "Q"; gc_std_DNA["TAA"] = "Q"; gc_std_rev["AUU"] = "Q"; gc_std_DNA_rev["ATT"] = "Q"; | ||
46 | + gc_std["UAG"] = "Q"; gc_std_DNA["TAG"] = "Q"; gc_std_rev["AUC"] = "Q"; gc_std_DNA_rev["ATC"] = "Q"; | ||
47 | + gc_std["UAR"] = "Q"; gc_std_DNA["TAR"] = "Q"; gc_std_rev["AUY"] = "Q"; gc_std_DNA_rev["ATY"] = "Q"; | ||
48 | + } | ||
49 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
50 | + else if (selection == "9"){ | ||
51 | + gc_std["AAA"] = "N"; gc_std_DNA["AAA"] = "N"; gc_std_rev["UUU"] = "N"; gc_std_DNA_rev["TTT"] = "N"; | ||
52 | + gc_std["AGA"] = "S"; gc_std_DNA["AGA"] = "S"; gc_std_rev["UCU"] = "S"; gc_std_DNA_rev["TCT"] = "S"; | ||
53 | + gc_std["AGG"] = "S"; gc_std_DNA["AGG"] = "S"; gc_std_rev["UCC"] = "S"; gc_std_DNA_rev["TCC"] = "S"; | ||
54 | + gc_std["AGR"] = "S"; gc_std_DNA["AGR"] = "S"; gc_std_rev["UCY"] = "S"; gc_std_DNA_rev["TCY"] = "S"; | ||
55 | + gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
56 | + } | ||
57 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
58 | + else if (selection == "10"){ | ||
59 | + gc_std["UGA"] = "C"; gc_std_DNA["TGA"] = "C"; gc_std_rev["ACU"] = "C"; gc_std_DNA_rev["ACT"] = "C"; | ||
60 | + } | ||
61 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
62 | + else if (selection == "12"){ | ||
63 | + gc_std["CUG"] = "S"; gc_std_DNA["CTG"] = "S"; gc_std_rev["GAC"] = "S"; gc_std_DNA_rev["GAC"] = "S"; | ||
64 | + } | ||
65 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
66 | + else if (selection == "13"){ | ||
67 | + gc_std["AGA"] = "G"; gc_std_DNA["AGA"] = "G"; gc_std_rev["UCU"] = "G"; gc_std_DNA_rev["TCT"] = "G"; | ||
68 | + gc_std["AGG"] = "G"; gc_std_DNA["AGG"] = "G"; gc_std_rev["UCC"] = "G"; gc_std_DNA_rev["TCC"] = "G"; | ||
69 | + gc_std["AGR"] = "G"; gc_std_DNA["AGR"] = "G"; gc_std_rev["UCY"] = "G"; gc_std_DNA_rev["TCY"] = "G"; | ||
70 | + gc_std["AUA"] = "M"; gc_std_DNA["ATA"] = "M"; gc_std_rev["UAU"] = "M"; gc_std_DNA_rev["TAT"] = "M"; | ||
71 | + gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
72 | + } | ||
73 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
74 | + else if (selection == "14"){ | ||
75 | + gc_std["AAA"] = "N"; gc_std_DNA["AAA"] = "N"; gc_std_rev["UUU"] = "N"; gc_std_DNA_rev["TTT"] = "N"; | ||
76 | + gc_std["AGA"] = "S"; gc_std_DNA["AGA"] = "S"; gc_std_rev["UCU"] = "S"; gc_std_DNA_rev["TCT"] = "S"; | ||
77 | + gc_std["AGG"] = "S"; gc_std_DNA["AGG"] = "S"; gc_std_rev["UCC"] = "S"; gc_std_DNA_rev["TCC"] = "S"; | ||
78 | + gc_std["AGR"] = "S"; gc_std_DNA["AGR"] = "S"; gc_std_rev["UCY"] = "S"; gc_std_DNA_rev["TCY"] = "S"; | ||
79 | + gc_std["UAA"] = "Y"; gc_std_DNA["TAA"] = "Y"; gc_std_rev["AUU"] = "Y"; gc_std_DNA_rev["ATT"] = "Y"; | ||
80 | + gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
81 | + } | ||
82 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
83 | + else if (selection == "16"){ | ||
84 | + gc_std["UAG"] = "L"; gc_std_DNA["TAG"] = "L"; gc_std_rev["AUC"] = "L"; gc_std_DNA_rev["ATC"] = "L"; | ||
85 | + } | ||
86 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
87 | + else if (selection == "21"){ | ||
88 | + gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
89 | + gc_std["AUA"] = "M"; gc_std_DNA["ATA"] = "M"; gc_std_rev["UAU"] = "M"; gc_std_DNA_rev["TAT"] = "M"; | ||
90 | + gc_std["AGA"] = "S"; gc_std_DNA["AGA"] = "S"; gc_std_rev["UCU"] = "S"; gc_std_DNA_rev["TCT"] = "S"; | ||
91 | + gc_std["AGG"] = "S"; gc_std_DNA["AGG"] = "S"; gc_std_rev["UCC"] = "S"; gc_std_DNA_rev["TCC"] = "S"; | ||
92 | + gc_std["AGR"] = "S"; gc_std_DNA["AGR"] = "S"; gc_std_rev["UCY"] = "S"; gc_std_DNA_rev["TCY"] = "S"; | ||
93 | + gc_std["AAA"] = "N"; gc_std_DNA["AAA"] = "N"; gc_std_rev["UUU"] = "N"; gc_std_DNA_rev["TTT"] = "N"; | ||
94 | + } | ||
95 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
96 | + else if (selection == "22"){ | ||
97 | + gc_std["UCA"] = "*"; gc_std_DNA["TCA"] = "*"; gc_std_rev["AGU"] = "*"; gc_std_DNA_rev["AGT"] = "*"; | ||
98 | + gc_std["UAG"] = "L"; gc_std_DNA["TAG"] = "L"; gc_std_rev["AUC"] = "L"; gc_std_DNA_rev["ATC"] = "L"; | ||
99 | + } | ||
100 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
101 | + else if (selection == "23"){ | ||
102 | + gc_std["UUA"] = "*"; gc_std_DNA["TTA"] = "*"; gc_std_rev["AAU"] = "*"; gc_std_DNA_rev["AAT"] = "*"; | ||
103 | + } | ||
104 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
105 | + else if (selection == "24"){ | ||
106 | + gc_std["AGA"] = "S"; gc_std_DNA["AGA"] = "S"; gc_std_rev["UCU"] = "S"; gc_std_DNA_rev["TCT"] = "S"; | ||
107 | + gc_std["AGG"] = "K"; gc_std_DNA["AGG"] = "K"; gc_std_rev["UCC"] = "K"; gc_std_DNA_rev["TCC"] = "K"; | ||
108 | + gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
109 | + } | ||
110 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
111 | + else if (selection == "25"){ | ||
112 | + gc_std["UGA"] = "G"; gc_std_DNA["TGA"] = "G"; gc_std_rev["ACU"] = "G"; gc_std_DNA_rev["ACT"] = "G"; | ||
113 | + } | ||
114 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
115 | + else if (selection == "26"){ | ||
116 | + gc_std["CUG"] = "A"; gc_std_DNA["CTG"] = "A"; gc_std_rev["GAC"] = "A"; gc_std_DNA_rev["GAC"] = "A"; | ||
117 | + } | ||
118 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
119 | + else if (selection == "29"){ | ||
120 | + gc_std["UAA"] = "Y"; gc_std_DNA["TAA"] = "Y"; gc_std_rev["AUU"] = "Y"; gc_std_DNA_rev["ATT"] = "Y"; | ||
121 | + gc_std["UAG"] = "Y"; gc_std_DNA["TAG"] = "Y"; gc_std_rev["AUC"] = "Y"; gc_std_DNA_rev["ATC"] = "Y"; | ||
122 | + gc_std["UAR"] = "Y"; gc_std_DNA["TAR"] = "Y"; gc_std_rev["AUY"] = "Y"; gc_std_DNA_rev["ATY"] = "Y"; | ||
123 | + } | ||
124 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
125 | + else if (selection == "30"){ | ||
126 | + gc_std["UAA"] = "E"; gc_std_DNA["TAA"] = "E"; gc_std_rev["AUU"] = "E"; gc_std_DNA_rev["ATT"] = "E"; | ||
127 | + gc_std["UAG"] = "E"; gc_std_DNA["TAG"] = "E"; gc_std_rev["AUC"] = "E"; gc_std_DNA_rev["ATC"] = "E"; | ||
128 | + gc_std["UAR"] = "E"; gc_std_DNA["TAR"] = "E"; gc_std_rev["AUY"] = "E"; gc_std_DNA_rev["ATY"] = "E"; | ||
129 | + } | ||
130 | + // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
131 | + else if (selection == "33"){ | ||
132 | + gc_std["UAA"] = "Y"; gc_std_DNA["TAA"] = "Y"; gc_std_rev["AUU"] = "Y"; gc_std_DNA_rev["ATT"] = "Y"; | ||
133 | + gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
134 | + gc_std["AGA"] = "S"; gc_std_DNA["AGA"] = "S"; gc_std_rev["UCU"] = "S"; gc_std_DNA_rev["TCT"] = "S"; | ||
135 | + gc_std["AGG"] = "K"; gc_std_DNA["AGG"] = "K"; gc_std_rev["UCC"] = "K"; gc_std_DNA_rev["TCC"] = "K"; | ||
136 | + } | ||
137 | +} | ||
138 | + | ||
139 | +// -11, (-27, -28, -31) | ||
140 | + | ||
141 | + | ||
142 | + // IUPAC ambiguity codes:         N R Y M K S W H B V D | ||
143 | + // their complements (same order): N Y R K M S W D V B H | ||
144 | + | ||
145 | + |
... | @@ -7,6 +7,7 @@ | ... | @@ -7,6 +7,7 @@ |
7 | #include <algorithm> | 7 | #include <algorithm> |
8 | #include <vector> | 8 | #include <vector> |
9 | #include "split.h" | 9 | #include "split.h" |
10 | +#include "alternate_codes.h" | ||
10 | 11 | ||
11 | // system detection | 12 | // system detection |
12 | // docker | 13 | // docker |
... | @@ -15,12 +16,35 @@ | ... | @@ -15,12 +16,35 @@ |
15 | /* Global variables */ | 16 | /* Global variables */ |
16 | vector<int> multiFASTA_seqlen; | 17 | vector<int> multiFASTA_seqlen; |
17 | map<string, string> genetic_code; | 18 | map<string, string> genetic_code; |
18 | -vector<string> sCodon; | 19 | +vector<string> sCodons; |
19 | int minimum_length = 0; | 20 | int minimum_length = 0; |
20 | 21 | ||
21 | 22 | ||
22 | // XXX PROTOTYPES | 23 | // XXX PROTOTYPES |
23 | 24 | ||
/*
 * Return the base-by-base complement of every start codon in vec_codons.
 * The codons are complemented only, not reversed, and the output keeps the
 * order and the length of the input.
 *
 * vec_codons  start codons, spelled with either T or U.
 * rna         true  -> the complement of A is written as 'U';
 *             false -> the complement of A is written as 'T'.
 *
 * Both 'T' and 'U' complement to 'A'. Previously 'U' was not handled at all,
 * so an RNA codon such as "AUG" collapsed to the two-letter "UC" and could
 * never match a codon of the sequence.
 * IUPAC ambiguity codes are complemented as well (R/Y, M/K, H/D, B/V; N, S
 * and W pair with themselves). Any other character is copied unchanged so
 * that a codon never silently changes length.
 */
vector<string> complement_sCodons(vector<string> vec_codons, bool rna){
    vector<string> cvec_codons;
    cvec_codons.reserve(vec_codons.size());

    for (const string &codon : vec_codons){
        string codonCompl;
        codonCompl.reserve(codon.length());

        for (char c : codon){
            switch (c){
                case 'A': codonCompl += (rna ? 'U' : 'T'); break;
                case 'T':
                case 'U': codonCompl += 'A'; break;
                case 'C': codonCompl += 'G'; break;
                case 'G': codonCompl += 'C'; break;
                case 'R': codonCompl += 'Y'; break;
                case 'Y': codonCompl += 'R'; break;
                case 'M': codonCompl += 'K'; break;
                case 'K': codonCompl += 'M'; break;
                case 'H': codonCompl += 'D'; break;
                case 'D': codonCompl += 'H'; break;
                case 'B': codonCompl += 'V'; break;
                case 'V': codonCompl += 'B'; break;
                // N, S, W and anything unrecognised: keep as is
                default:  codonCompl += c;   break;
            }
        }
        cvec_codons.push_back(codonCompl);
    }
    return cvec_codons;
}
47 | + | ||
24 | 48 | ||
25 | void extend_until_print(string &codon, string &protein, string &nucleic, int frame, char posneg, ofstream &outfile, ofstream &outfileDNA, bool backward, int position, int glob_vec_index){ | 49 | void extend_until_print(string &codon, string &protein, string &nucleic, int frame, char posneg, ofstream &outfile, ofstream &outfileDNA, bool backward, int position, int glob_vec_index){ |
26 | if (codon.length() == 3){ | 50 | if (codon.length() == 3){ |
... | @@ -42,8 +66,8 @@ void extend_until_print(string &codon, string &protein, string &nucleic, int fra | ... | @@ -42,8 +66,8 @@ void extend_until_print(string &codon, string &protein, string &nucleic, int fra |
42 | 66 | ||
43 | //reverse( nucleic.begin(), nucleic.end() ); // XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX | 67 | //reverse( nucleic.begin(), nucleic.end() ); // XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX |
44 | 68 | ||
45 | - if (!sCodon.empty()){ | 69 | + if (!sCodons.empty()){ |
46 | - if (find(sCodon.begin(), sCodon.end(), nucleic.substr(0, 3)) != sCodon.end()){ // if nucleic codon is in the vector of start codons | 70 | + if (find(sCodons.begin(), sCodons.end(), nucleic.substr(0, 3)) != sCodons.end()){ // if nucleic codon is in the vector of start codons |
47 | outfile << frame << posneg << "\t" << begin << "-" << position << "\t" << protein << "\n"; | 71 | outfile << frame << posneg << "\t" << begin << "-" << position << "\t" << protein << "\n"; |
48 | outfileDNA << frame << posneg << "\t" << begin << "-" << position << "\t" << nucleic << "\n"; | 72 | outfileDNA << frame << posneg << "\t" << begin << "-" << position << "\t" << nucleic << "\n"; |
49 | } | 73 | } |
... | @@ -169,126 +193,6 @@ int check_file_content(string fastaFileName, bool rna){ | ... | @@ -169,126 +193,6 @@ int check_file_content(string fastaFileName, bool rna){ |
169 | } | 193 | } |
170 | 194 | ||
171 | 195 | ||
172 | -void select_genetic_code(string selection){ | ||
173 | - cout << "Genetic code selected: " << selection << endl; | ||
174 | - // The Vertebrate Mitochondrial Code | ||
175 | - if (selection == "2"){ | ||
176 | - gc_std["AGA"] = "*"; gc_std_DNA["AGA"] = "*"; gc_std_rev["UCU"] = "*"; gc_std_DNA_rev["TCT"] = "*"; | ||
177 | - gc_std["AGG"] = "*"; gc_std_DNA["AGG"] = "*"; gc_std_rev["UCC"] = "*"; gc_std_DNA_rev["TCC"] = "*"; | ||
178 | - gc_std["AUA"] = "M"; gc_std_DNA["ATA"] = "M"; gc_std_rev["UAU"] = "M"; gc_std_DNA_rev["TAT"] = "M"; | ||
179 | - gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
180 | - } | ||
181 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
182 | - else if (selection == "3"){ | ||
183 | - gc_std["AUA"] = "M"; gc_std_DNA["ATA"] = "M"; gc_std_rev["UAU"] = "M"; gc_std_DNA_rev["TAT"] = "M"; | ||
184 | - gc_std["CUU"] = "T"; gc_std_DNA["CTT"] = "T"; gc_std_rev["GAA"] = "T"; gc_std_DNA_rev["GAA"] = "T"; | ||
185 | - gc_std["CUC"] = "T"; gc_std_DNA["CTC"] = "T"; gc_std_rev["GAG"] = "T"; gc_std_DNA_rev["GAG"] = "T"; | ||
186 | - gc_std["CUA"] = "T"; gc_std_DNA["CTA"] = "T"; gc_std_rev["GAU"] = "T"; gc_std_DNA_rev["GAT"] = "T"; | ||
187 | - gc_std["CUG"] = "T"; gc_std_DNA["CTG"] = "T"; gc_std_rev["GAC"] = "T"; gc_std_DNA_rev["GAC"] = "T"; | ||
188 | - gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
189 | - } | ||
190 | - // xxxxxxxxxxxxxxxxxxxxxxxx | ||
191 | - else if (selection == "4"){ // Mycoplasma | ||
192 | - gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
193 | - } | ||
194 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
195 | - else if (selection == "5"){ | ||
196 | - gc_std["AGA"] = "S"; gc_std_DNA["AGA"] = "S"; gc_std_rev["UCU"] = "S"; gc_std_DNA_rev["TCT"] = "S"; | ||
197 | - gc_std["AGG"] = "S"; gc_std_DNA["AGG"] = "S"; gc_std_rev["UCC"] = "S"; gc_std_DNA_rev["TCC"] = "S"; | ||
198 | - gc_std["AUA"] = "M"; gc_std_DNA["ATA"] = "M"; gc_std_rev["UAU"] = "M"; gc_std_DNA_rev["TAT"] = "M"; | ||
199 | - gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
200 | - } | ||
201 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
202 | - else if (selection == "6"){ | ||
203 | - gc_std["UAA"] = "Q"; gc_std_DNA["TAA"] = "Q"; gc_std_rev["AUU"] = "Q"; gc_std_DNA_rev["ATT"] = "Q"; | ||
204 | - gc_std["UAG"] = "Q"; gc_std_DNA["TAG"] = "Q"; gc_std_rev["AUC"] = "Q"; gc_std_DNA_rev["ATC"] = "Q"; | ||
205 | - } | ||
206 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
207 | - else if (selection == "9"){ | ||
208 | - gc_std["AAA"] = "N"; gc_std_DNA["AAA"] = "N"; gc_std_rev["UUU"] = "N"; gc_std_DNA_rev["TTT"] = "N"; | ||
209 | - gc_std["AGA"] = "S"; gc_std_DNA["AGA"] = "S"; gc_std_rev["UCU"] = "S"; gc_std_DNA_rev["TCT"] = "S"; | ||
210 | - gc_std["AGG"] = "S"; gc_std_DNA["AGG"] = "S"; gc_std_rev["UCC"] = "S"; gc_std_DNA_rev["TCC"] = "S"; | ||
211 | - gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
212 | - } | ||
213 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
214 | - else if (selection == "10"){ | ||
215 | - gc_std["UGA"] = "C"; gc_std_DNA["TGA"] = "C"; gc_std_rev["ACU"] = "C"; gc_std_DNA_rev["ACT"] = "C"; | ||
216 | - } | ||
217 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
218 | - else if (selection == "12"){ | ||
219 | - gc_std["CUG"] = "S"; gc_std_DNA["CTG"] = "S"; gc_std_rev["GAC"] = "S"; gc_std_DNA_rev["GAC"] = "S"; | ||
220 | - } | ||
221 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
222 | - else if (selection == "13"){ | ||
223 | - gc_std["AGA"] = "G"; gc_std_DNA["AGA"] = "G"; gc_std_rev["UCU"] = "G"; gc_std_DNA_rev["TCT"] = "G"; | ||
224 | - gc_std["AGG"] = "G"; gc_std_DNA["AGG"] = "G"; gc_std_rev["UCC"] = "G"; gc_std_DNA_rev["TCC"] = "G"; | ||
225 | - gc_std["AUA"] = "M"; gc_std_DNA["ATA"] = "M"; gc_std_rev["UAU"] = "M"; gc_std_DNA_rev["TAT"] = "M"; | ||
226 | - gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
227 | - } | ||
228 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
229 | - else if (selection == "14"){ | ||
230 | - gc_std["AAA"] = "N"; gc_std_DNA["AAA"] = "N"; gc_std_rev["UUU"] = "N"; gc_std_DNA_rev["TTT"] = "N"; | ||
231 | - gc_std["AGA"] = "S"; gc_std_DNA["AGA"] = "S"; gc_std_rev["UCU"] = "S"; gc_std_DNA_rev["TCT"] = "S"; | ||
232 | - gc_std["AGG"] = "S"; gc_std_DNA["AGG"] = "S"; gc_std_rev["UCC"] = "S"; gc_std_DNA_rev["TCC"] = "S"; | ||
233 | - gc_std["UAA"] = "Y"; gc_std_DNA["TAA"] = "Y"; gc_std_rev["AUU"] = "Y"; gc_std_DNA_rev["ATT"] = "Y"; | ||
234 | - gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
235 | - } | ||
236 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
237 | - else if (selection == "16"){ | ||
238 | - gc_std["UAG"] = "L"; gc_std_DNA["TAG"] = "L"; gc_std_rev["AUC"] = "L"; gc_std_DNA_rev["ATC"] = "L"; | ||
239 | - } | ||
240 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
241 | - else if (selection == "21"){ | ||
242 | - gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
243 | - gc_std["AUA"] = "M"; gc_std_DNA["ATA"] = "M"; gc_std_rev["UAU"] = "M"; gc_std_DNA_rev["TAT"] = "M"; | ||
244 | - gc_std["AGA"] = "S"; gc_std_DNA["AGA"] = "S"; gc_std_rev["UCU"] = "S"; gc_std_DNA_rev["TCT"] = "S"; | ||
245 | - gc_std["AGG"] = "S"; gc_std_DNA["AGG"] = "S"; gc_std_rev["UCC"] = "S"; gc_std_DNA_rev["TCC"] = "S"; | ||
246 | - gc_std["AAA"] = "N"; gc_std_DNA["AAA"] = "N"; gc_std_rev["UUU"] = "N"; gc_std_DNA_rev["TTT"] = "N"; | ||
247 | - } | ||
248 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
249 | - else if (selection == "22"){ | ||
250 | - gc_std["UCA"] = "*"; gc_std_DNA["TCA"] = "*"; gc_std_rev["AGU"] = "*"; gc_std_DNA_rev["AGT"] = "*"; | ||
251 | - gc_std["UAG"] = "L"; gc_std_DNA["TAG"] = "L"; gc_std_rev["AUC"] = "L"; gc_std_DNA_rev["ATC"] = "L"; | ||
252 | - } | ||
253 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
254 | - else if (selection == "23"){ | ||
255 | - gc_std["UUA"] = "*"; gc_std_DNA["TTA"] = "*"; gc_std_rev["AAU"] = "*"; gc_std_DNA_rev["AAT"] = "*"; | ||
256 | - } | ||
257 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
258 | - else if (selection == "24"){ | ||
259 | - gc_std["AGA"] = "S"; gc_std_DNA["AGA"] = "S"; gc_std_rev["UCU"] = "S"; gc_std_DNA_rev["TCT"] = "S"; | ||
260 | - gc_std["AGG"] = "K"; gc_std_DNA["AGG"] = "K"; gc_std_rev["UCC"] = "K"; gc_std_DNA_rev["TCC"] = "K"; | ||
261 | - gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
262 | - } | ||
263 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
264 | - else if (selection == "25"){ | ||
265 | - gc_std["UGA"] = "G"; gc_std_DNA["TGA"] = "G"; gc_std_rev["ACU"] = "G"; gc_std_DNA_rev["ACT"] = "G"; | ||
266 | - } | ||
267 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
268 | - else if (selection == "26"){ | ||
269 | - gc_std["CUG"] = "A"; gc_std_DNA["CTG"] = "A"; gc_std_rev["GAC"] = "A"; gc_std_DNA_rev["GAC"] = "A"; | ||
270 | - } | ||
271 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
272 | - else if (selection == "29"){ | ||
273 | - gc_std["UAA"] = "Y"; gc_std_DNA["TAA"] = "Y"; gc_std_rev["AUU"] = "Y"; gc_std_DNA_rev["ATT"] = "Y"; | ||
274 | - gc_std["UAG"] = "Y"; gc_std_DNA["TAG"] = "Y"; gc_std_rev["AUC"] = "Y"; gc_std_DNA_rev["ATC"] = "Y"; | ||
275 | - } | ||
276 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
277 | - else if (selection == "30"){ | ||
278 | - gc_std["UAA"] = "E"; gc_std_DNA["TAA"] = "E"; gc_std_rev["AUU"] = "E"; gc_std_DNA_rev["ATT"] = "E"; | ||
279 | - gc_std["UAG"] = "E"; gc_std_DNA["TAG"] = "E"; gc_std_rev["AUC"] = "E"; gc_std_DNA_rev["ATC"] = "E"; | ||
280 | - } | ||
281 | - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
282 | - else if (selection == "33"){ | ||
283 | - gc_std["UAA"] = "Y"; gc_std_DNA["TAA"] = "Y"; gc_std_rev["AUU"] = "Y"; gc_std_DNA_rev["ATT"] = "Y"; | ||
284 | - gc_std["UGA"] = "W"; gc_std_DNA["TGA"] = "W"; gc_std_rev["ACU"] = "W"; gc_std_DNA_rev["ACT"] = "W"; | ||
285 | - gc_std["AGA"] = "S"; gc_std_DNA["AGA"] = "S"; gc_std_rev["UCU"] = "S"; gc_std_DNA_rev["TCT"] = "S"; | ||
286 | - gc_std["AGG"] = "K"; gc_std_DNA["AGG"] = "K"; gc_std_rev["UCC"] = "K"; gc_std_DNA_rev["TCC"] = "K"; | ||
287 | - } | ||
288 | -} | ||
289 | - | ||
290 | -// -11, (-27, -28, -31) | ||
291 | - | ||
292 | // input list of files XXX | 196 | // input list of files XXX |
293 | 197 | ||
294 | 198 | ||
... | @@ -344,7 +248,8 @@ int main(int argc, char** argv){ | ... | @@ -344,7 +248,8 @@ int main(int argc, char** argv){ |
344 | puts("Start codon: no specific amino acid selected"); | 248 | puts("Start codon: no specific amino acid selected"); |
345 | } | 249 | } |
346 | else{ | 250 | else{ |
347 | - sCodon = split(startCodon, '-'); | 251 | + sCodons = split(startCodon, '-'); |
252 | + // XXX Check alternate | ||
348 | } | 253 | } |
349 | 254 | ||
350 | /* Check input file content */ | 255 | /* Check input file content */ |
... | @@ -361,8 +266,7 @@ int main(int argc, char** argv){ | ... | @@ -361,8 +266,7 @@ int main(int argc, char** argv){ |
361 | 266 | ||
362 | /* Modifications to the standard genetic code */ | 267 | /* Modifications to the standard genetic code */ |
363 | select_genetic_code(alt_gene_code); | 268 | select_genetic_code(alt_gene_code); |
364 | - // XXX option pour Alternative Initiation Codons | 269 | + // XXX Check value |
365 | - // OU BIEN fichier txt lu | ||
366 | 270 | ||
367 | 271 | ||
368 | /* Filenames: temporary AND final output */ | 272 | /* Filenames: temporary AND final output */ |
... | @@ -406,6 +310,7 @@ int main(int argc, char** argv){ | ... | @@ -406,6 +310,7 @@ int main(int argc, char** argv){ |
406 | /* Translation 3'-5' */ | 310 | /* Translation 3'-5' */ |
407 | puts("3'-5' translation..."); | 311 | puts("3'-5' translation..."); |
408 | (rna) ? genetic_code = gc_std_rev : genetic_code = gc_std_DNA_rev; | 312 | (rna) ? genetic_code = gc_std_rev : genetic_code = gc_std_DNA_rev; |
313 | + sCodons = complement_sCodons(sCodons, rna); | ||
409 | translate_3_frames(tmp_reversed_input, tmp_35_pro_file, tmp_35_dna_file, true); | 314 | translate_3_frames(tmp_reversed_input, tmp_35_pro_file, tmp_35_dna_file, true); |
410 | puts("Done"); | 315 | puts("Done"); |
411 | 316 | ... | ... |
-
Please register or login to post a comment