Toggle navigation
Toggle navigation
This project
Loading...
Sign in
EvryRNA
/
BruteSmORF
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
guillaume
2022-11-28 10:50:24 +0100
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
b4022c597988bdb042e7a73b932362d2be9d812e
b4022c59
1 parent
2828b9c8
alter gen codes
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
187 additions
and
128 deletions
README.txt
TODO.txt
alternate_codes.h
translate_FUNCTION.cpp
README.txt
View file @
b4022c5
...
...
@@ -3,10 +3,10 @@ BruteSmORF
Exhaustive approach for solving difficult cases of small open reading frame annotation: the PreImplantation Factor
cas
e
Exhaustive approach for solving difficult cases of small open reading frame annotation: the PreImplantation Factor
exampl
e
The PreImplantation Factor: a difficult case
s
of small open reading frame annotation solved by an exhaustive approach
The PreImplantation Factor: a difficult case of small open reading frame annotation solved by an exhaustive approach
...
...
TODO.txt
0 → 100644
View file @
b4022c5
multi-FASTA
display name of alternate genetic code
alternate_codes.h
0 → 100644
View file @
b4022c5
/*
 * Patch the global codon tables so that they describe an alternate genetic code.
 *
 * `selection` is the identifier of the requested code, compared as text
 * ("2", "3", ... "33"; the numbering looks like the NCBI translation-table
 * numbering -- TODO confirm). For every reassignment listed for that
 * identifier, the same residue is written into the four global tables:
 *   gc_std         (RNA codon),
 *   gc_std_DNA     (DNA codon),
 *   gc_std_rev     (RNA key used for the 3'-5' pass),
 *   gc_std_DNA_rev (DNA key used for the 3'-5' pass).
 *
 * An identifier with no entry in the list leaves all four tables untouched;
 * the selection is echoed on stdout in every case.
 *
 * NOTE(review): only the keys listed here are rewritten. Whether other
 * ambiguity-code keys already present in the base tables need adjusting for a
 * given code cannot be checked from this function alone.
 */
void select_genetic_code(string selection){
    // One reassignment: the key to update in each of the four tables and the
    // one-letter residue ("*" marks a stop) stored under all of them.
    struct CodonChange {
        const char *code;     // value of `selection` this row belongs to
        const char *rna;      // key in gc_std
        const char *dna;      // key in gc_std_DNA
        const char *rna_rev;  // key in gc_std_rev
        const char *dna_rev;  // key in gc_std_DNA_rev
        const char *residue;  // value written to all four tables
    };

    static const CodonChange changes[] = {
        // The Vertebrate Mitochondrial Code
        {"2",  "AGA", "AGA", "UCU", "TCT", "*"},
        {"2",  "AGG", "AGG", "UCC", "TCC", "*"},
        {"2",  "AGR", "AGR", "UCY", "TCY", "*"},
        {"2",  "AUA", "ATA", "UAU", "TAT", "M"},
        {"2",  "UGA", "TGA", "ACU", "ACT", "W"},

        {"3",  "AUA", "ATA", "UAU", "TAT", "M"},
        {"3",  "CUU", "CTT", "GAA", "GAA", "T"},
        {"3",  "CUC", "CTC", "GAG", "GAG", "T"},
        {"3",  "CUA", "CTA", "GAU", "GAT", "T"},
        {"3",  "CUG", "CTG", "GAC", "GAC", "T"},
        {"3",  "CUN", "CTN", "GAN", "GAN", "T"},
        {"3",  "CUR", "CTR", "GAR", "GAR", "T"},
        {"3",  "CUY", "CTY", "GAY", "GAY", "T"},
        {"3",  "CUM", "CTM", "GAM", "GAM", "T"},
        {"3",  "CUK", "CTK", "GAK", "GAK", "T"},
        {"3",  "CUS", "CTS", "GAS", "GAS", "T"},
        {"3",  "CUW", "CTW", "GAW", "GAW", "T"},
        {"3",  "CUH", "CTH", "GAH", "GAH", "T"},
        {"3",  "CUB", "CTB", "GAB", "GAB", "T"},
        {"3",  "CUV", "CTV", "GAV", "GAV", "T"},
        {"3",  "CUD", "CTD", "GAD", "GAD", "T"},
        {"3",  "UGA", "TGA", "ACU", "ACT", "W"},

        // Mycoplasma
        {"4",  "UGA", "TGA", "ACU", "ACT", "W"},

        {"5",  "AGA", "AGA", "UCU", "TCT", "S"},
        {"5",  "AGG", "AGG", "UCC", "TCC", "S"},
        {"5",  "AGR", "AGR", "UCY", "TCY", "S"},
        {"5",  "AUA", "ATA", "UAU", "TAT", "M"},
        {"5",  "UGA", "TGA", "ACU", "ACT", "W"},

        {"6",  "UAA", "TAA", "AUU", "ATT", "Q"},
        {"6",  "UAG", "TAG", "AUC", "ATC", "Q"},
        {"6",  "UAR", "TAR", "AUY", "ATY", "Q"},

        {"9",  "AAA", "AAA", "UUU", "TTT", "N"},
        {"9",  "AGA", "AGA", "UCU", "TCT", "S"},
        {"9",  "AGG", "AGG", "UCC", "TCC", "S"},
        {"9",  "AGR", "AGR", "UCY", "TCY", "S"},
        {"9",  "UGA", "TGA", "ACU", "ACT", "W"},

        {"10", "UGA", "TGA", "ACU", "ACT", "C"},

        {"12", "CUG", "CTG", "GAC", "GAC", "S"},

        {"13", "AGA", "AGA", "UCU", "TCT", "G"},
        {"13", "AGG", "AGG", "UCC", "TCC", "G"},
        {"13", "AGR", "AGR", "UCY", "TCY", "G"},
        {"13", "AUA", "ATA", "UAU", "TAT", "M"},
        {"13", "UGA", "TGA", "ACU", "ACT", "W"},

        {"14", "AAA", "AAA", "UUU", "TTT", "N"},
        {"14", "AGA", "AGA", "UCU", "TCT", "S"},
        {"14", "AGG", "AGG", "UCC", "TCC", "S"},
        {"14", "AGR", "AGR", "UCY", "TCY", "S"},
        {"14", "UAA", "TAA", "AUU", "ATT", "Y"},
        {"14", "UGA", "TGA", "ACU", "ACT", "W"},

        {"16", "UAG", "TAG", "AUC", "ATC", "L"},

        {"21", "UGA", "TGA", "ACU", "ACT", "W"},
        {"21", "AUA", "ATA", "UAU", "TAT", "M"},
        {"21", "AGA", "AGA", "UCU", "TCT", "S"},
        {"21", "AGG", "AGG", "UCC", "TCC", "S"},
        {"21", "AGR", "AGR", "UCY", "TCY", "S"},
        {"21", "AAA", "AAA", "UUU", "TTT", "N"},

        {"22", "UCA", "TCA", "AGU", "AGT", "*"},
        {"22", "UAG", "TAG", "AUC", "ATC", "L"},

        {"23", "UUA", "TTA", "AAU", "AAT", "*"},

        {"24", "AGA", "AGA", "UCU", "TCT", "S"},
        {"24", "AGG", "AGG", "UCC", "TCC", "K"},
        {"24", "UGA", "TGA", "ACU", "ACT", "W"},

        {"25", "UGA", "TGA", "ACU", "ACT", "G"},

        {"26", "CUG", "CTG", "GAC", "GAC", "A"},

        {"29", "UAA", "TAA", "AUU", "ATT", "Y"},
        {"29", "UAG", "TAG", "AUC", "ATC", "Y"},
        {"29", "UAR", "TAR", "AUY", "ATY", "Y"},

        {"30", "UAA", "TAA", "AUU", "ATT", "E"},
        {"30", "UAG", "TAG", "AUC", "ATC", "E"},
        {"30", "UAR", "TAR", "AUY", "ATY", "E"},

        {"33", "UAA", "TAA", "AUU", "ATT", "Y"},
        {"33", "UGA", "TGA", "ACU", "ACT", "W"},
        {"33", "AGA", "AGA", "UCU", "TCT", "S"},
        {"33", "AGG", "AGG", "UCC", "TCC", "K"},
    };

    cout << "Genetic code selected: " << selection << endl;

    // Apply, in listed order, every reassignment that belongs to the selection.
    for (const CodonChange &change : changes){
        if (selection != change.code){
            continue;
        }
        gc_std[change.rna] = change.residue;
        gc_std_DNA[change.dna] = change.residue;
        gc_std_rev[change.rna_rev] = change.residue;
        gc_std_DNA_rev[change.dna_rev] = change.residue;
    }
}
// -11, (-27, -28, -31)
// N R Y M K S W H B V D
// N Y R K M S W D V B H
translate_FUNCTION.cpp
View file @
b4022c5
...
...
@@ -7,6 +7,7 @@
#include <algorithm>
#include <vector>
#include "split.h"
#include "alternate_codes.h"
// system detection
// docker
...
...
@@ -15,12 +16,35 @@
/* Global variables */
// List of integer sequence lengths. Presumably one entry per record of a
// multi-FASTA input (guess from the name; no use is visible here) -- TODO confirm.
vector
<
int
>
multiFASTA_seqlen
;
// Codon -> residue table currently in effect. main() assigns it from
// gc_std_rev or gc_std_DNA_rev (depending on `rna`) before the 3'-5' pass.
map
<
string
,
string
>
genetic_code
;
// Start codons. main() fills the list with split(startCodon, '-') and
// extend_until_print() only writes a result when the first three nucleotides
// of the sequence are found in it (an empty list disables that filter).
// NOTE(review): this listing declares both `sCodon` and `sCodons`; main() and
// extend_until_print() appear in both spellings too -- confirm which is current.
vector
<
string
>
sCodon
;
vector
<
string
>
sCodon
s
;
// Starts at 0; presumably a minimum length for reported results (guess from
// the name; no use is visible here) -- TODO confirm.
int
minimum_length
=
0
;
// XXX PROTOTYPES
/*
 * Return the base-wise complement of every codon in `vec_codons`.
 *
 * Parameters:
 *   vec_codons  codons to complement (upper-case A/C/G/T/U expected)
 *   rna         true  -> 'A' is complemented to 'U'
 *               false -> 'A' is complemented to 'T'
 *
 * Returns a new vector with one complemented codon per input codon, in the
 * same order. Bases are complemented in place (the codon is NOT reversed).
 *
 * Both 'T' and 'U' are complemented to 'A'. The previous version ignored 'U',
 * so an RNA start codon such as "AUG" was silently shortened to "UC" and could
 * never match a 3-nucleotide window.
 *
 * Any other character (lower case, IUPAC ambiguity codes, ...) is still
 * dropped from the output, as before.
 */
vector<string> complement_sCodons(vector<string> vec_codons, bool rna){
    vector<string> cvec_codons;
    cvec_codons.reserve(vec_codons.size());

    for (const string &codon : vec_codons){
        string codonCompl;
        for (char c : codon){
            switch (c){
                case 'A':
                    codonCompl += (rna ? 'U' : 'T');
                    break;
                case 'C':
                    codonCompl += 'G';
                    break;
                case 'G':
                    codonCompl += 'C';
                    break;
                case 'T':
                case 'U':   // RNA spelling of T: same complement
                    codonCompl += 'A';
                    break;
                default:
                    // Unrecognised symbol: skipped, matching the original behaviour.
                    break;
            }
        }
        cvec_codons.push_back(codonCompl);
    }
    return cvec_codons;
}
void
extend_until_print
(
string
&
codon
,
string
&
protein
,
string
&
nucleic
,
int
frame
,
char
posneg
,
ofstream
&
outfile
,
ofstream
&
outfileDNA
,
bool
backward
,
int
position
,
int
glob_vec_index
){
if
(
codon
.
length
()
==
3
){
...
...
@@ -42,8 +66,8 @@ void extend_until_print(string &codon, string &protein, string &nucleic, int fra
//reverse( nucleic.begin(), nucleic.end() ); // XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX
if
(
!
sCodon
.
empty
()){
if
(
find
(
sCodon
.
begin
(),
sCodon
.
end
(),
nucleic
.
substr
(
0
,
3
))
!=
sCodon
.
end
()){
// if nucleic codon is in the vector of start codons
if
(
!
sCodon
s
.
empty
()){
if
(
find
(
sCodon
s
.
begin
(),
sCodons
.
end
(),
nucleic
.
substr
(
0
,
3
))
!=
sCodons
.
end
()){
// if nucleic codon is in the vector of start codons
outfile
<<
frame
<<
posneg
<<
"
\t
"
<<
begin
<<
"-"
<<
position
<<
"
\t
"
<<
protein
<<
"
\n
"
;
outfileDNA
<<
frame
<<
posneg
<<
"
\t
"
<<
begin
<<
"-"
<<
position
<<
"
\t
"
<<
nucleic
<<
"
\n
"
;
}
...
...
@@ -169,126 +193,6 @@ int check_file_content(string fastaFileName, bool rna){
}
/*
 * Switch the global codon tables from the standard genetic code to an
 * alternate one.
 *
 * `selection` names the requested code as text ("2", "3", ... "33"). Each
 * entry of the table below that carries this identifier overwrites one key in
 * each of gc_std, gc_std_DNA, gc_std_rev and gc_std_DNA_rev with the given
 * one-letter residue ("*" marks a stop). Identifiers without entries change
 * nothing. The selection is always echoed on stdout first.
 */
void select_genetic_code(string selection){
    // Keys to overwrite in the four tables and the residue stored under them.
    struct Override {
        const char *code;         // value of `selection` this entry applies to
        const char *rna_key;      // key in gc_std
        const char *dna_key;      // key in gc_std_DNA
        const char *rna_rev_key;  // key in gc_std_rev
        const char *dna_rev_key;  // key in gc_std_DNA_rev
        const char *aa;           // residue written to all four tables
    };

    static const Override overrides[] = {
        // The Vertebrate Mitochondrial Code
        {"2",  "AGA", "AGA", "UCU", "TCT", "*"},
        {"2",  "AGG", "AGG", "UCC", "TCC", "*"},
        {"2",  "AUA", "ATA", "UAU", "TAT", "M"},
        {"2",  "UGA", "TGA", "ACU", "ACT", "W"},

        {"3",  "AUA", "ATA", "UAU", "TAT", "M"},
        {"3",  "CUU", "CTT", "GAA", "GAA", "T"},
        {"3",  "CUC", "CTC", "GAG", "GAG", "T"},
        {"3",  "CUA", "CTA", "GAU", "GAT", "T"},
        {"3",  "CUG", "CTG", "GAC", "GAC", "T"},
        {"3",  "UGA", "TGA", "ACU", "ACT", "W"},

        // Mycoplasma
        {"4",  "UGA", "TGA", "ACU", "ACT", "W"},

        {"5",  "AGA", "AGA", "UCU", "TCT", "S"},
        {"5",  "AGG", "AGG", "UCC", "TCC", "S"},
        {"5",  "AUA", "ATA", "UAU", "TAT", "M"},
        {"5",  "UGA", "TGA", "ACU", "ACT", "W"},

        {"6",  "UAA", "TAA", "AUU", "ATT", "Q"},
        {"6",  "UAG", "TAG", "AUC", "ATC", "Q"},

        {"9",  "AAA", "AAA", "UUU", "TTT", "N"},
        {"9",  "AGA", "AGA", "UCU", "TCT", "S"},
        {"9",  "AGG", "AGG", "UCC", "TCC", "S"},
        {"9",  "UGA", "TGA", "ACU", "ACT", "W"},

        {"10", "UGA", "TGA", "ACU", "ACT", "C"},

        {"12", "CUG", "CTG", "GAC", "GAC", "S"},

        {"13", "AGA", "AGA", "UCU", "TCT", "G"},
        {"13", "AGG", "AGG", "UCC", "TCC", "G"},
        {"13", "AUA", "ATA", "UAU", "TAT", "M"},
        {"13", "UGA", "TGA", "ACU", "ACT", "W"},

        {"14", "AAA", "AAA", "UUU", "TTT", "N"},
        {"14", "AGA", "AGA", "UCU", "TCT", "S"},
        {"14", "AGG", "AGG", "UCC", "TCC", "S"},
        {"14", "UAA", "TAA", "AUU", "ATT", "Y"},
        {"14", "UGA", "TGA", "ACU", "ACT", "W"},

        {"16", "UAG", "TAG", "AUC", "ATC", "L"},

        {"21", "UGA", "TGA", "ACU", "ACT", "W"},
        {"21", "AUA", "ATA", "UAU", "TAT", "M"},
        {"21", "AGA", "AGA", "UCU", "TCT", "S"},
        {"21", "AGG", "AGG", "UCC", "TCC", "S"},
        {"21", "AAA", "AAA", "UUU", "TTT", "N"},

        {"22", "UCA", "TCA", "AGU", "AGT", "*"},
        {"22", "UAG", "TAG", "AUC", "ATC", "L"},

        {"23", "UUA", "TTA", "AAU", "AAT", "*"},

        {"24", "AGA", "AGA", "UCU", "TCT", "S"},
        {"24", "AGG", "AGG", "UCC", "TCC", "K"},
        {"24", "UGA", "TGA", "ACU", "ACT", "W"},

        {"25", "UGA", "TGA", "ACU", "ACT", "G"},

        {"26", "CUG", "CTG", "GAC", "GAC", "A"},

        {"29", "UAA", "TAA", "AUU", "ATT", "Y"},
        {"29", "UAG", "TAG", "AUC", "ATC", "Y"},

        {"30", "UAA", "TAA", "AUU", "ATT", "E"},
        {"30", "UAG", "TAG", "AUC", "ATC", "E"},

        {"33", "UAA", "TAA", "AUU", "ATT", "Y"},
        {"33", "UGA", "TGA", "ACU", "ACT", "W"},
        {"33", "AGA", "AGA", "UCU", "TCT", "S"},
        {"33", "AGG", "AGG", "UCC", "TCC", "K"},
    };

    cout << "Genetic code selected: " << selection << endl;

    // Walk the table once, applying the entries of the selected code in order.
    for (const Override &entry : overrides){
        if (selection == entry.code){
            gc_std[entry.rna_key] = entry.aa;
            gc_std_DNA[entry.dna_key] = entry.aa;
            gc_std_rev[entry.rna_rev_key] = entry.aa;
            gc_std_DNA_rev[entry.dna_rev_key] = entry.aa;
        }
    }
}
// -11, (-27, -28, -31)
// input list of files XXX
...
...
@@ -344,7 +248,8 @@ int main(int argc, char** argv){
puts
(
"Start codon: no specific amino acid selected"
);
}
else
{
sCodon
=
split
(
startCodon
,
'-'
);
sCodons
=
split
(
startCodon
,
'-'
);
// XXX Check alternate
}
/* Check input file content */
...
...
@@ -361,8 +266,7 @@ int main(int argc, char** argv){
/* Modifications to the standard genetic code */
select_genetic_code
(
alt_gene_code
);
// XXX option for Alternative Initiation Codons
// OR read them from a txt file
// XXX Check value
/* Filenames: temporary AND final output */
...
...
@@ -406,6 +310,7 @@ int main(int argc, char** argv){
/* Translation 3'-5' */
puts
(
"3'-5' translation..."
);
(
rna
)
?
genetic_code
=
gc_std_rev
:
genetic_code
=
gc_std_DNA_rev
;
sCodons
=
complement_sCodons
(
sCodons
,
rna
);
translate_3_frames
(
tmp_reversed_input
,
tmp_35_pro_file
,
tmp_35_dna_file
,
true
);
puts
(
"Done"
);
...
...
Please
register
or
login
to post a comment