rna.cpp
3.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#include <iostream>
extern "C"
{
#include <ViennaRNA/part_func_window.h>
}
#include "rna.h"
using namespace Eigen;
using std::cerr;
using std::cout;
using std::endl;
using std::ofstream;
using std::string;
using std::vector;
RNA::RNA(void) {}
RNA::RNA(string name, string seq, bool verbose)
: verbose_{verbose}, name_(name), seq_(seq), n_(seq.size()),
pij_(MatrixXf::Zero(n_, n_))
{
vector<char> unknown_chars;
bool contains_T = false;
for (char c : seq) {
if (c == 'T' or c == 't') {
c = 'U';
contains_T = true;
}
if (base_type(c) == BASE_N) {
unknown_chars.push_back(c);
}
}
if (contains_T)
if (verbose_) cout << "\tWARNING: Thymines automatically replaced by uraciles.";
if (unknown_chars.size() > 0) {
if (verbose_) cout << "\tWARNING: Unknown chars in input sequence ignored : ";
for (char c : unknown_chars)
if (verbose_) cout << c << " ";
if (verbose_) cout << endl;
}
if (verbose_) cout << "\t> Sequence formatted" << endl;
// Compute using ViennaRNA
if (verbose_) cout << "\t> Computing pairing probabilities (ViennaRNA's pfl_fold)..." << endl;
const char *cseq = seq.c_str();
int window_size = 100;
int max_bp_span = 100;
float cutoff = 1e-6;
vrna_ep_t* results = vrna_pfl_fold(cseq, window_size, max_bp_span, cutoff);
if (results != NULL)
{
while (results->i != 0 && results->j != 0)
{
pij_(results->i-1,results->j-1) = results->p;
results++;
}
/*define type_*/
type_ = vector<vector<int>>(n_, vector<int>(n_));
for(uint i = 0; i < n_; i++){
for(uint j = 0; j < n_; j++){
if (i < j){
std::stringstream ss;
ss << seq_[i] << seq_[j];
std::string str = ss.str();
if(str.compare("AU") == 0 ){
type_[i][j] = 1;
}
else if(str.compare("CG") == 0 ){
type_[i][j] = 2;
}
else if(str.compare("GC") == 0 ){
type_[i][j] = 3;
}
else if(str.compare("GU") == 0 ){
type_[i][j] = 4;
}
else if(str.compare("UG") == 0 ){
type_[i][j] = 5;
}
else if(str.compare("UA") == 0 ){
type_[i][j] = 6;
}
else{
type_[i][j] = 0;
}
}
else{
type_[i][j] = 0;
}
}
}
}
else cerr << "NULL result returned by vrna_pfl_fold" << endl;
}
void RNA::print_basepair_p_matrix(float theta) const
{
cout << endl;
cout << "\t=== -log10(p(i,j)) for each pair (i,j) of nucleotides: ===" << endl << endl;
cout << "\t" << seq_ << endl;
uint i = 0;
for (uint u = 0; u < pij_.rows(); u++) {
cout << "\t";
for (uint v = 0; v < pij_.cols(); v++) {
if (pij_(u, v) < 5e-10)
cout << " ";
else if (pij_(u, v) > theta)
cout << "\033[0;32m" << int(-log10(pij_(u, v))) << "\033[0m";
else
cout << int(-log10(pij_(u, v)));
}
cout << seq_[i] << endl;
i++;
}
cout << endl << "\t\033[0;32mgreen\033[0m basepairs are kept as decision variables." << endl << endl;
}
base_t RNA::base_type(char x) const
{
if (x == 'a' or x == 'A') return BASE_A;
if (x == 'c' or x == 'C') return BASE_C;
if (x == 'g' or x == 'G') return BASE_G;
if (x == 'u' or x == 'U') return BASE_U;
return BASE_N;
}