rna.cpp
3.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include "rna.h"
#include "nupack.h"
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using std::cout, std::cerr, std::endl;
RNA::RNA(string name, string seq)
{
if (!check_seq(seq)) {
cerr << "Unknown chars in input sequence. Please restrict to ACGTU." << endl;
exit(EXIT_FAILURE);
}
name_ = name;
seq_ = seq;
format();
n_ = seq_.size();
cout << "\t>formatted sequence" << endl;
/*define type_*/
type_ = vector<vector<int>>(n_, vector<int>(n_));
for (int i = 0; i < n_; i++) {
for (int j = 0; j < n_; j++) {
if (i < j) {
std::stringstream ss;
ss << seq_[i] << seq_[j];
string str = ss.str();
if (str.compare("AU") == 0) {
type_[i][j] = 1;
} else if (str.compare("CG") == 0) {
type_[i][j] = 2;
} else if (str.compare("GC") == 0) {
type_[i][j] = 3;
} else if (str.compare("GU") == 0) {
type_[i][j] = 4;
} else if (str.compare("UG") == 0) {
type_[i][j] = 5;
} else if (str.compare("UA") == 0) {
type_[i][j] = 6;
} else {
type_[i][j] = 0;
}
} else {
type_[i][j] = 0;
}
}
}
nBP_ = type_.size();
/*define coord_*/
for (int i = 0; i < n_; i++) {
for (int j = 0; j < n_; j++) {
if (i < j and type_[i][j] > 0) {
if (i != 0 and i != n_ and j != 0 and j != n_) {
if (type_[i - 1][j + 1] > 0 or type_[i + 1][j - 1] > 0) {
coord_.push_back(std::make_pair(i, j));
}
} else if (i == 0 or j == n_) {
if (type_[i + 1][j - 1] > 0) {
coord_.push_back(std::make_pair(i, j));
}
}
}
}
}
/*define pij_*/
vector<float> bp;
vector<int> offset;
Nupack nu;
// nu.load_parameters("rna1999.dG");
nu.load_default_parameters();
cout << "\t>default parameters loaded (Serra and Turner, 1995)" << endl;
nu.load_sequence(seq_);
cout << "\t>computing pairing probabilities..." << endl;
try {
nu.calculate_partition_function();
} catch (std::exception& e) {
cerr << e.what() << endl;
exit(EXIT_FAILURE);
}
nu.calculate_posterior();
nu.get_posterior(bp, offset);
pij_ = vector<vector<float>>(n_, vector<float>(n_));
for (int i = 1; i <= n_; i++) {
for (int j = 1; j <= n_; j++) {
pij_[i - 1][j - 1] = bp[offset[i] + j];
}
}
cout << "\t>pairing probabilities defined" << endl;
}
int RNA::find_coord(pair<int, int> p)
{
vector<pair<int, int>>::iterator it = find(coord_.begin(), coord_.end(), p);
int r = -1;
if (it != coord_.end()) r = distance(coord_.begin(), it);
return r;
}
bool RNA::check_seq(string seq) // Checks if the sequences only contains ACGUT.
{
bool res = true;
for (unsigned int i = 0; i < seq.size(); i++) {
if (seq[i] != 'A' and seq[i] != 'U' and seq[i] != 'C' and seq[i] != 'G' and seq[i] != 'T') {
res = false;
break;
}
}
return res;
}
void RNA::format()
{
for (unsigned int i = 0; i < seq_.size(); i++) {
seq_[i] = toupper(seq_[i]);
if (seq_[i] == 'T') {
seq_[i] = 'U';
break;
}
}
}