Showing
35 changed files
with
1534 additions
and
163 deletions
EditMe
0 → 100644
1 | +# Please set the following variables to the correct paths for JAR3D: | ||
2 | +jar3dexec="/home/persalteas/Software/jar3dbin/jar3d_2014-12-11.jar" | ||
3 | +ILmotifDir="/home/persalteas/Data/RNA/motifs/Matlab_results/IL/3.2/lib" | ||
4 | +HLmotifDir="/home/persalteas/Data/RNA/motifs/Matlab_results/HL/3.2/lib" | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
1 | -# ------------------------------------------------ | 1 | +ICONCERT=/opt/ibm/ILOG/CPLEX_Studio_Community128/concert/include |
2 | -# Generic Makefile | 2 | +ICPLEX=/opt/ibm/ILOG/CPLEX_Studio_Community128/cplex/include |
3 | -# ------------------------------------------------ | 3 | +LCONCERT=/opt/ibm/ILOG/CPLEX_Studio_Community128/concert/lib/x86-64_linux/static_pic/ |
4 | +LCPLEX=/opt/ibm/ILOG/CPLEX_Studio_Community128/cplex/lib/x86-64_linux/static_pic/ | ||
4 | 5 | ||
5 | # project name (generate executable with this name) | 6 | # project name (generate executable with this name) |
6 | -TARGET = motifscan | 7 | +TARGET = biominserter |
7 | 8 | ||
8 | CC = g++ | 9 | CC = g++ |
9 | # compiling flags here | 10 | # compiling flags here |
10 | -CFLAGS = -I. -O3 | 11 | +CFLAGS = -Icppsrc/ -I$(ICONCERT) -I$(ICPLEX) -O3 |
11 | -CXXFLAGS = -std=c++17 -Wall -Wpedantic -Wextra | 12 | +CXXFLAGS = -std=c++17 -Wall -Wpedantic -Wextra -Wno-ignored-attributes |
12 | 13 | ||
13 | LINKER = g++ | 14 | LINKER = g++ |
14 | # linking flags here | 15 | # linking flags here |
15 | -LDFLAGS = -lboost_system -lboost_filesystem -lboost_program_options | 16 | +LDFLAGS = -lconcert -lilocplex -lcplex -lm -lpthread -ldl -lboost_system -lboost_filesystem -lboost_program_options -L$(LCONCERT) -L$(LCPLEX) |
16 | 17 | ||
17 | # change these to proper directories where each file should be | 18 | # change these to proper directories where each file should be |
18 | -SRCDIR = src | 19 | +SRCDIR = cppsrc |
19 | OBJDIR = obj | 20 | OBJDIR = obj |
20 | BINDIR = bin | 21 | BINDIR = bin |
21 | 22 | ... | ... |
1 | -# RNA_Motifs_Inserter | 1 | +This is a bi-objective integer programming algorithm. |
2 | -A C++ software interface to RNA motif databases, and algorithms to include known motifs in raw sequences. | 2 | +It predicts the secondary structure of a RNA sequence with pieces of 3D information (non-canonical contacts) at some places, |
3 | +by identifying zones that can fold like known motifs. | ||
4 | + | ||
5 | +1/ How it works | ||
6 | +=================================== | ||
7 | +INPUT: | ||
8 | +- An RNA sequence (tested with sequences ~100 bases) | ||
9 | + | ||
10 | +THEN | ||
11 | +- Identifies possible 2D folds with RNAsubopt. | ||
12 | +- Knowing possible 2D folds, locate every possibly unpaired loop (hairpin loop, internal loop, multiple junction...) | ||
13 | +- align each unpaired loop to the catalogue of models of known RNA motifs (The 3D Motif Atlas of the BGSU RNA group) | ||
14 | +- retrieve a list of potential motif-insertion-sites in the RNA sequence. Use them to define the constraints for the IP problem. | ||
15 | +- Solve a bi-objective IP problem: | ||
16 | + * Maximize the expected accuracy of the secondary structure, | ||
17 | + * Maximize the number and size of motifs inserted in the structure. | ||
18 | + | ||
19 | +OUTPUT: | ||
20 | +- A set of secondary structures from the pareto front, | ||
21 | +- The list of known motif inserted in the corresponding structures (and the non-canonical contacts) | ||
22 | +- (Optionally, lower score structures from k-Pareto sets.) | ||
23 | + | ||
24 | +2/ Installation | ||
25 | +================================== | ||
26 | +- Download and install RNAsubopt from the ViennaRNA package (https://www.tbi.univie.ac.at/RNA/) | ||
27 | +- Download and install IBM ILOG Cplex optimization studio (https://www.ibm.com/analytics/cplex-optimizer), free academic account required | ||
28 | +- Download and install Java runtime (Tested with Java 10) | ||
29 | +- Download and install the latest JAR3D executable "jar3d_releasedate.jar" and motif models in this folder (http://rna.bgsu.edu/data/jar3d/models/) | ||
30 | + Note that for HL and ILs, only the latest version is required (not all the versions provided in the folders). | ||
31 | +- Download and install a C++ compiler and building dependencies and utilities (g++ or clang, automake, libboost) | ... | ... |
cppsrc/MOIP.cpp
0 → 100644
1 | +#include "MOIP.h" | ||
2 | +#include "SecondaryStructure.h" | ||
3 | +#include "rna.h" | ||
4 | +#include <algorithm> | ||
5 | +#include <boost/format.hpp> | ||
6 | +#include <cfloat> | ||
7 | +#include <cmath> | ||
8 | +#include <cstdlib> | ||
9 | +#include <iostream> | ||
10 | +#include <limits> | ||
11 | +#include <stdexcept> | ||
12 | +#include <utility> | ||
13 | +#include <vector> | ||
14 | + | ||
15 | +using std::cerr, std::cout, std::endl; | ||
16 | +using std::make_pair; | ||
17 | +using std::vector; | ||
18 | + | ||
19 | +uint MOIP::ncores = 0; | ||
20 | + | ||
21 | +MOIP::MOIP(const RNA& rna, const vector<Motif>& insertionSites) | ||
22 | +: rna_(rna), insertion_sites_(insertionSites), beta_(1.0), theta_{1.0 / (2.0 + 1.0)} | ||
23 | +{ | ||
24 | + basepair_dv_ = IloNumVarArray(env_); | ||
25 | + insertion_dv_ = IloNumVarArray(env_); | ||
26 | + | ||
27 | + // Add the y^u_v decision variables | ||
28 | + uint u, v, c = 0; | ||
29 | + index_of_yuv_ = vector<vector<size_t>>(rna_.get_RNA_length() - 6, vector<size_t>(0)); | ||
30 | + for (u = 0; u < rna_.get_RNA_length() - 6; u++) { | ||
31 | + for (v = u + 4; v < rna_.get_RNA_length(); v++) // A basepair is possible iff v > u+3 | ||
32 | + { | ||
33 | + index_of_yuv_[u].push_back(c); | ||
34 | + c++; | ||
35 | + char name[15]; | ||
36 | + sprintf(name, "y%d,%d", u, v); | ||
37 | + basepair_dv_.add(IloNumVar(env_, 0, 1, IloNumVar::Bool, name)); // A boolean whether u and v are paired | ||
38 | + } | ||
39 | + } | ||
40 | + // Add the Cx,i,p decision variables | ||
41 | + index_of_first_components.reserve(insertionSites.size()); | ||
42 | + index_of_Cxip_ = vector<vector<size_t>>(0); | ||
43 | + index_of_Cxip_.reserve(insertionSites.size()); | ||
44 | + size_t i = 0; | ||
45 | + for (const Motif m : insertionSites) { | ||
46 | + index_of_first_components.push_back(i); | ||
47 | + index_of_Cxip_.push_back(vector<size_t>(0)); | ||
48 | + for (const Component c : m.comp) { | ||
49 | + index_of_Cxip_.back().push_back(i); | ||
50 | + if (c.k > 0) i++; | ||
51 | + char name[20]; | ||
52 | + sprintf( | ||
53 | + name, | ||
54 | + "C%d,%d-%d", | ||
55 | + static_cast<int>(index_of_Cxip_.size() - 1), | ||
56 | + static_cast<int>(index_of_Cxip_.back().size() - 1), | ||
57 | + c.pos.first); | ||
58 | + insertion_dv_.add(IloNumVar(env_, 0, 1, IloNumVar::Bool, name)); // A boolean whether component i of motif x is inserted at position p | ||
59 | + } | ||
60 | + } | ||
61 | + cout << i + c << " decision variables are used !" << endl; | ||
62 | +} | ||
63 | + | ||
64 | + | ||
65 | +MOIP::~MOIP() { env_.end(); } | ||
66 | + | ||
67 | + | ||
68 | +bool MOIP::is_undominated_yet(const SecondaryStructure& s) | ||
69 | +{ | ||
70 | + for (uint i = 0; i < pareto_.size(); i++) { | ||
71 | + if (pareto_[i] > s) return false; | ||
72 | + } | ||
73 | + return true; | ||
74 | +} | ||
75 | + | ||
76 | +void MOIP::solve_objective(int o, double min, double max) | ||
77 | +{ | ||
78 | + IloModel model_ = IloModel(env_); | ||
79 | + cout << "Solving objective function " << o << "..." << endl; | ||
80 | + add_problem_constraints(model_); | ||
81 | + switch (o) { | ||
82 | + case 1: { | ||
83 | + // Add the motif objective function | ||
84 | + IloExpr obj1 = IloExpr(env_); | ||
85 | + for (uint i = 0; i < insertion_sites_.size(); i++) { | ||
86 | + IloNum n_compo_squared = IloNum(insertion_sites_[i].comp.size() * insertion_sites_[i].comp.size()); | ||
87 | + obj1 += n_compo_squared * insertion_dv_[index_of_first_components[i]]; | ||
88 | + } | ||
89 | + model_.add(IloMinimize(env_, obj1)); | ||
90 | + } break; | ||
91 | + case 2: { | ||
92 | + // Add the likelihood objective function | ||
93 | + } | ||
94 | + } | ||
95 | + IloCplex cplex_ = IloCplex(model_); | ||
96 | + if (!cplex_.solve()) { | ||
97 | + env_.error() << "\t>Failed to optimize LP." << endl; | ||
98 | + throw(-1); | ||
99 | + } | ||
100 | + IloNumArray basepair_values(env_); | ||
101 | + IloNumArray insertion_values(env_); | ||
102 | + env_.out() << endl << "Solution status = " << cplex_.getStatus() << endl; | ||
103 | + env_.out() << endl << "Objective value = " << cplex_.getObjValue() << endl; | ||
104 | + cplex_.getValues(basepair_values, basepair_dv_); | ||
105 | + env_.out() << endl << "Basepair Values = " << basepair_values << endl; | ||
106 | + cplex_.getValues(insertion_values, basepair_dv_); | ||
107 | + env_.out() << endl << "Insertion Values = " << insertion_values << endl; | ||
108 | + | ||
109 | + // TODO : retrieve the secondary structure !! | ||
110 | +} | ||
111 | + | ||
112 | +void MOIP::add_problem_constraints(const IloModel& model_) | ||
113 | +{ | ||
114 | + // ensure there only is 0 or 1 pairing by nucleotide: | ||
115 | + cout << "\t>ensuring there are 0 or 1 pairing by nucleotide..." << endl; | ||
116 | + uint u, v; | ||
117 | + uint n = rna_.get_RNA_length(); | ||
118 | + for (u = 0; u < n - 6; u++) { | ||
119 | + IloExpr c1(env_); | ||
120 | + for (v = 0; v < u; v++) | ||
121 | + if (allowed_basepair(v, u)) c1 += y(v, u); | ||
122 | + for (v = u + 4; v < n; v++) | ||
123 | + if (allowed_basepair(u, v)) c1 += y(u, v); | ||
124 | + model_.add(c1 <= 1); | ||
125 | + // cout << "\t>It worked for base " << u << " : " << (c1 <= 1) << endl; | ||
126 | + } | ||
127 | + // forbid lonely basepairs | ||
128 | + cout << "\t>forbidding lonely basepairs..." << endl; | ||
129 | + for (u = 0; u < n - 6; u++) { | ||
130 | + IloExpr c2(env_); // for the case where s[u] is paired to s[v], v>u | ||
131 | + c2 += 1; | ||
132 | + for (v = u; v < n; v++) | ||
133 | + if (allowed_basepair(u - 1, v)) c2 += y(u - 1, v); | ||
134 | + for (v = u + 1; v < n; v++) | ||
135 | + if (allowed_basepair(u, v)) c2 -= y(u, v); | ||
136 | + for (v = u + 2; v < n; v++) | ||
137 | + if (allowed_basepair(u + 1, v)) c2 += y(u + 1, v); | ||
138 | + model_.add(c2 >= 1); | ||
139 | + // cout << "\t>It worked for base " << u << " : " << (c2 >= 1) << endl; | ||
140 | + } | ||
141 | + for (v = 5; v < n; v++) { | ||
142 | + IloExpr c2p(env_); // for the case where s[u] is paired to s[v], v<u | ||
143 | + c2p += 1; | ||
144 | + for (u = 1; u <= v - 2; u++) | ||
145 | + if (allowed_basepair(u, v - 1)) c2p += y(u, v - 1); | ||
146 | + for (u = 1; u <= v - 1; u++) | ||
147 | + if (allowed_basepair(u, v)) c2p -= y(u, v); | ||
148 | + for (u = 1; u <= v; u++) | ||
149 | + if (allowed_basepair(u, v + 1)) c2p += y(u, v + 1); | ||
150 | + model_.add(c2p >= 1); | ||
151 | + // cout << "\t>It worked for base " << u << " : " << (c2p >= 1) << endl; | ||
152 | + } | ||
153 | + // Forbid pairings inside every motif component if included | ||
154 | + cout << "\t>forbidding basepairs inside included motif's components..." << endl; | ||
155 | + for (size_t i = 0; i < insertion_sites_.size(); i++) { | ||
156 | + Motif& x = insertion_sites_[i]; | ||
157 | + for (size_t j = 0; j < x.comp.size(); j++) { | ||
158 | + Component& c = x.comp[j]; | ||
159 | + IloExpr c3(env_); | ||
160 | + IloNum kxi = IloNum(c.k); | ||
161 | + c3 += kxi * C(i, j); | ||
162 | + for (u = c.pos.first; u < c.pos.second; u++) { | ||
163 | + for (v = 0; v < n; v++) | ||
164 | + if (allowed_basepair(u, v)) c3 += y(u, v); | ||
165 | + } | ||
166 | + model_.add(c3 <= kxi); | ||
167 | + } | ||
168 | + } | ||
169 | + // To be continued ... | ||
170 | +} | ||
171 | + | ||
172 | +void MOIP::extend_pareto(double lambdaMin, double lambdaMax) | ||
173 | +{ | ||
174 | + if (lambdaMin >= lambdaMax) { | ||
175 | + cerr << "lambdaMax < lambdaMin, something's wrong !" << endl; | ||
176 | + exit(EXIT_FAILURE); | ||
177 | + } | ||
178 | + | ||
179 | + // for any SecondaryStructure in pareto_ such that the value of the second | ||
180 | + // objective is between lambdaMin and lambdaMax | ||
181 | + // a DIFF() constraint and a mirror constraint is added | ||
182 | + for (uint i = 0; i < pareto_.size(); i++) { | ||
183 | + // DIFF() | ||
184 | + if ( | ||
185 | + (abs(pareto_[i].get_objective_score(2) - lambdaMin) < PRECISION or pareto_[i].get_objective_score(2) > lambdaMin) and | ||
186 | + (abs(pareto_[i].get_objective_score(2) - lambdaMax) < PRECISION or pareto_[i].get_objective_score(2) < lambdaMax)) { | ||
187 | + // ip.add_bj_ct(pareto_[i]); | ||
188 | + } | ||
189 | + // mirror | ||
190 | + // if ( | ||
191 | + // (abs(pareto_[i].get_obj2M_() - lambdaMin) < PRECISION or pareto_[i].get_obj2M_() > lambdaMin) and | ||
192 | + // (abs(pareto_[i].get_obj2M_() - lambdaMax) < PRECISION or pareto_[i].get_obj2M_() < lambdaMax)) { | ||
193 | + // ip.add_bj_ct_M(pareto_[i]); | ||
194 | + // } | ||
195 | + } | ||
196 | + | ||
197 | + // SecondaryStructure s = solve_objective(1, lambdaMin, lambdaMax); | ||
198 | + | ||
199 | + // if (is_undominated_yet(s)) { | ||
200 | + // // adding the SecondaryStructure s to the set pareto_ | ||
201 | + // add_solution(s); | ||
202 | + // // run localPareto above the SecondaryStructure s | ||
203 | + // extend_pareto(s.get_objective_score(2), lambdaMax); | ||
204 | + // } | ||
205 | +} | ||
206 | + | ||
207 | +size_t MOIP::get_yuv_index(size_t u, size_t v) const | ||
208 | +{ | ||
209 | + size_t a, b; | ||
210 | + a = (u < v) ? u : v; | ||
211 | + b = (u > v) ? u : v; | ||
212 | + return index_of_yuv_[a][b - 4 - a]; | ||
213 | +} | ||
214 | + | ||
215 | +size_t MOIP::get_Cpxi_index(size_t x_i, size_t i_on_j) const { return index_of_Cxip_[x_i][i_on_j]; } | ||
216 | + | ||
217 | + | ||
218 | +bool MOIP::allowed_basepair(size_t u, size_t v) const | ||
219 | +{ | ||
220 | + size_t a, b; | ||
221 | + a = (v > u) ? u : v; | ||
222 | + b = (v > u) ? v : u; | ||
223 | + if (b - a < 4) return false; | ||
224 | + if (a >= rna_.get_RNA_length() - 6) return false; | ||
225 | + if (b >= rna_.get_RNA_length()) return false; | ||
226 | + return true; | ||
227 | +} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
cppsrc/MOIP.h
0 → 100644
1 | +#ifndef MOIP_H_ | ||
2 | +#define MOIP_H_ | ||
3 | + | ||
4 | +#define IL_STD | ||
5 | + | ||
6 | +#include "SecondaryStructure.h" | ||
7 | +#include "rna.h" | ||
8 | +#include <ilcplex/ilocplex.h> | ||
9 | + | ||
10 | +using std::vector; | ||
11 | + | ||
12 | +const double PRECISION = 0.0001; | ||
13 | + | ||
14 | + | ||
15 | +class MOIP | ||
16 | +{ | ||
17 | + public: | ||
18 | + static uint ncores; | ||
19 | + typedef enum { MIN, MAX } DirType; | ||
20 | + typedef enum { FR, LO, UP, DB, FX } BoundType; | ||
21 | + MOIP(const RNA& rna, const vector<Motif>& motifSites); | ||
22 | + ~MOIP(void); | ||
23 | + void solve_objective(int o, double min, double max); | ||
24 | + uint get_n_solutions(void) const; | ||
25 | + const SecondaryStructure& solution(uint i) const; | ||
26 | + void extend_pareto(double lambdaMin, double lambdaMax); | ||
27 | + bool allowed_basepair(size_t u, size_t v) const; | ||
28 | + void add_solution(const SecondaryStructure& s); | ||
29 | + | ||
30 | + private: | ||
31 | + bool is_undominated_yet(const SecondaryStructure& s); | ||
32 | + void add_problem_constraints(const IloModel& model_); | ||
33 | + size_t get_yuv_index(size_t u, size_t v) const; | ||
34 | + size_t get_Cpxi_index(size_t x_i, size_t i_on_j) const; | ||
35 | + IloNumExprArg& y(size_t u, size_t v); // Direct reference to y^u_v in basepair_dv_ | ||
36 | + IloNumExprArg& C(size_t x, size_t i); // Direct reference to C_p^xi in insertion_dv_ | ||
37 | + | ||
38 | + | ||
39 | + RNA rna_; // RNA object | ||
40 | + vector<Motif> insertion_sites_; // Potential Motif insertion sites | ||
41 | + const float beta_; // beta parameter of the probability function | ||
42 | + double lambdaMin_; // minimum threshold value for the probability value | ||
43 | + double lambdaMax_; // maximum threshold value for the probability value | ||
44 | + int vp_; // vp_ variable for penalization of the probability score | ||
45 | + float theta_; // theta parameter for the probability function | ||
46 | + IloEnv env_; // environment CPLEX object | ||
47 | + IloNumVarArray basepair_dv_; // Decision variables | ||
48 | + IloNumVarArray insertion_dv_; // Decision variables | ||
49 | + vector<SecondaryStructure> pareto_; // Vector of results | ||
50 | + vector<vector<size_t>> index_of_Cxip_; // Stores the indexes of the Cxip in insertion_dv_ | ||
51 | + vector<vector<size_t>> index_of_yuv_; // Stores the indexes of the y^u_v in basepair_dv_ in a complex way. Use get_yuv_index(u,v) to retrieve. | ||
52 | + vector<size_t> index_of_first_components; // Stores the indexes of Cx1p in insertion_dv_ | ||
53 | +}; | ||
54 | + | ||
55 | +inline void MOIP::add_solution(const SecondaryStructure& s) { pareto_.push_back(s); } | ||
56 | +inline uint MOIP::get_n_solutions(void) const { return pareto_.size(); } | ||
57 | +inline const SecondaryStructure& MOIP::solution(uint i) const { return pareto_[i]; } | ||
58 | +inline IloNumExprArg& MOIP::y(size_t u, size_t v) { return basepair_dv_[get_yuv_index(u, v)]; } | ||
59 | +inline IloNumExprArg& MOIP::C(size_t x, size_t i) { return insertion_dv_[get_Cpxi_index(x, i)]; } | ||
60 | + | ||
61 | +#endif // MOIP_H_ | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
cppsrc/SecondaryStructure.cpp
0 → 100644
1 | +#include "SecondaryStructure.h" | ||
2 | +#include <boost/format.hpp> | ||
3 | + | ||
4 | +static const double PRECISION(0.0001); | ||
5 | + | ||
6 | +SecondaryStructure::SecondaryStructure(const vector<double>& scores, const vector<bool>& decision_variables, VII coord, int RNAlength) | ||
7 | +: objective_scores_(scores), dv_(decision_variables), coord_(coord), n_(RNAlength) | ||
8 | +{ | ||
9 | +} | ||
10 | + | ||
11 | +string SecondaryStructure::to_DBN(void) const | ||
12 | +{ | ||
13 | + string res(n_, '.'); | ||
14 | + for (size_t i = 0; i < n_; i++) { | ||
15 | + if (dv_[i]) { | ||
16 | + res[coord_[i].first] = '('; | ||
17 | + res[coord_[i].second] = ')'; | ||
18 | + } | ||
19 | + } | ||
20 | + return res; | ||
21 | +} | ||
22 | + | ||
23 | +string SecondaryStructure::to_string(void) const | ||
24 | +{ | ||
25 | + return to_DBN() + "\t" + boost::str(boost::format("%.6f") % objective_scores_[0]) + "\t" + | ||
26 | + boost::str(boost::format("%.6f") % objective_scores_[1]); | ||
27 | +} | ||
28 | + | ||
29 | +bool operator>(const SecondaryStructure& s1, const SecondaryStructure& s2) | ||
30 | +{ | ||
31 | + double s11 = s1.get_objective_score(0); | ||
32 | + double s12 = s1.get_objective_score(1); | ||
33 | + double s21 = s2.get_objective_score(0); | ||
34 | + double s22 = s2.get_objective_score(1); | ||
35 | + | ||
36 | + bool obj1 = false, obj2 = false, strict1 = false, strict2 = false; | ||
37 | + | ||
38 | + if (s11 > s21) { | ||
39 | + strict1 = true; | ||
40 | + obj1 = true; | ||
41 | + } else if (abs(s11 - s21) < PRECISION) { | ||
42 | + obj1 = true; | ||
43 | + } | ||
44 | + if (s12 > s22) { | ||
45 | + strict2 = true; | ||
46 | + obj2 = true; | ||
47 | + } else if (abs(s12 - s22) < PRECISION) { | ||
48 | + obj2 = true; | ||
49 | + } | ||
50 | + | ||
51 | + if (obj1 && obj2 && (strict1 || strict2)) { | ||
52 | + return true; | ||
53 | + } | ||
54 | + | ||
55 | + return false; | ||
56 | +} | ||
57 | + | ||
58 | +bool operator<(const SecondaryStructure& s1, const SecondaryStructure& s2) | ||
59 | +{ | ||
60 | + double s11 = s1.get_objective_score(0); | ||
61 | + double s12 = s1.get_objective_score(1); | ||
62 | + double s21 = s2.get_objective_score(0); | ||
63 | + double s22 = s2.get_objective_score(1); | ||
64 | + | ||
65 | + bool obj1 = false, obj2 = false, strict1 = false, strict2 = false; | ||
66 | + | ||
67 | + if (s11 < s21) { | ||
68 | + strict1 = true; | ||
69 | + obj1 = true; | ||
70 | + } else if (abs(s11 - s21) < PRECISION) { | ||
71 | + obj1 = true; | ||
72 | + } | ||
73 | + if (s12 < s22) { | ||
74 | + strict2 = true; | ||
75 | + obj2 = true; | ||
76 | + } else if (abs(s12 - s22) < PRECISION) { | ||
77 | + obj2 = true; | ||
78 | + } | ||
79 | + | ||
80 | + if (obj1 && obj2 && (strict1 || strict2)) { | ||
81 | + return true; | ||
82 | + } | ||
83 | + return false; | ||
84 | +} |
cppsrc/SecondaryStructure.h
0 → 100644
1 | +#ifndef __INC_IP_SOL__ | ||
2 | +#define __INC_IP_SOL__ | ||
3 | + | ||
4 | +#include "rna.h" | ||
5 | +#include <string> | ||
6 | +#include <vector> | ||
7 | + | ||
8 | +using std::string; | ||
9 | +using std::vector; | ||
10 | + | ||
11 | +typedef vector<int> VI; | ||
12 | +typedef vector<VI> VVI; | ||
13 | +typedef vector<std::pair<int, int>> VII; | ||
14 | + | ||
15 | + | ||
16 | +class SecondaryStructure | ||
17 | +{ | ||
18 | + public: | ||
19 | + SecondaryStructure(void); | ||
20 | + SecondaryStructure(const vector<double>& scores, const vector<bool>& decision_variables, VII coord, int RNAlength); | ||
21 | + | ||
22 | + double get_objective_score(int i) const; | ||
23 | + const vector<bool>& get_decision_variables() const; | ||
24 | + bool get_decision_value(int i) const; | ||
25 | + VII get_coord() const; | ||
26 | + int get_RNA_length() const; | ||
27 | + | ||
28 | + void set_objective_score(int i, double s); | ||
29 | + string to_DBN() const; | ||
30 | + string to_string() const; | ||
31 | + | ||
32 | + private: | ||
33 | + vector<double> objective_scores_; // values of the different objective functions for that SecondaryStructure | ||
34 | + vector<bool> dv_; // values of the decision variable of the integer program | ||
35 | + vector<Motif> motif_info_; // information about known motives in this secondary structure and their positions | ||
36 | + VII coord_; // coordinates of the dv_. dv_[i] == true <==> coord_[i][0] paired to coord_[i][1]; | ||
37 | + size_t n_; // length of the RNA | ||
38 | +}; | ||
39 | + | ||
40 | +// return if this SecondaryStructure s1 dominates s2 | ||
41 | +bool operator>(const SecondaryStructure& s1, const SecondaryStructure& s2); | ||
42 | +// return if this SecondaryStructure s2 dominates s1 | ||
43 | +bool operator<(const SecondaryStructure& s1, const SecondaryStructure& s2); | ||
44 | + | ||
45 | +inline double SecondaryStructure::get_objective_score(int i) const { return objective_scores_[i]; } | ||
46 | +inline const vector<bool>& SecondaryStructure::get_decision_variables() const { return dv_; } | ||
47 | +inline void SecondaryStructure::set_objective_score(int i, double s) { objective_scores_[i - 1] = s; } | ||
48 | +inline VII SecondaryStructure::get_coord() const { return coord_; } | ||
49 | +inline int SecondaryStructure::get_RNA_length() const { return n_; } | ||
50 | +inline bool SecondaryStructure::get_decision_value(int i) const { return dv_[i]; } | ||
51 | + | ||
52 | +#endif |
cppsrc/biominserter.cpp
0 → 100644
1 | +/*** | ||
2 | + Biominserter | ||
3 | + Louis Becquey, starting from Audrey Legendre's code | ||
4 | + nov 2018 | ||
5 | +***/ | ||
6 | + | ||
7 | +#include <algorithm> | ||
8 | +#include <boost/algorithm/string.hpp> | ||
9 | +#include <cstdlib> | ||
10 | +#include <iostream> | ||
11 | +#include <iterator> | ||
12 | +#include <string> | ||
13 | +#include <thread> | ||
14 | +#include <vector> | ||
15 | + | ||
16 | +#include "MOIP.h" | ||
17 | +#include "fa.h" | ||
18 | +#include "nupack.h" | ||
19 | + | ||
20 | +using namespace std; | ||
21 | + | ||
22 | +Motif parse_csv_line(string line) | ||
23 | +{ | ||
24 | + vector<string> tokens; | ||
25 | + boost::split(tokens, line, boost::is_any_of(",")); | ||
26 | + Motif m; | ||
27 | + m.atlas_id = tokens[0]; | ||
28 | + m.comp.push_back(Component(make_pair<int, int>(stoi(tokens[3]), stoi(tokens[4])), stoi(tokens[2]))); | ||
29 | + if (tokens[5] != "-") | ||
30 | + m.comp.push_back(Component(make_pair<int, int>(stoi(tokens[5]), stoi(tokens[6])), stoi(tokens[2]))); | ||
31 | + m.reversed = (tokens[1] == "True"); | ||
32 | + return m; | ||
33 | +} | ||
34 | + | ||
35 | +int main(int argc, char* argv[]) | ||
36 | +{ | ||
37 | + // float time; | ||
38 | + // clock_t t1, t2; | ||
39 | + // t1 = clock(); | ||
40 | + | ||
41 | + MOIP::ncores = thread::hardware_concurrency() - 1; | ||
42 | + | ||
43 | + if (argc != 3) { | ||
44 | + cerr << argc << " arguments specified !" << endl; | ||
45 | + cerr << "Please specify the following input files:" << endl; | ||
46 | + cerr << "biominserter sequence.fasta insertion.sites.csv" << endl; | ||
47 | + return EXIT_FAILURE; | ||
48 | + } | ||
49 | + | ||
50 | + | ||
51 | + const char* inputName = argv[1]; | ||
52 | + const char* csvname = argv[2]; | ||
53 | + cout << "Reading input files..." << endl; | ||
54 | + if (access(inputName, F_OK) == -1) { | ||
55 | + cerr << inputName << " not found" << endl; | ||
56 | + return EXIT_FAILURE; | ||
57 | + } | ||
58 | + if (access(csvname, F_OK) == -1) { | ||
59 | + cerr << csvname << " not found" << endl; | ||
60 | + return EXIT_FAILURE; | ||
61 | + } | ||
62 | + | ||
63 | + // load fasta file | ||
64 | + list<Fasta> f; | ||
65 | + Fasta::load(f, inputName); | ||
66 | + list<Fasta>::iterator fa = f.begin(); | ||
67 | + cout << "loading " << fa->name() << "..." << endl; | ||
68 | + RNA myRNA = RNA(fa->name(), fa->seq()); | ||
69 | + cout << "\t>" << inputName << " successfuly loaded" << endl; | ||
70 | + | ||
71 | + // load CSV file | ||
72 | + string line; | ||
73 | + ifstream motifs = ifstream(csvname); | ||
74 | + getline(motifs, line); // skip header | ||
75 | + vector<Motif> posInsertionSites; | ||
76 | + while (getline(motifs, line)) { | ||
77 | + posInsertionSites.push_back(parse_csv_line(line)); | ||
78 | + } | ||
79 | + cout << "\t>" << csvname << " successfuly loaded" << endl; | ||
80 | + | ||
81 | + // creating the Multi-Objective problem: | ||
82 | + MOIP myMOIP = MOIP(myRNA, posInsertionSites); // using the constructor with arguments automatically defines the decision variables. | ||
83 | + | ||
84 | + // finding the best SecondaryStructures for each objective | ||
85 | + double max = myRNA.get_RNA_length(); | ||
86 | + try { | ||
87 | + myMOIP.solve_objective(1, -max, max); | ||
88 | + } catch (IloCplex::Exception& e) { | ||
89 | + cerr << e << endl; | ||
90 | + } | ||
91 | + // SecondaryStructure bestSSO1 = myMOIP.solve_objective(1, -max, max); | ||
92 | + // SecondaryStructure bestSSO2 = myMOIP.solve_objective(2, -max, max); | ||
93 | + // double bestObj2 = bestSSO2.get_objective_score(2); | ||
94 | + | ||
95 | + // extend to the whole pareto set | ||
96 | + // myMOIP.add_solution(bestSSO1); | ||
97 | + // myMOIP.extend_pareto(bestObj2, max); | ||
98 | + | ||
99 | + // print the pareto set | ||
100 | + // cout << "Structure \t Free energy score \t Expected accuracy score" << endl; | ||
101 | + // for (uint i = 0; i < myMOIP.get_n_solutions(); i++) { | ||
102 | + // cout << myMOIP.solution(i).to_string() << endl; | ||
103 | + // } | ||
104 | + // cout << endl; | ||
105 | + | ||
106 | + return EXIT_SUCCESS; | ||
107 | +} |
cppsrc/dptable.h
0 → 100644
1 | +/* | ||
2 | + * $Id$ | ||
3 | + * | ||
4 | + * Copyright (C) 2010 Kengo Sato | ||
5 | + * | ||
6 | + * doublehis file comes from IPknot. | ||
7 | + * | ||
8 | + */ | ||
9 | + | ||
10 | +#ifndef __INC_DP_TABLE_H__ | ||
11 | +#define __INC_DP_TABLE_H__ | ||
12 | + | ||
13 | +class DPtable2 | ||
14 | +{ | ||
15 | + public: | ||
16 | + DPtable2() : V_(), N_(0) {} | ||
17 | + void resize(int n) | ||
18 | + { | ||
19 | + N_ = n; | ||
20 | + V_.resize(N_ * (N_ + 1) / 2 + (N_ + 1)); | ||
21 | + } | ||
22 | + void fill(const double& v) { std::fill(V_.begin(), V_.end(), v); } | ||
23 | + double& operator()(int i, int j) { return V_[index(i, j)]; } | ||
24 | + const double& operator()(int i, int j) const { return V_[index(i, j)]; } | ||
25 | + | ||
26 | + private: | ||
27 | + int index(int i, int j) const | ||
28 | + { | ||
29 | + assert(j <= N_); | ||
30 | + return j == i - 1 ? N_ * (N_ + 1) / 2 + i : i * N_ + j - i * (1 + i) / 2; | ||
31 | + } | ||
32 | + std::vector<double> V_; | ||
33 | + int N_; | ||
34 | +}; | ||
35 | + | ||
36 | +class DPtable4 | ||
37 | +{ | ||
38 | + public: | ||
39 | + DPtable4() : V_(), N_(0) {} | ||
40 | + void resize(int n) | ||
41 | + { | ||
42 | + N_ = n; | ||
43 | + V_.resize(N_ * (N_ - 1) * (N_ - 2) * (N_ - 3) / 2 / 3 / 4); | ||
44 | + } | ||
45 | + void fill(const double& v) { std::fill(V_.begin(), V_.end(), v); } | ||
46 | + double& operator()(int i, int d, int e, int j) { return V_[index(i, d, e, j)]; } | ||
47 | + const double& operator()(int i, int d, int e, int j) const { return V_[index(i, d, e, j)]; } | ||
48 | + | ||
49 | + private: | ||
50 | + int index(int h, int r, int m, int s) const | ||
51 | + { | ||
52 | + int n = N_; | ||
53 | + int h2 = h * h; | ||
54 | + int h3 = h2 * h; | ||
55 | + int h4 = h3 * h; | ||
56 | + int m2 = m * m; | ||
57 | + int n2 = n * n; | ||
58 | + int n3 = n2 * n; | ||
59 | + int r2 = r * r; | ||
60 | + int r3 = r2 * r; | ||
61 | + assert(h <= r); | ||
62 | + assert(r <= m); | ||
63 | + assert(m <= s); | ||
64 | + assert(s <= N_); | ||
65 | + return (h == r && m == s) ? V_.size() - 1 : | ||
66 | + ( | ||
67 | + -24 - 50 * h - 35 * h2 - 10 * h3 - h4 - 36 * m - 12 * m2 + 12 * n + 70 * h * n + | ||
68 | + 30 * h2 * n + 4 * h3 * n + 24 * m * n - 12 * n2 - 30 * h * n2 - 6 * h2 * n2 + 4 * h * n3 + | ||
69 | + 44 * r - 48 * n * r + 12 * n2 * r + 24 * r2 - 12 * n * r2 + 4 * r3 + 24 * s) / | ||
70 | + 24; | ||
71 | + } | ||
72 | + std::vector<double> V_; | ||
73 | + int N_; | ||
74 | +}; | ||
75 | + | ||
76 | +class DPtableX | ||
77 | +{ | ||
78 | + public: | ||
79 | + DPtableX() : V_(), N_(0), D_(0) {} | ||
80 | + void resize(int d, int n) | ||
81 | + { | ||
82 | + N_ = n; | ||
83 | + D_ = d; | ||
84 | + int max_sz = 0; | ||
85 | + for (int i = d; i < d + 3; ++i) max_sz = std::max(max_sz, (N_ - i) * (i - 5) * (i - 1) * (i - 2) / 2); | ||
86 | + V_.resize(max_sz); | ||
87 | + } | ||
88 | + void fill(const double& v) { std::fill(V_.begin(), V_.end(), v); } | ||
89 | + double& operator()(int i, int d, int e, int s) { return V_[index(i, d, e, s)]; } | ||
90 | + const double& operator()(int i, int d, int e, int s) const { return V_[index(i, d, e, s)]; } | ||
91 | + void swap(DPtableX& x) | ||
92 | + { | ||
93 | + std::swap(V_, x.V_); | ||
94 | + std::swap(N_, x.N_); | ||
95 | + std::swap(D_, x.D_); | ||
96 | + } | ||
97 | + | ||
98 | + private: | ||
99 | + int index(int i, int h1, int m1, int s) const | ||
100 | + { | ||
101 | + int d = D_; | ||
102 | + int d1d2 = (d - 1) * (d - 2); | ||
103 | + int d5 = d - 5; | ||
104 | + int h1_i_1 = h1 - i - 1; | ||
105 | + assert(i + d < N_); | ||
106 | + assert(d - 6 >= s); | ||
107 | + assert(i < h1); | ||
108 | + return i * d5 * d1d2 / 2 + s * d1d2 / 2 + h1_i_1 * (d - 1) - h1_i_1 * (h1 - i) / 2 + m1 - h1 - 1; | ||
109 | + } | ||
110 | + | ||
111 | + std::vector<double> V_; | ||
112 | + int N_; | ||
113 | + int D_; | ||
114 | +}; | ||
115 | + | ||
116 | +#endif |
cppsrc/fa.cpp
0 → 100644
1 | +/* | ||
2 | + * $Id$ | ||
3 | + * | ||
4 | + * Copyright (C) 2008-2010 Kengo Sato | ||
5 | + * | ||
6 | + * This file comes from Ipknot. | ||
7 | + */ | ||
8 | + | ||
9 | + | ||
10 | +#include "fa.h" | ||
11 | +#include <iostream> | ||
12 | +#include <fstream> | ||
13 | +#include <sstream> | ||
14 | +#include <string> | ||
15 | +#include <cctype> | ||
16 | +#include <cstring> | ||
17 | +#include <cassert> | ||
18 | + | ||
19 | +typedef unsigned int uint; | ||
20 | + | ||
21 | +//static | ||
22 | +unsigned int Fasta::load(std::list<Fasta>& data, const char* file){ | ||
23 | + | ||
24 | + std::string line, name, seq, str; | ||
25 | + std::ifstream ifs(file); | ||
26 | + while (std::getline(ifs, line)) { | ||
27 | + if (line[0]=='>') { // header | ||
28 | + if (!name.empty()) { | ||
29 | + assert(str.size()==0 || seq.size()==str.size()); | ||
30 | + data.push_back(Fasta(name, seq, str)); | ||
31 | + } | ||
32 | + | ||
33 | + name=line.substr(1); | ||
34 | + seq.clear(); | ||
35 | + str.clear(); | ||
36 | + continue; | ||
37 | + } | ||
38 | + | ||
39 | + if (std::strchr("()[].?xle ", line[0])==NULL) { // seq | ||
40 | + uint i; | ||
41 | + for (i=0; i!=line.size(); ++i) | ||
42 | + if (!isalpha(line[i])) break; | ||
43 | + seq+=line.substr(0, i); | ||
44 | + } else { | ||
45 | + uint i; | ||
46 | + for (i=0; i!=line.size(); ++i) | ||
47 | + if (std::strchr("()[].?xle ", line[i])==NULL) break; | ||
48 | + str+=line.substr(0, i); | ||
49 | + } | ||
50 | + } | ||
51 | + | ||
52 | + if (!name.empty()) | ||
53 | + data.push_back(Fasta(name, seq, str)); | ||
54 | + | ||
55 | + return data.size(); | ||
56 | +} | ||
57 | + |
cppsrc/fa.h
0 → 100644
1 | +/* | ||
2 | + * $Id$ | ||
3 | + * | ||
4 | + * Copyright (C) 2008-2010 Kengo Sato | ||
5 | + * | ||
6 | + * This file comes from Ipknot. | ||
7 | +*/ | ||
8 | + | ||
9 | +#include <list> | ||
10 | +#include <string> | ||
11 | + | ||
12 | +using std::list; | ||
13 | +using std::string; | ||
14 | + | ||
15 | +class Fasta | ||
16 | +{ | ||
17 | + public: | ||
18 | + Fasta() : name_(), seq_(), str_() {} | ||
19 | + Fasta(const string& name, const string& seq, const string& str = "") : name_(name), seq_(seq), str_(str) {} | ||
20 | + Fasta(const Fasta& fa) : name_(fa.name_), seq_(fa.seq_), str_(fa.str_) {} | ||
21 | + Fasta& operator=(const Fasta& fa) | ||
22 | + { | ||
23 | + if (this != &fa) { | ||
24 | + name_ = fa.name_; | ||
25 | + seq_ = fa.seq_; | ||
26 | + str_ = fa.str_; | ||
27 | + } | ||
28 | + return *this; | ||
29 | + } | ||
30 | + const string& name() const { return name_; } | ||
31 | + const string& seq() const { return seq_; } | ||
32 | + string& seq() { return seq_; } | ||
33 | + const string& str() const { return str_; } | ||
34 | + unsigned int size() const { return seq_.size(); } | ||
35 | + | ||
36 | + static unsigned int load(list<Fasta>& data, const char* file); | ||
37 | + | ||
38 | + private: | ||
39 | + string name_; | ||
40 | + string seq_; | ||
41 | + string str_; | ||
42 | +}; |
cppsrc/nupack.cpp
0 → 100644
This diff is collapsed. Click to expand it.
cppsrc/nupack.h
0 → 100644
1 | +/* | ||
2 | + * $Id$ | ||
3 | + * | ||
4 | + * Copyright (C) 2010 Kengo Sato | ||
5 | + * | ||
6 | + * This file comes from IPknot. | ||
7 | + * | ||
8 | + */ | ||
9 | + | ||
10 | +#include <boost/multi_array.hpp> | ||
11 | +#include <string> | ||
12 | +#include <vector> | ||
13 | +#include <iostream> | ||
14 | + | ||
15 | +using std::string; | ||
16 | +using std::vector; | ||
17 | + | ||
18 | +#define kB 0.00198717 // Boltzmann constant in kcal/mol/K | ||
19 | +#define ZERO_C_IN_KELVIN 273.15 // Zero degrees C in Kelvin | ||
20 | +#define AVOGADRO 6.022e23 // Avogadro's number | ||
21 | + | ||
22 | +typedef float energy_t; | ||
23 | + | ||
24 | +class DPtable2 | ||
25 | +{ | ||
26 | + public: | ||
27 | + DPtable2() : V_(), N_(0) {} | ||
28 | + void resize(int n) | ||
29 | + { | ||
30 | + N_ = n; | ||
31 | + V_.resize(N_ * (N_ + 1) / 2 + (N_ + 1)); | ||
32 | + } | ||
33 | + void fill(const float& v) { std::fill(V_.begin(), V_.end(), v); } | ||
34 | + float& operator()(int i, int j) { return V_[index(i, j)]; } | ||
35 | + const float& operator()(int i, int j) const { return V_[index(i, j)]; } | ||
36 | + | ||
37 | + private: | ||
38 | + int index(int i, int j) const | ||
39 | + { | ||
40 | + assert(j <= N_); | ||
41 | + return j == i - 1 ? N_ * (N_ + 1) / 2 + i : i * N_ + j - i * (1 + i) / 2; | ||
42 | + } | ||
43 | + std::vector<float> V_; | ||
44 | + int N_; | ||
45 | +}; | ||
46 | + | ||
47 | +class DPtable4 | ||
48 | +{ | ||
49 | + public: | ||
50 | + DPtable4() : V_(), N_(0) {} | ||
51 | + void resize(int n) | ||
52 | + { | ||
53 | + N_ = n; | ||
54 | + std::cout << V_.max_size() << " - " << N_ << " - " << sizeof(float) * static_cast<unsigned long>(N_) * (N_ - 1) * (N_ - 2) * (N_ - 3) / 2 / 3 / 4 << std::endl; | ||
55 | + V_.resize(static_cast<unsigned long>(N_) * (N_ - 1) * (N_ - 2) * (N_ - 3) / 2 / 3 / 4); // This number can be HUGE | ||
56 | + std::cout << "c'est toi qui bad_allocque ?" << std::endl; | ||
57 | + } | ||
58 | + void fill(const float& v) { std::fill(V_.begin(), V_.end(), v); } | ||
59 | + float& operator()(int i, int d, int e, int j) { return V_[index(i, d, e, j)]; } | ||
60 | + const float& operator()(int i, int d, int e, int j) const { return V_[index(i, d, e, j)]; } | ||
61 | + | ||
62 | + private: | ||
63 | + int index(int h, int r, int m, int s) const | ||
64 | + { | ||
65 | + int n = N_; | ||
66 | + int h2 = h * h; | ||
67 | + int h3 = h2 * h; | ||
68 | + int h4 = h3 * h; | ||
69 | + int m2 = m * m; | ||
70 | + int n2 = n * n; | ||
71 | + int n3 = n2 * n; | ||
72 | + int r2 = r * r; | ||
73 | + int r3 = r2 * r; | ||
74 | + assert(h <= r); | ||
75 | + assert(r <= m); | ||
76 | + assert(m <= s); | ||
77 | + assert(s <= N_); | ||
78 | + return (h == r && m == s) ? V_.size() - 1 : | ||
79 | + ( | ||
80 | + -24 - 50 * h - 35 * h2 - 10 * h3 - h4 - 36 * m - 12 * m2 + 12 * n + 70 * h * n + | ||
81 | + 30 * h2 * n + 4 * h3 * n + 24 * m * n - 12 * n2 - 30 * h * n2 - 6 * h2 * n2 + 4 * h * n3 + | ||
82 | + 44 * r - 48 * n * r + 12 * n2 * r + 24 * r2 - 12 * n * r2 + 4 * r3 + 24 * s) / | ||
83 | + 24; | ||
84 | + } | ||
85 | + std::vector<float> V_; | ||
86 | + int N_; | ||
87 | +}; | ||
88 | + | ||
89 | +class DPtableX | ||
90 | +{ | ||
91 | + public: | ||
92 | + DPtableX() : V_(), N_(0), D_(0) {} | ||
93 | + void resize(int d, int n) | ||
94 | + { | ||
95 | + N_ = n; | ||
96 | + D_ = d; | ||
97 | + int max_sz = 0; | ||
98 | + for (int i = d; i < d + 3; ++i) max_sz = std::max(max_sz, (N_ - i) * (i - 5) * (i - 1) * (i - 2) / 2); | ||
99 | + V_.resize(max_sz); | ||
100 | + } | ||
101 | + void fill(const float& v) { std::fill(V_.begin(), V_.end(), v); } | ||
102 | + float& operator()(int i, int d, int e, int s) { return V_[index(i, d, e, s)]; } | ||
103 | + const float& operator()(int i, int d, int e, int s) const { return V_[index(i, d, e, s)]; } | ||
104 | + void swap(DPtableX& x) | ||
105 | + { | ||
106 | + std::swap(V_, x.V_); | ||
107 | + std::swap(N_, x.N_); | ||
108 | + std::swap(D_, x.D_); | ||
109 | + } | ||
110 | + | ||
111 | + private: | ||
112 | + int index(int i, int h1, int m1, int s) const | ||
113 | + { | ||
114 | + int d = D_; | ||
115 | + int d1d2 = (d - 1) * (d - 2); | ||
116 | + int d5 = d - 5; | ||
117 | + int h1_i_1 = h1 - i - 1; | ||
118 | + assert(i + d < N_); | ||
119 | + assert(d - 6 >= s); | ||
120 | + assert(i < h1); | ||
121 | + return i * d5 * d1d2 / 2 + s * d1d2 / 2 + h1_i_1 * (d - 1) - h1_i_1 * (h1 - i) / 2 + m1 - h1 - 1; | ||
122 | + } | ||
123 | + | ||
124 | + std::vector<float> V_; | ||
125 | + int N_; | ||
126 | + int D_; | ||
127 | +}; | ||
128 | + | ||
129 | +class Nupack | ||
130 | +{ | ||
131 | + public: | ||
132 | + Nupack(); | ||
133 | + void load_sequence(const string& s); | ||
134 | + void load_parameters_fm363(const vector<float>& v); | ||
135 | + void load_default_parameters(/*int which*/); | ||
136 | + bool load_parameters(const char* filename); | ||
137 | + void dump_parameters(std::ostream& os) const; | ||
138 | + float calculate_partition_function(); | ||
139 | + void calculate_posterior(); | ||
140 | + void get_posterior(vector<float>& bp, vector<int>& offset) const; | ||
141 | + void get_posterior(vector<float>& bp1, vector<float>& bp2, vector<int>& offset) const; | ||
142 | + | ||
143 | + private: | ||
144 | + void fastiloops(int i, int j, DPtable4& Qg, DPtableX& Qx, DPtableX& Qx2); | ||
145 | + void fastiloops_pr(int i, int j, DPtable4& Qg, DPtableX& Qx, DPtableX& Qx2, DPtable4& Pg, DPtableX& Px, DPtableX& Px2); | ||
146 | + energy_t score_hairpin(int i, int j) const; | ||
147 | + energy_t score_loop(int l) const; | ||
148 | + energy_t score_interior(int i, int d, int e, int j, bool pk) const; | ||
149 | + energy_t score_interior_mismatch(int i, int j) const; | ||
150 | + energy_t score_interior_mismatch(int i, int j, int k, int l) const; | ||
151 | + energy_t score_interior_asymmetry(int l1, int l2) const; | ||
152 | + energy_t score_multiloop(bool pk) const; | ||
153 | + energy_t score_multiloop_paired(int n, bool pk) const; | ||
154 | + energy_t score_multiloop_unpaired(int n, bool pk) const; | ||
155 | + energy_t score_at_penalty(int i, int j) const; | ||
156 | + energy_t score_dangle(int i, int j) const; | ||
157 | + energy_t score_pk() const; | ||
158 | + energy_t score_pk_multiloop() const; | ||
159 | + energy_t score_pk_pk() const; | ||
160 | + energy_t score_pk_paired(int n) const; | ||
161 | + energy_t score_pk_unpaired(int n) const; | ||
162 | + energy_t score_pk_band(int n) const; | ||
163 | + int base(char x) const; | ||
164 | + bool allow_paired(int i, int j) const; | ||
165 | + bool wc_pair(int i, int j) const; | ||
166 | + int pair_type(int i, int j) const; | ||
167 | + int pair_type(int i) const; | ||
168 | + | ||
169 | + vector<int> base_map; | ||
170 | + boost::multi_array<int, 2> pair_map; | ||
171 | + vector<int> seq; | ||
172 | + int N; | ||
173 | + float RT; | ||
174 | + DPtable2 Q; | ||
175 | + DPtable2 Qb; | ||
176 | + DPtable2 Qm; | ||
177 | + DPtable2 Qp; | ||
178 | + DPtable2 Qz; | ||
179 | + DPtable4 Qg; | ||
180 | + DPtable4 Qgl; | ||
181 | + DPtable4 Qgr; | ||
182 | + DPtable4 Qgls; | ||
183 | + DPtable4 Qgrs; | ||
184 | + DPtable2 P; | ||
185 | + DPtable2 Pb; | ||
186 | + DPtable2 Pm; | ||
187 | + DPtable2 Pp; | ||
188 | + DPtable2 Pz; | ||
189 | + DPtable2 Pbg; | ||
190 | + DPtable4 Pg; | ||
191 | + DPtable4 Pgl; | ||
192 | + DPtable4 Pgr; | ||
193 | + DPtable4 Pgls; | ||
194 | + DPtable4 Pgrs; | ||
195 | + | ||
196 | + // energy parameters | ||
197 | + energy_t hairpin37[30]; | ||
198 | + energy_t bulge37[30]; | ||
199 | + energy_t interior37[30]; | ||
200 | + energy_t stack37[6][6]; | ||
201 | + energy_t int11_37[6][6][4][4]; | ||
202 | + energy_t int21_37[6][4][4][6][4]; | ||
203 | + energy_t int22_37[6][6][4][4][4][4]; | ||
204 | + energy_t dangle3_37[6][4]; | ||
205 | + energy_t dangle5_37[6][4]; | ||
206 | + energy_t triloop37[4][4][4][4][4]; | ||
207 | + energy_t tloop37[4][4][4][4][4][4]; | ||
208 | + energy_t mismatch_hairpin37[4][4][6]; | ||
209 | + energy_t mismatch_interior37[4][4][6]; | ||
210 | + energy_t asymmetry_penalty[4]; | ||
211 | + energy_t polyC_penalty, polyC_slope, polyC_int; | ||
212 | + energy_t at_penalty; | ||
213 | + energy_t multiloop_penalty; // alpha1 | ||
214 | + energy_t multiloop_paired_penalty; // alpha2 | ||
215 | + energy_t multiloop_unpaired_penalty; // alpha3 | ||
216 | + energy_t pk_penalty; // beta1 | ||
217 | + energy_t pk_multiloop_penalty; // beta1m | ||
218 | + energy_t pk_pk_penalty; // beta1p | ||
219 | + energy_t pk_paired_penalty; // beta2 | ||
220 | + energy_t pk_unpaired_penalty; // beta3 | ||
221 | + energy_t pk_band_penalty; | ||
222 | + energy_t pk_stack_span; | ||
223 | + energy_t pk_interior_span; | ||
224 | + energy_t multiloop_penalty_pk; | ||
225 | + energy_t multiloop_paired_penalty_pk; | ||
226 | + energy_t multiloop_unpaired_penalty_pk; | ||
227 | + energy_t max_asymmetry; | ||
228 | + energy_t salt_correction; | ||
229 | + energy_t loop_greater30; | ||
230 | + energy_t hairpin_GGG; | ||
231 | + float intermolecular_initiation; | ||
232 | +}; |
cppsrc/rna.cpp
0 → 100644
1 | +#include "rna.h" | ||
2 | +#include "nupack.h" | ||
3 | +#include <iostream> | ||
4 | +#include <string> | ||
5 | +#include <utility> | ||
6 | +#include <vector> | ||
7 | + | ||
8 | +using std::cout, std::cerr, std::endl; | ||
9 | + | ||
10 | +RNA::RNA(string name, string seq) | ||
11 | +{ | ||
12 | + if (!check_seq(seq)) { | ||
13 | + cerr << "Unknown chars in input sequence. Please restrict to ACGTU." << endl; | ||
14 | + exit(EXIT_FAILURE); | ||
15 | + } | ||
16 | + name_ = name; | ||
17 | + seq_ = seq; | ||
18 | + format(); | ||
19 | + n_ = seq_.size(); | ||
20 | + cout << "\t>formatted sequence" << endl; | ||
21 | + | ||
22 | + /*define type_*/ | ||
23 | + type_ = vector<vector<int>>(n_, vector<int>(n_)); | ||
24 | + for (int i = 0; i < n_; i++) { | ||
25 | + for (int j = 0; j < n_; j++) { | ||
26 | + if (i < j) { | ||
27 | + std::stringstream ss; | ||
28 | + ss << seq_[i] << seq_[j]; | ||
29 | + string str = ss.str(); | ||
30 | + if (str.compare("AU") == 0) { | ||
31 | + type_[i][j] = 1; | ||
32 | + } else if (str.compare("CG") == 0) { | ||
33 | + type_[i][j] = 2; | ||
34 | + | ||
35 | + } else if (str.compare("GC") == 0) { | ||
36 | + type_[i][j] = 3; | ||
37 | + } else if (str.compare("GU") == 0) { | ||
38 | + type_[i][j] = 4; | ||
39 | + } else if (str.compare("UG") == 0) { | ||
40 | + type_[i][j] = 5; | ||
41 | + } else if (str.compare("UA") == 0) { | ||
42 | + type_[i][j] = 6; | ||
43 | + } else { | ||
44 | + type_[i][j] = 0; | ||
45 | + } | ||
46 | + } else { | ||
47 | + type_[i][j] = 0; | ||
48 | + } | ||
49 | + } | ||
50 | + } | ||
51 | + nBP_ = type_.size(); | ||
52 | + | ||
53 | + /*define coord_*/ | ||
54 | + for (int i = 0; i < n_; i++) { | ||
55 | + for (int j = 0; j < n_; j++) { | ||
56 | + if (i < j and type_[i][j] > 0) { | ||
57 | + if (i != 0 and i != n_ and j != 0 and j != n_) { | ||
58 | + if (type_[i - 1][j + 1] > 0 or type_[i + 1][j - 1] > 0) { | ||
59 | + coord_.push_back(std::make_pair(i, j)); | ||
60 | + } | ||
61 | + } else if (i == 0 or j == n_) { | ||
62 | + if (type_[i + 1][j - 1] > 0) { | ||
63 | + coord_.push_back(std::make_pair(i, j)); | ||
64 | + } | ||
65 | + } | ||
66 | + } | ||
67 | + } | ||
68 | + } | ||
69 | + | ||
70 | + | ||
71 | + /*define pij_*/ | ||
72 | + vector<float> bp; | ||
73 | + vector<int> offset; | ||
74 | + Nupack nu; | ||
75 | + // nu.load_parameters("rna1999.dG"); | ||
76 | + nu.load_default_parameters(); | ||
77 | + cout << "\t>default parameters loaded (Serra and Turner, 1995)" << endl; | ||
78 | + nu.load_sequence(seq_); | ||
79 | + cout << "\t>computing pairing probabilities..." << endl; | ||
80 | + try { | ||
81 | + nu.calculate_partition_function(); | ||
82 | + } catch (std::exception& e) { | ||
83 | + cerr << e.what() << endl; | ||
84 | + exit(EXIT_FAILURE); | ||
85 | + } | ||
86 | + nu.calculate_posterior(); | ||
87 | + nu.get_posterior(bp, offset); | ||
88 | + | ||
89 | + pij_ = vector<vector<float>>(n_, vector<float>(n_)); | ||
90 | + for (int i = 1; i <= n_; i++) { | ||
91 | + for (int j = 1; j <= n_; j++) { | ||
92 | + pij_[i - 1][j - 1] = bp[offset[i] + j]; | ||
93 | + } | ||
94 | + } | ||
95 | + cout << "\t>pairing probabilities defined" << endl; | ||
96 | +} | ||
97 | + | ||
98 | +int RNA::find_coord(pair<int, int> p) | ||
99 | +{ | ||
100 | + vector<pair<int, int>>::iterator it = find(coord_.begin(), coord_.end(), p); | ||
101 | + int r = -1; | ||
102 | + if (it != coord_.end()) r = distance(coord_.begin(), it); | ||
103 | + return r; | ||
104 | +} | ||
105 | + | ||
106 | +bool RNA::check_seq(string seq) // Checks if the sequences only contains ACGUT. | ||
107 | +{ | ||
108 | + bool res = true; | ||
109 | + for (unsigned int i = 0; i < seq.size(); i++) { | ||
110 | + if (seq[i] != 'A' and seq[i] != 'U' and seq[i] != 'C' and seq[i] != 'G' and seq[i] != 'T') { | ||
111 | + res = false; | ||
112 | + break; | ||
113 | + } | ||
114 | + } | ||
115 | + return res; | ||
116 | +} | ||
117 | + | ||
118 | +void RNA::format() | ||
119 | +{ | ||
120 | + for (unsigned int i = 0; i < seq_.size(); i++) { | ||
121 | + seq_[i] = toupper(seq_[i]); | ||
122 | + if (seq_[i] == 'T') { | ||
123 | + seq_[i] = 'U'; | ||
124 | + break; | ||
125 | + } | ||
126 | + } | ||
127 | +} |
cppsrc/rna.h
0 → 100644
1 | + | ||
2 | +#ifndef DEF_RNA | ||
3 | +#define DEF_RNA | ||
4 | + | ||
5 | +#include <map> | ||
6 | +#include <string> | ||
7 | +#include <vector> | ||
8 | + | ||
9 | +using std::pair, std::string, std::vector, std::map; | ||
10 | + | ||
11 | +typedef struct Comp_ { | ||
12 | + pair<uint, uint> pos; | ||
13 | + int score; | ||
14 | + size_t k; | ||
15 | + Comp_(pair<int, int> p, int s) : pos(p), score(s) { k = 1 + pos.second - pos.first; } | ||
16 | +} Component; | ||
17 | +typedef struct { | ||
18 | + string atlas_id; | ||
19 | + vector<Component> comp; | ||
20 | + bool reversed; | ||
21 | +} Motif; | ||
22 | + | ||
23 | +class RNA | ||
24 | +{ | ||
25 | + public: | ||
26 | + RNA(); | ||
27 | + RNA(string name, string seq); | ||
28 | + | ||
29 | + int get_n_(); | ||
30 | + string get_name_(); | ||
31 | + string get_seq_(); | ||
32 | + vector<vector<int>> get_type_(); | ||
33 | + int get_type(int i, int j); | ||
34 | + int get_type(int i); | ||
35 | + vector<pair<int, int>> get_coord_(); | ||
36 | + pair<int, int> get_coord(int i); | ||
37 | + int get_coordF(int i); | ||
38 | + int get_coordS(int i); | ||
39 | + int find_coord(pair<int, int>); | ||
40 | + vector<vector<float>> get_pij_(); | ||
41 | + float get_pij(int i, int j); | ||
42 | + float get_pij(int i); | ||
43 | + int get_err_(); | ||
44 | + uint get_RNA_length() const; | ||
45 | + | ||
46 | + bool check_seq(string seq); | ||
47 | + void format(); | ||
48 | + | ||
49 | + private: | ||
50 | + string name_; /*name of the rna*/ | ||
51 | + string seq_; /*sequence of the rna*/ | ||
52 | + int n_; /*length of the rna*/ | ||
53 | + vector<vector<int>> type_; /*vector of base pair types*/ | ||
54 | + vector<pair<int, int>> coord_; /*vector of base pair coordinates*/ | ||
55 | + vector<vector<float>> pij_; /*vector of probabilities*/ | ||
56 | + uint nBP_; /*number of possible base pair*/ | ||
57 | +}; | ||
58 | + | ||
59 | +inline int RNA::get_n_() { return n_; } | ||
60 | +inline string RNA::get_name_() { return name_; } | ||
61 | +inline string RNA::get_seq_() { return seq_; } | ||
62 | +inline vector<vector<int>> RNA::get_type_() { return type_; } | ||
63 | +inline int RNA::get_type(int i, int j) { return type_[i][j]; } | ||
64 | +inline int RNA::get_type(int i) { return type_[get_coord(i).first][get_coord(i).second]; } | ||
65 | +inline vector<pair<int, int>> RNA::get_coord_() { return coord_; } | ||
66 | +inline pair<int, int> RNA::get_coord(int i) { return coord_[i]; } | ||
67 | +inline int RNA::get_coordF(int i) { return coord_[i].first; } | ||
68 | +inline int RNA::get_coordS(int i) { return coord_[i].second; } | ||
69 | +inline vector<vector<float>> RNA::get_pij_() { return pij_; } | ||
70 | +inline float RNA::get_pij(int i, int j) { return pij_[i][j]; } | ||
71 | +inline float RNA::get_pij(int i) { return pij_[get_coord(i).first][get_coord(i).second]; } | ||
72 | +inline uint RNA::get_RNA_length() const { return nBP_; } | ||
73 | + | ||
74 | +#endif |
cppsrc/rna1995.h
0 → 100644
This diff is collapsed. Click to expand it.
doc/RNA.bib
0 → 100644
This diff is collapsed. Click to expand it.
doc/background.png
0 → 100644
![](/lbecquey/biorseo/raw/8fa60daca086b868aca15973300d562d2f97a6ea/doc/background.png)
856 KB
doc/component.ipe
0 → 100644
This diff is collapsed. Click to expand it.
doc/component.png
0 → 100644
![](/lbecquey/biorseo/raw/8fa60daca086b868aca15973300d562d2f97a6ea/doc/component.png)
16.7 KB
doc/levels.ipe
0 → 100644
1 | +<?xml version="1.0"?> | ||
2 | +<!DOCTYPE ipe SYSTEM "ipe.dtd"> | ||
3 | +<ipe version="70206" creator="Ipe 7.2.7"> | ||
4 | +<info created="D:20181012182334" modified="D:20181012182334"/> | ||
5 | +<ipestyle name="basic"> | ||
6 | +<symbol name="arrow/arc(spx)"> | ||
7 | +<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen"> | ||
8 | +0 0 m | ||
9 | +-1 0.333 l | ||
10 | +-1 -0.333 l | ||
11 | +h | ||
12 | +</path> | ||
13 | +</symbol> | ||
14 | +<symbol name="arrow/farc(spx)"> | ||
15 | +<path stroke="sym-stroke" fill="white" pen="sym-pen"> | ||
16 | +0 0 m | ||
17 | +-1 0.333 l | ||
18 | +-1 -0.333 l | ||
19 | +h | ||
20 | +</path> | ||
21 | +</symbol> | ||
22 | +<symbol name="arrow/ptarc(spx)"> | ||
23 | +<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen"> | ||
24 | +0 0 m | ||
25 | +-1 0.333 l | ||
26 | +-0.8 0 l | ||
27 | +-1 -0.333 l | ||
28 | +h | ||
29 | +</path> | ||
30 | +</symbol> | ||
31 | +<symbol name="arrow/fptarc(spx)"> | ||
32 | +<path stroke="sym-stroke" fill="white" pen="sym-pen"> | ||
33 | +0 0 m | ||
34 | +-1 0.333 l | ||
35 | +-0.8 0 l | ||
36 | +-1 -0.333 l | ||
37 | +h | ||
38 | +</path> | ||
39 | +</symbol> | ||
40 | +<symbol name="mark/circle(sx)" transformations="translations"> | ||
41 | +<path fill="sym-stroke"> | ||
42 | +0.6 0 0 0.6 0 0 e | ||
43 | +0.4 0 0 0.4 0 0 e | ||
44 | +</path> | ||
45 | +</symbol> | ||
46 | +<symbol name="mark/disk(sx)" transformations="translations"> | ||
47 | +<path fill="sym-stroke"> | ||
48 | +0.6 0 0 0.6 0 0 e | ||
49 | +</path> | ||
50 | +</symbol> | ||
51 | +<symbol name="mark/fdisk(sfx)" transformations="translations"> | ||
52 | +<group> | ||
53 | +<path fill="sym-fill"> | ||
54 | +0.5 0 0 0.5 0 0 e | ||
55 | +</path> | ||
56 | +<path fill="sym-stroke" fillrule="eofill"> | ||
57 | +0.6 0 0 0.6 0 0 e | ||
58 | +0.4 0 0 0.4 0 0 e | ||
59 | +</path> | ||
60 | +</group> | ||
61 | +</symbol> | ||
62 | +<symbol name="mark/box(sx)" transformations="translations"> | ||
63 | +<path fill="sym-stroke" fillrule="eofill"> | ||
64 | +-0.6 -0.6 m | ||
65 | +0.6 -0.6 l | ||
66 | +0.6 0.6 l | ||
67 | +-0.6 0.6 l | ||
68 | +h | ||
69 | +-0.4 -0.4 m | ||
70 | +0.4 -0.4 l | ||
71 | +0.4 0.4 l | ||
72 | +-0.4 0.4 l | ||
73 | +h | ||
74 | +</path> | ||
75 | +</symbol> | ||
76 | +<symbol name="mark/square(sx)" transformations="translations"> | ||
77 | +<path fill="sym-stroke"> | ||
78 | +-0.6 -0.6 m | ||
79 | +0.6 -0.6 l | ||
80 | +0.6 0.6 l | ||
81 | +-0.6 0.6 l | ||
82 | +h | ||
83 | +</path> | ||
84 | +</symbol> | ||
85 | +<symbol name="mark/fsquare(sfx)" transformations="translations"> | ||
86 | +<group> | ||
87 | +<path fill="sym-fill"> | ||
88 | +-0.5 -0.5 m | ||
89 | +0.5 -0.5 l | ||
90 | +0.5 0.5 l | ||
91 | +-0.5 0.5 l | ||
92 | +h | ||
93 | +</path> | ||
94 | +<path fill="sym-stroke" fillrule="eofill"> | ||
95 | +-0.6 -0.6 m | ||
96 | +0.6 -0.6 l | ||
97 | +0.6 0.6 l | ||
98 | +-0.6 0.6 l | ||
99 | +h | ||
100 | +-0.4 -0.4 m | ||
101 | +0.4 -0.4 l | ||
102 | +0.4 0.4 l | ||
103 | +-0.4 0.4 l | ||
104 | +h | ||
105 | +</path> | ||
106 | +</group> | ||
107 | +</symbol> | ||
108 | +<symbol name="mark/cross(sx)" transformations="translations"> | ||
109 | +<group> | ||
110 | +<path fill="sym-stroke"> | ||
111 | +-0.43 -0.57 m | ||
112 | +0.57 0.43 l | ||
113 | +0.43 0.57 l | ||
114 | +-0.57 -0.43 l | ||
115 | +h | ||
116 | +</path> | ||
117 | +<path fill="sym-stroke"> | ||
118 | +-0.43 0.57 m | ||
119 | +0.57 -0.43 l | ||
120 | +0.43 -0.57 l | ||
121 | +-0.57 0.43 l | ||
122 | +h | ||
123 | +</path> | ||
124 | +</group> | ||
125 | +</symbol> | ||
126 | +<symbol name="arrow/fnormal(spx)"> | ||
127 | +<path stroke="sym-stroke" fill="white" pen="sym-pen"> | ||
128 | +0 0 m | ||
129 | +-1 0.333 l | ||
130 | +-1 -0.333 l | ||
131 | +h | ||
132 | +</path> | ||
133 | +</symbol> | ||
134 | +<symbol name="arrow/pointed(spx)"> | ||
135 | +<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen"> | ||
136 | +0 0 m | ||
137 | +-1 0.333 l | ||
138 | +-0.8 0 l | ||
139 | +-1 -0.333 l | ||
140 | +h | ||
141 | +</path> | ||
142 | +</symbol> | ||
143 | +<symbol name="arrow/fpointed(spx)"> | ||
144 | +<path stroke="sym-stroke" fill="white" pen="sym-pen"> | ||
145 | +0 0 m | ||
146 | +-1 0.333 l | ||
147 | +-0.8 0 l | ||
148 | +-1 -0.333 l | ||
149 | +h | ||
150 | +</path> | ||
151 | +</symbol> | ||
152 | +<symbol name="arrow/linear(spx)"> | ||
153 | +<path stroke="sym-stroke" pen="sym-pen"> | ||
154 | +-1 0.333 m | ||
155 | +0 0 l | ||
156 | +-1 -0.333 l | ||
157 | +</path> | ||
158 | +</symbol> | ||
159 | +<symbol name="arrow/fdouble(spx)"> | ||
160 | +<path stroke="sym-stroke" fill="white" pen="sym-pen"> | ||
161 | +0 0 m | ||
162 | +-1 0.333 l | ||
163 | +-1 -0.333 l | ||
164 | +h | ||
165 | +-1 0 m | ||
166 | +-2 0.333 l | ||
167 | +-2 -0.333 l | ||
168 | +h | ||
169 | +</path> | ||
170 | +</symbol> | ||
171 | +<symbol name="arrow/double(spx)"> | ||
172 | +<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen"> | ||
173 | +0 0 m | ||
174 | +-1 0.333 l | ||
175 | +-1 -0.333 l | ||
176 | +h | ||
177 | +-1 0 m | ||
178 | +-2 0.333 l | ||
179 | +-2 -0.333 l | ||
180 | +h | ||
181 | +</path> | ||
182 | +</symbol> | ||
183 | +<pen name="heavier" value="0.8"/> | ||
184 | +<pen name="fat" value="1.2"/> | ||
185 | +<pen name="ultrafat" value="2"/> | ||
186 | +<symbolsize name="large" value="5"/> | ||
187 | +<symbolsize name="small" value="2"/> | ||
188 | +<symbolsize name="tiny" value="1.1"/> | ||
189 | +<arrowsize name="large" value="10"/> | ||
190 | +<arrowsize name="small" value="5"/> | ||
191 | +<arrowsize name="tiny" value="3"/> | ||
192 | +<color name="red" value="1 0 0"/> | ||
193 | +<color name="green" value="0 1 0"/> | ||
194 | +<color name="blue" value="0 0 1"/> | ||
195 | +<color name="yellow" value="1 1 0"/> | ||
196 | +<color name="orange" value="1 0.647 0"/> | ||
197 | +<color name="gold" value="1 0.843 0"/> | ||
198 | +<color name="purple" value="0.627 0.125 0.941"/> | ||
199 | +<color name="gray" value="0.745"/> | ||
200 | +<color name="brown" value="0.647 0.165 0.165"/> | ||
201 | +<color name="navy" value="0 0 0.502"/> | ||
202 | +<color name="pink" value="1 0.753 0.796"/> | ||
203 | +<color name="seagreen" value="0.18 0.545 0.341"/> | ||
204 | +<color name="turquoise" value="0.251 0.878 0.816"/> | ||
205 | +<color name="violet" value="0.933 0.51 0.933"/> | ||
206 | +<color name="darkblue" value="0 0 0.545"/> | ||
207 | +<color name="darkcyan" value="0 0.545 0.545"/> | ||
208 | +<color name="darkgray" value="0.663"/> | ||
209 | +<color name="darkgreen" value="0 0.392 0"/> | ||
210 | +<color name="darkmagenta" value="0.545 0 0.545"/> | ||
211 | +<color name="darkorange" value="1 0.549 0"/> | ||
212 | +<color name="darkred" value="0.545 0 0"/> | ||
213 | +<color name="lightblue" value="0.678 0.847 0.902"/> | ||
214 | +<color name="lightcyan" value="0.878 1 1"/> | ||
215 | +<color name="lightgray" value="0.827"/> | ||
216 | +<color name="lightgreen" value="0.565 0.933 0.565"/> | ||
217 | +<color name="lightyellow" value="1 1 0.878"/> | ||
218 | +<dashstyle name="dashed" value="[4] 0"/> | ||
219 | +<dashstyle name="dotted" value="[1 3] 0"/> | ||
220 | +<dashstyle name="dash dotted" value="[4 2 1 2] 0"/> | ||
221 | +<dashstyle name="dash dot dotted" value="[4 2 1 2 1 2] 0"/> | ||
222 | +<textsize name="large" value="\large"/> | ||
223 | +<textsize name="Large" value="\Large"/> | ||
224 | +<textsize name="LARGE" value="\LARGE"/> | ||
225 | +<textsize name="huge" value="\huge"/> | ||
226 | +<textsize name="Huge" value="\Huge"/> | ||
227 | +<textsize name="small" value="\small"/> | ||
228 | +<textsize name="footnote" value="\footnotesize"/> | ||
229 | +<textsize name="tiny" value="\tiny"/> | ||
230 | +<textstyle name="center" begin="\begin{center}" end="\end{center}"/> | ||
231 | +<textstyle name="itemize" begin="\begin{itemize}" end="\end{itemize}"/> | ||
232 | +<textstyle name="item" begin="\begin{itemize}\item{}" end="\end{itemize}"/> | ||
233 | +<gridsize name="4 pts" value="4"/> | ||
234 | +<gridsize name="8 pts (~3 mm)" value="8"/> | ||
235 | +<gridsize name="16 pts (~6 mm)" value="16"/> | ||
236 | +<gridsize name="32 pts (~12 mm)" value="32"/> | ||
237 | +<gridsize name="10 pts (~3.5 mm)" value="10"/> | ||
238 | +<gridsize name="20 pts (~7 mm)" value="20"/> | ||
239 | +<gridsize name="14 pts (~5 mm)" value="14"/> | ||
240 | +<gridsize name="28 pts (~10 mm)" value="28"/> | ||
241 | +<gridsize name="56 pts (~20 mm)" value="56"/> | ||
242 | +<anglesize name="90 deg" value="90"/> | ||
243 | +<anglesize name="60 deg" value="60"/> | ||
244 | +<anglesize name="45 deg" value="45"/> | ||
245 | +<anglesize name="30 deg" value="30"/> | ||
246 | +<anglesize name="22.5 deg" value="22.5"/> | ||
247 | +<opacity name="10%" value="0.1"/> | ||
248 | +<opacity name="30%" value="0.3"/> | ||
249 | +<opacity name="50%" value="0.5"/> | ||
250 | +<opacity name="75%" value="0.75"/> | ||
251 | +<tiling name="falling" angle="-60" step="4" width="1"/> | ||
252 | +<tiling name="rising" angle="30" step="4" width="1"/> | ||
253 | +</ipestyle> | ||
254 | +<page> | ||
255 | +<layer name="alpha"/> | ||
256 | +<view layers="alpha" active="alpha"/> | ||
257 | +<path layer="alpha" matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat"> | ||
258 | +64 704 m | ||
259 | +320 704 l | ||
260 | +</path> | ||
261 | +<path matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat"> | ||
262 | +96 704 m | ||
263 | +32.249 0 0 -32.249 128 708 160 704 a | ||
264 | +</path> | ||
265 | +<path matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat"> | ||
266 | +208 704 m | ||
267 | +25.2982 0 0 -25.2982 232 696 256 704 a | ||
268 | +</path> | ||
269 | +<path matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat"> | ||
270 | +128 704 m | ||
271 | +77.746 0 0 -77.746 200 674.667 272 704 a | ||
272 | +</path> | ||
273 | +<path matrix="1 0 0 1 256 64" stroke="black" pen="ultrafat"> | ||
274 | +64 704 m | ||
275 | +320 704 l | ||
276 | +</path> | ||
277 | +<path matrix="1 0 0 1 256 64" stroke="black" pen="ultrafat"> | ||
278 | +208 704 m | ||
279 | +25.2982 0 0 -25.2982 232 696 256 704 a | ||
280 | +</path> | ||
281 | +<path matrix="1 0 0 1 256 64" stroke="black" pen="ultrafat"> | ||
282 | +128 704 m | ||
283 | +77.746 0 0 -77.746 200 674.667 272 704 a | ||
284 | +</path> | ||
285 | +<path matrix="1 0 0 1 256 -96" stroke="black" pen="ultrafat"> | ||
286 | +64 704 m | ||
287 | +320 704 l | ||
288 | +</path> | ||
289 | +<path matrix="1 0 0 1 256 -96" stroke="black" pen="ultrafat"> | ||
290 | +96 704 m | ||
291 | +32.249 0 0 -32.249 128 708 160 704 a | ||
292 | +</path> | ||
293 | +<text matrix="1 0 0 1 0 -16" transformations="translations" pos="64 688" stroke="black" type="label" width="202.242" height="17.213" depth="4.82" valign="baseline" size="Huge">structure $y$ with PK</text> | ||
294 | +<path stroke="black" pen="ultrafat" arrow="normal/normal"> | ||
295 | +256 736 m | ||
296 | +304 752 l | ||
297 | +</path> | ||
298 | +<path stroke="black" pen="ultrafat" arrow="normal/normal"> | ||
299 | +288 672 m | ||
300 | +320 640 l | ||
301 | +</path> | ||
302 | +<text matrix="1 0 0 1 32 -16" transformations="translations" pos="384 752" stroke="black" type="label" width="60.952" height="16.741" depth="4.02" valign="baseline" size="huge">level $y^1$</text> | ||
303 | +<text transformations="translations" pos="416 576" stroke="black" type="label" width="60.952" height="16.741" depth="4.02" valign="baseline" size="huge">level $y^2$</text> | ||
304 | +<text transformations="translations" pos="432 672" stroke="black" type="label" width="17.843" height="13.97" depth="1.57" valign="baseline" size="Huge">+</text> | ||
305 | +</page> | ||
306 | +</ipe> |
doc/levels.png
0 → 100644
![](/lbecquey/biorseo/raw/8fa60daca086b868aca15973300d562d2f97a6ea/doc/levels.png)
19.8 KB
doc/logoIBISC.png
0 → 100644
![](/lbecquey/biorseo/raw/8fa60daca086b868aca15973300d562d2f97a6ea/doc/logoIBISC.png)
14.9 KB
doc/minisaclay.png
0 → 100644
![](/lbecquey/biorseo/raw/8fa60daca086b868aca15973300d562d2f97a6ea/doc/minisaclay.png)
4.5 KB
doc/motif_forcing_constraints.pdf
0 → 100644
No preview for this file type
doc/motif_forcing_constraints.tex
0 → 100644
This diff is collapsed. Click to expand it.
rna1999.dG
0 → 100644
This diff is collapsed. Click to expand it.
searchForMotifSites.py
0 → 100755
This diff is collapsed. Click to expand it.
src/Component.h
deleted
100644 → 0
1 | -#ifndef COMPONENT_H | ||
2 | -#define COMPONENT_H | ||
3 | - | ||
4 | -#include <string> | ||
5 | - | ||
6 | -class Component | ||
7 | -{ | ||
8 | - public: | ||
9 | - Component(); | ||
10 | - Component(std::string& cons_seq, uint k); | ||
11 | - private: | ||
12 | - std::string _cons_seq; | ||
13 | - uint k; | ||
14 | -}; | ||
15 | - | ||
16 | - | ||
17 | -istream& operator>>(istream& is, Component& m); | ||
18 | -ostream& operator<<(ostream& os, const Component& m); | ||
19 | - | ||
20 | -#endif | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
src/Motif.cpp
deleted
100644 → 0
1 | -#include "Motif.h" | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
src/Motif.h
deleted
100644 → 0
1 | -#ifndef MOTIF_H | ||
2 | -#define MOTIF_H | ||
3 | - | ||
4 | -#include <vector> | ||
5 | -#include <string> | ||
6 | -#include "Component.h" | ||
7 | - | ||
8 | -class Motif | ||
9 | -{ | ||
10 | - public: | ||
11 | - Motif(); | ||
12 | - Motif(std::string filename); | ||
13 | - ~Motif(); | ||
14 | - std::vector<Component>& getComponents() const; | ||
15 | - Component& getComponent(uint k) const; | ||
16 | - std::string name; | ||
17 | - | ||
18 | - private: | ||
19 | - std::vector<Component> _comps; | ||
20 | - | ||
21 | -}; | ||
22 | - | ||
23 | -istream& operator>>(istream& is, Motif& m); | ||
24 | -ostream& operator<<(ostream& os, const Motif& m); | ||
25 | - | ||
26 | -#endif | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
src/RNA.cpp
deleted
100644 → 0
src/RNA.h
deleted
100644 → 0
1 | -#ifndef RNA_H | ||
2 | -#define RNA_H | ||
3 | - | ||
4 | -#include <string> | ||
5 | - | ||
6 | -class RNA { | ||
7 | - public: | ||
8 | - RNA(); | ||
9 | - RNA(std::string); | ||
10 | - ~RNA(); | ||
11 | - uint length() const; | ||
12 | - std::string str() const; | ||
13 | - // char seq(uint k) const; | ||
14 | - // bool isConsensus() const; | ||
15 | - // bool containsPseudoBases() const; | ||
16 | - private: | ||
17 | - std::string _seq; | ||
18 | - // bool _coding; | ||
19 | - // bool _consensus; | ||
20 | - // bool _containsPseudoNTs; | ||
21 | -}; | ||
22 | - | ||
23 | -inline uint RNA::length() const { return _seq.length(); } | ||
24 | -inline std::string RNA::str() const { return _seq; } | ||
25 | - | ||
26 | -#endif | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
src/main.cpp
deleted
100644 → 0
1 | -#include "Motif.h" | ||
2 | -#include <cstdlib> | ||
3 | -#include <iostream> | ||
4 | -#include <unistd.h> | ||
5 | -#include <boost/program_options.hpp> | ||
6 | -#include <boost/filesystem.hpp> | ||
7 | - | ||
8 | -#include "RNA.h" | ||
9 | - | ||
10 | -using namespace std; | ||
11 | -namespace bpo = boost::program_options; | ||
12 | -namespace bf = boost::filesystem; | ||
13 | - | ||
14 | -bool checkMotifFolder(bf::path & folder) | ||
15 | -{ | ||
16 | - if (not(bf::is_directory(folder) and bf::exists(folder))) return false; | ||
17 | - bf::directory_iterator end_itr; | ||
18 | - uint Ndesc = 0; | ||
19 | - for (bf::directory_iterator itr(folder); itr != end_itr; ++itr) | ||
20 | - { | ||
21 | - if (itr->path().leaf().string().find(string(".desc")) != std::string::npos) | ||
22 | - Ndesc++; | ||
23 | - } | ||
24 | - cout << "Found " << Ndesc << " .desc files in " << folder.string() << endl; | ||
25 | - if (Ndesc > 0) | ||
26 | - return true; | ||
27 | - return false; | ||
28 | -} | ||
29 | - | ||
30 | -int parseMotifs(bf::path & folder) | ||
31 | -{ | ||
32 | - bf::directory_iterator end_itr; | ||
33 | - for (bf::directory_iterator itr(folder); itr != end_itr; ++itr) | ||
34 | - { | ||
35 | - if (itr->path().leaf().string().find(string(".desc")) != std::string::npos) | ||
36 | - { | ||
37 | - | ||
38 | - } | ||
39 | - } | ||
40 | - | ||
41 | -} | ||
42 | - | ||
43 | -int main(int argc, char** argv) | ||
44 | -{ | ||
45 | - bf::path currentFolder(bf::current_path()); | ||
46 | - bf::path motifFolderPath(currentFolder); // By default, searching here... | ||
47 | - bpo::options_description desc("Options"); | ||
48 | - bpo::variables_map options_map; | ||
49 | - RNA querySequence; | ||
50 | - | ||
51 | - desc.add_options() | ||
52 | - ("help", "Print this help message") | ||
53 | - ("seq,s", bpo::value<string>()->required(), "RNA sequence to find motives in.") | ||
54 | - ("motifs,m", bpo::value<bf::path>(), "Path to folder of DESC files describing the motifs to be used") | ||
55 | - ; | ||
56 | - bpo::store(bpo::parse_command_line(argc, argv, desc), options_map); | ||
57 | - bpo::notify(options_map); | ||
58 | - | ||
59 | - if (options_map.count("help")) { | ||
60 | - cout << desc << endl; | ||
61 | - return EXIT_SUCCESS; | ||
62 | - } | ||
63 | - if (options_map.count("motifs")) { | ||
64 | - motifFolderPath = options_map["motifs"].as<bf::path>(); | ||
65 | - if (not(checkMotifFolder(motifFolderPath))) { return EXIT_FAILURE; } | ||
66 | - } | ||
67 | - if (options_map.count("seq")) { | ||
68 | - querySequence = RNA(options_map["seq"].as<string>()); | ||
69 | - } | ||
70 | - cout << "Working with sequence " << querySequence.str() << " (length " << querySequence.length() << ")." << endl; | ||
71 | - | ||
72 | - return EXIT_SUCCESS; | ||
73 | -} |
-
Please register or login to post a comment