Louis BECQUEY

Monoobjective resolution

1 # Prerequisites 1 # Prerequisites
2 *.d 2 *.d
3 3
4 +# LaTeX temporary files
5 +doc/*.toc
6 +doc/*.bbl
7 +doc/*.gz
8 +
4 # Compiled Object files 9 # Compiled Object files
5 *.slo 10 *.slo
6 *.lo 11 *.lo
......
1 +# Please set the following variables to the correct paths for JAR3D:
2 +jar3dexec="/home/persalteas/Software/jar3dbin/jar3d_2014-12-11.jar"
3 +ILmotifDir="/home/persalteas/Data/RNA/motifs/Matlab_results/IL/3.2/lib"
4 +HLmotifDir="/home/persalteas/Data/RNA/motifs/Matlab_results/HL/3.2/lib"
...\ No newline at end of file ...\ No newline at end of file
1 -# ------------------------------------------------ 1 +ICONCERT=/opt/ibm/ILOG/CPLEX_Studio_Community128/concert/include
2 -# Generic Makefile 2 +ICPLEX=/opt/ibm/ILOG/CPLEX_Studio_Community128/cplex/include
3 -# ------------------------------------------------ 3 +LCONCERT=/opt/ibm/ILOG/CPLEX_Studio_Community128/concert/lib/x86-64_linux/static_pic/
4 +LCPLEX=/opt/ibm/ILOG/CPLEX_Studio_Community128/cplex/lib/x86-64_linux/static_pic/
4 5
5 # project name (generate executable with this name) 6 # project name (generate executable with this name)
6 -TARGET = motifscan 7 +TARGET = biominserter
7 8
8 CC = g++ 9 CC = g++
9 # compiling flags here 10 # compiling flags here
10 -CFLAGS = -I. -O3 11 +CFLAGS = -Icppsrc/ -I$(ICONCERT) -I$(ICPLEX) -O3
11 -CXXFLAGS = -std=c++17 -Wall -Wpedantic -Wextra 12 +CXXFLAGS = -std=c++17 -Wall -Wpedantic -Wextra -Wno-ignored-attributes
12 13
13 LINKER = g++ 14 LINKER = g++
14 # linking flags here 15 # linking flags here
15 -LDFLAGS = -lboost_system -lboost_filesystem -lboost_program_options 16 +LDFLAGS = -lconcert -lilocplex -lcplex -lm -lpthread -ldl -lboost_system -lboost_filesystem -lboost_program_options -L$(LCONCERT) -L$(LCPLEX)
16 17
17 # change these to proper directories where each file should be 18 # change these to proper directories where each file should be
18 -SRCDIR = src 19 +SRCDIR = cppsrc
19 OBJDIR = obj 20 OBJDIR = obj
20 BINDIR = bin 21 BINDIR = bin
21 22
......
1 -# RNA_Motifs_Inserter 1 +This is a bi-objective integer programming algorithm.
2 -A C++ software interface to RNA motif databases, and algorithms to include known motifs in raw sequences. 2 +It predicts the secondary structure of a RNA sequence with pieces of 3D information (non-canonical contacts) at some places,
3 +by identifying zones that can fold like known motifs.
4 +
5 +1/ How it works
6 +===================================
7 +INPUT:
8 +- An RNA sequence (tested with sequences ~100 bases)
9 +
10 +THEN
11 +- Identifies possible 2D folds with RNAsubopt.
12 +- Knowing possible 2D folds, locate every possibly unpaired loop (hairpin loop, internal loop, multiple junction...)
13 +- align each unpaired loop to the catalogue of models of known RNA motifs (The 3D Motif Atlas of the BGSU RNA group)
14 +- retrieve a list of potential motif-insertion-sites in the RNA sequence. Use them to define the constraints for the IP problem.
15 +- Solve a bi-objective IP problem:
16 + * Maximize the expected accuracy of the secondary structure,
17 + * Maximize the number and size of motifs inserted in the structure.
18 +
19 +OUTPUT:
20 +- A set of secondary structures from the pareto front,
21 +- The list of known motif inserted in the corresponding structures (and the non-canonical contacts)
22 +- (Optionally, lower score structures from k-Pareto sets.)
23 +
24 +2/ Installation
25 +==================================
26 +- Download and install RNAsubopt from the ViennaRNA package (https://www.tbi.univie.ac.at/RNA/)
27 +- Download and install IBM ILOG Cplex optimization studio (https://www.ibm.com/analytics/cplex-optimizer), free academic account required
28 +- Download and install Java runtime (Tested with Java 10)
29 +- Download and install the latest JAR3D executable "jar3d_releasedate.jar" and motif models in this folder (http://rna.bgsu.edu/data/jar3d/models/)
30 + Note that for HL and ILs, only the latest version is required (not all the versions provided in the folders).
31 +- Download and install a C++ compiler and building dependencies and utilities (g++ or clang, automake, libboost)
......
1 +#include "MOIP.h"
2 +#include "SecondaryStructure.h"
3 +#include "rna.h"
4 +#include <algorithm>
5 +#include <boost/format.hpp>
6 +#include <cfloat>
7 +#include <cmath>
8 +#include <cstdlib>
9 +#include <iostream>
10 +#include <limits>
11 +#include <stdexcept>
12 +#include <utility>
13 +#include <vector>
14 +
15 +using std::cerr, std::cout, std::endl;
16 +using std::make_pair;
17 +using std::vector;
18 +
19 +uint MOIP::ncores = 0;
20 +
21 +MOIP::MOIP(const RNA& rna, const vector<Motif>& insertionSites)
22 +: rna_(rna), insertion_sites_(insertionSites), beta_(1.0), theta_{1.0 / (2.0 + 1.0)}
23 +{
24 + basepair_dv_ = IloNumVarArray(env_);
25 + insertion_dv_ = IloNumVarArray(env_);
26 +
27 + // Add the y^u_v decision variables
28 + uint u, v, c = 0;
29 + index_of_yuv_ = vector<vector<size_t>>(rna_.get_RNA_length() - 6, vector<size_t>(0));
30 + for (u = 0; u < rna_.get_RNA_length() - 6; u++) {
31 + for (v = u + 4; v < rna_.get_RNA_length(); v++) // A basepair is possible iff v > u+3
32 + {
33 + index_of_yuv_[u].push_back(c);
34 + c++;
35 + char name[15];
36 + sprintf(name, "y%d,%d", u, v);
37 + basepair_dv_.add(IloNumVar(env_, 0, 1, IloNumVar::Bool, name)); // A boolean whether u and v are paired
38 + }
39 + }
40 + // Add the Cx,i,p decision variables
41 + index_of_first_components.reserve(insertionSites.size());
42 + index_of_Cxip_ = vector<vector<size_t>>(0);
43 + index_of_Cxip_.reserve(insertionSites.size());
44 + size_t i = 0;
45 + for (const Motif m : insertionSites) {
46 + index_of_first_components.push_back(i);
47 + index_of_Cxip_.push_back(vector<size_t>(0));
48 + for (const Component c : m.comp) {
49 + index_of_Cxip_.back().push_back(i);
50 + if (c.k > 0) i++;
51 + char name[20];
52 + sprintf(
53 + name,
54 + "C%d,%d-%d",
55 + static_cast<int>(index_of_Cxip_.size() - 1),
56 + static_cast<int>(index_of_Cxip_.back().size() - 1),
57 + c.pos.first);
58 + insertion_dv_.add(IloNumVar(env_, 0, 1, IloNumVar::Bool, name)); // A boolean whether component i of motif x is inserted at position p
59 + }
60 + }
61 + cout << i + c << " decision variables are used !" << endl;
62 +}
63 +
64 +
65 +MOIP::~MOIP() { env_.end(); }
66 +
67 +
68 +bool MOIP::is_undominated_yet(const SecondaryStructure& s)
69 +{
70 + for (uint i = 0; i < pareto_.size(); i++) {
71 + if (pareto_[i] > s) return false;
72 + }
73 + return true;
74 +}
75 +
76 +void MOIP::solve_objective(int o, double min, double max)
77 +{
78 + IloModel model_ = IloModel(env_);
79 + cout << "Solving objective function " << o << "..." << endl;
80 + add_problem_constraints(model_);
81 + switch (o) {
82 + case 1: {
83 + // Add the motif objective function
84 + IloExpr obj1 = IloExpr(env_);
85 + for (uint i = 0; i < insertion_sites_.size(); i++) {
86 + IloNum n_compo_squared = IloNum(insertion_sites_[i].comp.size() * insertion_sites_[i].comp.size());
87 + obj1 += n_compo_squared * insertion_dv_[index_of_first_components[i]];
88 + }
89 + model_.add(IloMinimize(env_, obj1));
90 + } break;
91 + case 2: {
92 + // Add the likelihood objective function
93 + }
94 + }
95 + IloCplex cplex_ = IloCplex(model_);
96 + if (!cplex_.solve()) {
97 + env_.error() << "\t>Failed to optimize LP." << endl;
98 + throw(-1);
99 + }
100 + IloNumArray basepair_values(env_);
101 + IloNumArray insertion_values(env_);
102 + env_.out() << endl << "Solution status = " << cplex_.getStatus() << endl;
103 + env_.out() << endl << "Objective value = " << cplex_.getObjValue() << endl;
104 + cplex_.getValues(basepair_values, basepair_dv_);
105 + env_.out() << endl << "Basepair Values = " << basepair_values << endl;
106 + cplex_.getValues(insertion_values, basepair_dv_);
107 + env_.out() << endl << "Insertion Values = " << insertion_values << endl;
108 +
109 + // TODO : retrieve the secondary structure !!
110 +}
111 +
112 +void MOIP::add_problem_constraints(const IloModel& model_)
113 +{
114 + // ensure there only is 0 or 1 pairing by nucleotide:
115 + cout << "\t>ensuring there are 0 or 1 pairing by nucleotide..." << endl;
116 + uint u, v;
117 + uint n = rna_.get_RNA_length();
118 + for (u = 0; u < n - 6; u++) {
119 + IloExpr c1(env_);
120 + for (v = 0; v < u; v++)
121 + if (allowed_basepair(v, u)) c1 += y(v, u);
122 + for (v = u + 4; v < n; v++)
123 + if (allowed_basepair(u, v)) c1 += y(u, v);
124 + model_.add(c1 <= 1);
125 + // cout << "\t>It worked for base " << u << " : " << (c1 <= 1) << endl;
126 + }
127 + // forbid lonely basepairs
128 + cout << "\t>forbidding lonely basepairs..." << endl;
129 + for (u = 0; u < n - 6; u++) {
130 + IloExpr c2(env_); // for the case where s[u] is paired to s[v], v>u
131 + c2 += 1;
132 + for (v = u; v < n; v++)
133 + if (allowed_basepair(u - 1, v)) c2 += y(u - 1, v);
134 + for (v = u + 1; v < n; v++)
135 + if (allowed_basepair(u, v)) c2 -= y(u, v);
136 + for (v = u + 2; v < n; v++)
137 + if (allowed_basepair(u + 1, v)) c2 += y(u + 1, v);
138 + model_.add(c2 >= 1);
139 + // cout << "\t>It worked for base " << u << " : " << (c2 >= 1) << endl;
140 + }
141 + for (v = 5; v < n; v++) {
142 + IloExpr c2p(env_); // for the case where s[u] is paired to s[v], v<u
143 + c2p += 1;
144 + for (u = 1; u <= v - 2; u++)
145 + if (allowed_basepair(u, v - 1)) c2p += y(u, v - 1);
146 + for (u = 1; u <= v - 1; u++)
147 + if (allowed_basepair(u, v)) c2p -= y(u, v);
148 + for (u = 1; u <= v; u++)
149 + if (allowed_basepair(u, v + 1)) c2p += y(u, v + 1);
150 + model_.add(c2p >= 1);
151 + // cout << "\t>It worked for base " << u << " : " << (c2p >= 1) << endl;
152 + }
153 + // Forbid pairings inside every motif component if included
154 + cout << "\t>forbidding basepairs inside included motif's components..." << endl;
155 + for (size_t i = 0; i < insertion_sites_.size(); i++) {
156 + Motif& x = insertion_sites_[i];
157 + for (size_t j = 0; j < x.comp.size(); j++) {
158 + Component& c = x.comp[j];
159 + IloExpr c3(env_);
160 + IloNum kxi = IloNum(c.k);
161 + c3 += kxi * C(i, j);
162 + for (u = c.pos.first; u < c.pos.second; u++) {
163 + for (v = 0; v < n; v++)
164 + if (allowed_basepair(u, v)) c3 += y(u, v);
165 + }
166 + model_.add(c3 <= kxi);
167 + }
168 + }
169 + // To be continued ...
170 +}
171 +
172 +void MOIP::extend_pareto(double lambdaMin, double lambdaMax)
173 +{
174 + if (lambdaMin >= lambdaMax) {
175 + cerr << "lambdaMax < lambdaMin, something's wrong !" << endl;
176 + exit(EXIT_FAILURE);
177 + }
178 +
179 + // for any SecondaryStructure in pareto_ such that the value of the second
180 + // objective is between lambdaMin and lambdaMax
181 + // a DIFF() constraint and a mirror constraint is added
182 + for (uint i = 0; i < pareto_.size(); i++) {
183 + // DIFF()
184 + if (
185 + (abs(pareto_[i].get_objective_score(2) - lambdaMin) < PRECISION or pareto_[i].get_objective_score(2) > lambdaMin) and
186 + (abs(pareto_[i].get_objective_score(2) - lambdaMax) < PRECISION or pareto_[i].get_objective_score(2) < lambdaMax)) {
187 + // ip.add_bj_ct(pareto_[i]);
188 + }
189 + // mirror
190 + // if (
191 + // (abs(pareto_[i].get_obj2M_() - lambdaMin) < PRECISION or pareto_[i].get_obj2M_() > lambdaMin) and
192 + // (abs(pareto_[i].get_obj2M_() - lambdaMax) < PRECISION or pareto_[i].get_obj2M_() < lambdaMax)) {
193 + // ip.add_bj_ct_M(pareto_[i]);
194 + // }
195 + }
196 +
197 + // SecondaryStructure s = solve_objective(1, lambdaMin, lambdaMax);
198 +
199 + // if (is_undominated_yet(s)) {
200 + // // adding the SecondaryStructure s to the set pareto_
201 + // add_solution(s);
202 + // // run localPareto above the SecondaryStructure s
203 + // extend_pareto(s.get_objective_score(2), lambdaMax);
204 + // }
205 +}
206 +
207 +size_t MOIP::get_yuv_index(size_t u, size_t v) const
208 +{
209 + size_t a, b;
210 + a = (u < v) ? u : v;
211 + b = (u > v) ? u : v;
212 + return index_of_yuv_[a][b - 4 - a];
213 +}
214 +
215 +size_t MOIP::get_Cpxi_index(size_t x_i, size_t i_on_j) const { return index_of_Cxip_[x_i][i_on_j]; }
216 +
217 +
218 +bool MOIP::allowed_basepair(size_t u, size_t v) const
219 +{
220 + size_t a, b;
221 + a = (v > u) ? u : v;
222 + b = (v > u) ? v : u;
223 + if (b - a < 4) return false;
224 + if (a >= rna_.get_RNA_length() - 6) return false;
225 + if (b >= rna_.get_RNA_length()) return false;
226 + return true;
227 +}
...\ No newline at end of file ...\ No newline at end of file
1 +#ifndef MOIP_H_
2 +#define MOIP_H_
3 +
4 +#define IL_STD
5 +
6 +#include "SecondaryStructure.h"
7 +#include "rna.h"
8 +#include <ilcplex/ilocplex.h>
9 +
10 +using std::vector;
11 +
12 +const double PRECISION = 0.0001;
13 +
14 +
15 +class MOIP
16 +{
17 + public:
18 + static uint ncores;
19 + typedef enum { MIN, MAX } DirType;
20 + typedef enum { FR, LO, UP, DB, FX } BoundType;
21 + MOIP(const RNA& rna, const vector<Motif>& motifSites);
22 + ~MOIP(void);
23 + void solve_objective(int o, double min, double max);
24 + uint get_n_solutions(void) const;
25 + const SecondaryStructure& solution(uint i) const;
26 + void extend_pareto(double lambdaMin, double lambdaMax);
27 + bool allowed_basepair(size_t u, size_t v) const;
28 + void add_solution(const SecondaryStructure& s);
29 +
30 + private:
31 + bool is_undominated_yet(const SecondaryStructure& s);
32 + void add_problem_constraints(const IloModel& model_);
33 + size_t get_yuv_index(size_t u, size_t v) const;
34 + size_t get_Cpxi_index(size_t x_i, size_t i_on_j) const;
35 + IloNumExprArg& y(size_t u, size_t v); // Direct reference to y^u_v in basepair_dv_
36 + IloNumExprArg& C(size_t x, size_t i); // Direct reference to C_p^xi in insertion_dv_
37 +
38 +
39 + RNA rna_; // RNA object
40 + vector<Motif> insertion_sites_; // Potential Motif insertion sites
41 + const float beta_; // beta parameter of the probability function
42 + double lambdaMin_; // minimum threshold value for the probability value
43 + double lambdaMax_; // maximum threshold value for the probability value
44 + int vp_; // vp_ variable for penalization of the probability score
45 + float theta_; // theta parameter for the probability function
46 + IloEnv env_; // environment CPLEX object
47 + IloNumVarArray basepair_dv_; // Decision variables
48 + IloNumVarArray insertion_dv_; // Decision variables
49 + vector<SecondaryStructure> pareto_; // Vector of results
50 + vector<vector<size_t>> index_of_Cxip_; // Stores the indexes of the Cxip in insertion_dv_
51 + vector<vector<size_t>> index_of_yuv_; // Stores the indexes of the y^u_v in basepair_dv_ in a complex way. Use get_yuv_index(u,v) to retrieve.
52 + vector<size_t> index_of_first_components; // Stores the indexes of Cx1p in insertion_dv_
53 +};
54 +
55 +inline void MOIP::add_solution(const SecondaryStructure& s) { pareto_.push_back(s); }
56 +inline uint MOIP::get_n_solutions(void) const { return pareto_.size(); }
57 +inline const SecondaryStructure& MOIP::solution(uint i) const { return pareto_[i]; }
58 +inline IloNumExprArg& MOIP::y(size_t u, size_t v) { return basepair_dv_[get_yuv_index(u, v)]; }
59 +inline IloNumExprArg& MOIP::C(size_t x, size_t i) { return insertion_dv_[get_Cpxi_index(x, i)]; }
60 +
61 +#endif // MOIP_H_
...\ No newline at end of file ...\ No newline at end of file
1 +#include "SecondaryStructure.h"
2 +#include <boost/format.hpp>
3 +
4 +static const double PRECISION(0.0001);
5 +
6 +SecondaryStructure::SecondaryStructure(const vector<double>& scores, const vector<bool>& decision_variables, VII coord, int RNAlength)
7 +: objective_scores_(scores), dv_(decision_variables), coord_(coord), n_(RNAlength)
8 +{
9 +}
10 +
11 +string SecondaryStructure::to_DBN(void) const
12 +{
13 + string res(n_, '.');
14 + for (size_t i = 0; i < n_; i++) {
15 + if (dv_[i]) {
16 + res[coord_[i].first] = '(';
17 + res[coord_[i].second] = ')';
18 + }
19 + }
20 + return res;
21 +}
22 +
23 +string SecondaryStructure::to_string(void) const
24 +{
25 + return to_DBN() + "\t" + boost::str(boost::format("%.6f") % objective_scores_[0]) + "\t" +
26 + boost::str(boost::format("%.6f") % objective_scores_[1]);
27 +}
28 +
29 +bool operator>(const SecondaryStructure& s1, const SecondaryStructure& s2)
30 +{
31 + double s11 = s1.get_objective_score(0);
32 + double s12 = s1.get_objective_score(1);
33 + double s21 = s2.get_objective_score(0);
34 + double s22 = s2.get_objective_score(1);
35 +
36 + bool obj1 = false, obj2 = false, strict1 = false, strict2 = false;
37 +
38 + if (s11 > s21) {
39 + strict1 = true;
40 + obj1 = true;
41 + } else if (abs(s11 - s21) < PRECISION) {
42 + obj1 = true;
43 + }
44 + if (s12 > s22) {
45 + strict2 = true;
46 + obj2 = true;
47 + } else if (abs(s12 - s22) < PRECISION) {
48 + obj2 = true;
49 + }
50 +
51 + if (obj1 && obj2 && (strict1 || strict2)) {
52 + return true;
53 + }
54 +
55 + return false;
56 +}
57 +
58 +bool operator<(const SecondaryStructure& s1, const SecondaryStructure& s2)
59 +{
60 + double s11 = s1.get_objective_score(0);
61 + double s12 = s1.get_objective_score(1);
62 + double s21 = s2.get_objective_score(0);
63 + double s22 = s2.get_objective_score(1);
64 +
65 + bool obj1 = false, obj2 = false, strict1 = false, strict2 = false;
66 +
67 + if (s11 < s21) {
68 + strict1 = true;
69 + obj1 = true;
70 + } else if (abs(s11 - s21) < PRECISION) {
71 + obj1 = true;
72 + }
73 + if (s12 < s22) {
74 + strict2 = true;
75 + obj2 = true;
76 + } else if (abs(s12 - s22) < PRECISION) {
77 + obj2 = true;
78 + }
79 +
80 + if (obj1 && obj2 && (strict1 || strict2)) {
81 + return true;
82 + }
83 + return false;
84 +}
1 +#ifndef __INC_IP_SOL__
2 +#define __INC_IP_SOL__
3 +
4 +#include "rna.h"
5 +#include <string>
6 +#include <vector>
7 +
8 +using std::string;
9 +using std::vector;
10 +
11 +typedef vector<int> VI;
12 +typedef vector<VI> VVI;
13 +typedef vector<std::pair<int, int>> VII;
14 +
15 +
16 +class SecondaryStructure
17 +{
18 + public:
19 + SecondaryStructure(void);
20 + SecondaryStructure(const vector<double>& scores, const vector<bool>& decision_variables, VII coord, int RNAlength);
21 +
22 + double get_objective_score(int i) const;
23 + const vector<bool>& get_decision_variables() const;
24 + bool get_decision_value(int i) const;
25 + VII get_coord() const;
26 + int get_RNA_length() const;
27 +
28 + void set_objective_score(int i, double s);
29 + string to_DBN() const;
30 + string to_string() const;
31 +
32 + private:
33 + vector<double> objective_scores_; // values of the different objective functions for that SecondaryStructure
34 + vector<bool> dv_; // values of the decision variable of the integer program
35 + vector<Motif> motif_info_; // information about known motives in this secondary structure and their positions
36 + VII coord_; // coordinates of the dv_. dv_[i] == true <==> coord_[i][0] paired to coord_[i][1];
37 + size_t n_; // length of the RNA
38 +};
39 +
40 +// return if this SecondaryStructure s1 dominates s2
41 +bool operator>(const SecondaryStructure& s1, const SecondaryStructure& s2);
42 +// return if this SecondaryStructure s2 dominates s1
43 +bool operator<(const SecondaryStructure& s1, const SecondaryStructure& s2);
44 +
45 +inline double SecondaryStructure::get_objective_score(int i) const { return objective_scores_[i]; }
46 +inline const vector<bool>& SecondaryStructure::get_decision_variables() const { return dv_; }
47 +inline void SecondaryStructure::set_objective_score(int i, double s) { objective_scores_[i - 1] = s; }
48 +inline VII SecondaryStructure::get_coord() const { return coord_; }
49 +inline int SecondaryStructure::get_RNA_length() const { return n_; }
50 +inline bool SecondaryStructure::get_decision_value(int i) const { return dv_[i]; }
51 +
52 +#endif
1 +/***
2 + Biominserter
3 + Louis Becquey, starting from Audrey Legendre's code
4 + nov 2018
5 +***/
6 +
7 +#include <algorithm>
8 +#include <boost/algorithm/string.hpp>
9 +#include <cstdlib>
10 +#include <iostream>
11 +#include <iterator>
12 +#include <string>
13 +#include <thread>
14 +#include <vector>
15 +
16 +#include "MOIP.h"
17 +#include "fa.h"
18 +#include "nupack.h"
19 +
20 +using namespace std;
21 +
22 +Motif parse_csv_line(string line)
23 +{
24 + vector<string> tokens;
25 + boost::split(tokens, line, boost::is_any_of(","));
26 + Motif m;
27 + m.atlas_id = tokens[0];
28 + m.comp.push_back(Component(make_pair<int, int>(stoi(tokens[3]), stoi(tokens[4])), stoi(tokens[2])));
29 + if (tokens[5] != "-")
30 + m.comp.push_back(Component(make_pair<int, int>(stoi(tokens[5]), stoi(tokens[6])), stoi(tokens[2])));
31 + m.reversed = (tokens[1] == "True");
32 + return m;
33 +}
34 +
35 +int main(int argc, char* argv[])
36 +{
37 + // float time;
38 + // clock_t t1, t2;
39 + // t1 = clock();
40 +
41 + MOIP::ncores = thread::hardware_concurrency() - 1;
42 +
43 + if (argc != 3) {
44 + cerr << argc << " arguments specified !" << endl;
45 + cerr << "Please specify the following input files:" << endl;
46 + cerr << "biominserter sequence.fasta insertion.sites.csv" << endl;
47 + return EXIT_FAILURE;
48 + }
49 +
50 +
51 + const char* inputName = argv[1];
52 + const char* csvname = argv[2];
53 + cout << "Reading input files..." << endl;
54 + if (access(inputName, F_OK) == -1) {
55 + cerr << inputName << " not found" << endl;
56 + return EXIT_FAILURE;
57 + }
58 + if (access(csvname, F_OK) == -1) {
59 + cerr << csvname << " not found" << endl;
60 + return EXIT_FAILURE;
61 + }
62 +
63 + // load fasta file
64 + list<Fasta> f;
65 + Fasta::load(f, inputName);
66 + list<Fasta>::iterator fa = f.begin();
67 + cout << "loading " << fa->name() << "..." << endl;
68 + RNA myRNA = RNA(fa->name(), fa->seq());
69 + cout << "\t>" << inputName << " successfuly loaded" << endl;
70 +
71 + // load CSV file
72 + string line;
73 + ifstream motifs = ifstream(csvname);
74 + getline(motifs, line); // skip header
75 + vector<Motif> posInsertionSites;
76 + while (getline(motifs, line)) {
77 + posInsertionSites.push_back(parse_csv_line(line));
78 + }
79 + cout << "\t>" << csvname << " successfuly loaded" << endl;
80 +
81 + // creating the Multi-Objective problem:
82 + MOIP myMOIP = MOIP(myRNA, posInsertionSites); // using the constructor with arguments automatically defines the decision variables.
83 +
84 + // finding the best SecondaryStructures for each objective
85 + double max = myRNA.get_RNA_length();
86 + try {
87 + myMOIP.solve_objective(1, -max, max);
88 + } catch (IloCplex::Exception& e) {
89 + cerr << e << endl;
90 + }
91 + // SecondaryStructure bestSSO1 = myMOIP.solve_objective(1, -max, max);
92 + // SecondaryStructure bestSSO2 = myMOIP.solve_objective(2, -max, max);
93 + // double bestObj2 = bestSSO2.get_objective_score(2);
94 +
95 + // extend to the whole pareto set
96 + // myMOIP.add_solution(bestSSO1);
97 + // myMOIP.extend_pareto(bestObj2, max);
98 +
99 + // print the pareto set
100 + // cout << "Structure \t Free energy score \t Expected accuracy score" << endl;
101 + // for (uint i = 0; i < myMOIP.get_n_solutions(); i++) {
102 + // cout << myMOIP.solution(i).to_string() << endl;
103 + // }
104 + // cout << endl;
105 +
106 + return EXIT_SUCCESS;
107 +}
1 +/*
2 + * $Id$
3 + *
4 + * Copyright (C) 2010 Kengo Sato
5 + *
6 + * doublehis file comes from IPknot.
7 + *
8 + */
9 +
10 +#ifndef __INC_DP_TABLE_H__
11 +#define __INC_DP_TABLE_H__
12 +
13 +class DPtable2
14 +{
15 + public:
16 + DPtable2() : V_(), N_(0) {}
17 + void resize(int n)
18 + {
19 + N_ = n;
20 + V_.resize(N_ * (N_ + 1) / 2 + (N_ + 1));
21 + }
22 + void fill(const double& v) { std::fill(V_.begin(), V_.end(), v); }
23 + double& operator()(int i, int j) { return V_[index(i, j)]; }
24 + const double& operator()(int i, int j) const { return V_[index(i, j)]; }
25 +
26 + private:
27 + int index(int i, int j) const
28 + {
29 + assert(j <= N_);
30 + return j == i - 1 ? N_ * (N_ + 1) / 2 + i : i * N_ + j - i * (1 + i) / 2;
31 + }
32 + std::vector<double> V_;
33 + int N_;
34 +};
35 +
36 +class DPtable4
37 +{
38 + public:
39 + DPtable4() : V_(), N_(0) {}
40 + void resize(int n)
41 + {
42 + N_ = n;
43 + V_.resize(N_ * (N_ - 1) * (N_ - 2) * (N_ - 3) / 2 / 3 / 4);
44 + }
45 + void fill(const double& v) { std::fill(V_.begin(), V_.end(), v); }
46 + double& operator()(int i, int d, int e, int j) { return V_[index(i, d, e, j)]; }
47 + const double& operator()(int i, int d, int e, int j) const { return V_[index(i, d, e, j)]; }
48 +
49 + private:
50 + int index(int h, int r, int m, int s) const
51 + {
52 + int n = N_;
53 + int h2 = h * h;
54 + int h3 = h2 * h;
55 + int h4 = h3 * h;
56 + int m2 = m * m;
57 + int n2 = n * n;
58 + int n3 = n2 * n;
59 + int r2 = r * r;
60 + int r3 = r2 * r;
61 + assert(h <= r);
62 + assert(r <= m);
63 + assert(m <= s);
64 + assert(s <= N_);
65 + return (h == r && m == s) ? V_.size() - 1 :
66 + (
67 + -24 - 50 * h - 35 * h2 - 10 * h3 - h4 - 36 * m - 12 * m2 + 12 * n + 70 * h * n +
68 + 30 * h2 * n + 4 * h3 * n + 24 * m * n - 12 * n2 - 30 * h * n2 - 6 * h2 * n2 + 4 * h * n3 +
69 + 44 * r - 48 * n * r + 12 * n2 * r + 24 * r2 - 12 * n * r2 + 4 * r3 + 24 * s) /
70 + 24;
71 + }
72 + std::vector<double> V_;
73 + int N_;
74 +};
75 +
76 +class DPtableX
77 +{
78 + public:
79 + DPtableX() : V_(), N_(0), D_(0) {}
80 + void resize(int d, int n)
81 + {
82 + N_ = n;
83 + D_ = d;
84 + int max_sz = 0;
85 + for (int i = d; i < d + 3; ++i) max_sz = std::max(max_sz, (N_ - i) * (i - 5) * (i - 1) * (i - 2) / 2);
86 + V_.resize(max_sz);
87 + }
88 + void fill(const double& v) { std::fill(V_.begin(), V_.end(), v); }
89 + double& operator()(int i, int d, int e, int s) { return V_[index(i, d, e, s)]; }
90 + const double& operator()(int i, int d, int e, int s) const { return V_[index(i, d, e, s)]; }
91 + void swap(DPtableX& x)
92 + {
93 + std::swap(V_, x.V_);
94 + std::swap(N_, x.N_);
95 + std::swap(D_, x.D_);
96 + }
97 +
98 + private:
99 + int index(int i, int h1, int m1, int s) const
100 + {
101 + int d = D_;
102 + int d1d2 = (d - 1) * (d - 2);
103 + int d5 = d - 5;
104 + int h1_i_1 = h1 - i - 1;
105 + assert(i + d < N_);
106 + assert(d - 6 >= s);
107 + assert(i < h1);
108 + return i * d5 * d1d2 / 2 + s * d1d2 / 2 + h1_i_1 * (d - 1) - h1_i_1 * (h1 - i) / 2 + m1 - h1 - 1;
109 + }
110 +
111 + std::vector<double> V_;
112 + int N_;
113 + int D_;
114 +};
115 +
116 +#endif
1 +/*
2 + * $Id$
3 + *
4 + * Copyright (C) 2008-2010 Kengo Sato
5 + *
6 + * This file comes from Ipknot.
7 + */
8 +
9 +
10 +#include "fa.h"
11 +#include <iostream>
12 +#include <fstream>
13 +#include <sstream>
14 +#include <string>
15 +#include <cctype>
16 +#include <cstring>
17 +#include <cassert>
18 +
19 +typedef unsigned int uint;
20 +
21 +//static
22 +unsigned int Fasta::load(std::list<Fasta>& data, const char* file){
23 +
24 + std::string line, name, seq, str;
25 + std::ifstream ifs(file);
26 + while (std::getline(ifs, line)) {
27 + if (line[0]=='>') { // header
28 + if (!name.empty()) {
29 + assert(str.size()==0 || seq.size()==str.size());
30 + data.push_back(Fasta(name, seq, str));
31 + }
32 +
33 + name=line.substr(1);
34 + seq.clear();
35 + str.clear();
36 + continue;
37 + }
38 +
39 + if (std::strchr("()[].?xle ", line[0])==NULL) { // seq
40 + uint i;
41 + for (i=0; i!=line.size(); ++i)
42 + if (!isalpha(line[i])) break;
43 + seq+=line.substr(0, i);
44 + } else {
45 + uint i;
46 + for (i=0; i!=line.size(); ++i)
47 + if (std::strchr("()[].?xle ", line[i])==NULL) break;
48 + str+=line.substr(0, i);
49 + }
50 + }
51 +
52 + if (!name.empty())
53 + data.push_back(Fasta(name, seq, str));
54 +
55 + return data.size();
56 +}
57 +
1 +/*
2 + * $Id$
3 + *
4 + * Copyright (C) 2008-2010 Kengo Sato
5 + *
6 + * This file comes from Ipknot.
7 +*/
8 +
9 +#include <list>
10 +#include <string>
11 +
12 +using std::list;
13 +using std::string;
14 +
15 +class Fasta
16 +{
17 + public:
18 + Fasta() : name_(), seq_(), str_() {}
19 + Fasta(const string& name, const string& seq, const string& str = "") : name_(name), seq_(seq), str_(str) {}
20 + Fasta(const Fasta& fa) : name_(fa.name_), seq_(fa.seq_), str_(fa.str_) {}
21 + Fasta& operator=(const Fasta& fa)
22 + {
23 + if (this != &fa) {
24 + name_ = fa.name_;
25 + seq_ = fa.seq_;
26 + str_ = fa.str_;
27 + }
28 + return *this;
29 + }
30 + const string& name() const { return name_; }
31 + const string& seq() const { return seq_; }
32 + string& seq() { return seq_; }
33 + const string& str() const { return str_; }
34 + unsigned int size() const { return seq_.size(); }
35 +
36 + static unsigned int load(list<Fasta>& data, const char* file);
37 +
38 + private:
39 + string name_;
40 + string seq_;
41 + string str_;
42 +};
This diff is collapsed. Click to expand it.
1 +/*
2 + * $Id$
3 + *
4 + * Copyright (C) 2010 Kengo Sato
5 + *
6 + * This file comes from IPknot.
7 + *
8 + */
9 +
10 +#include <boost/multi_array.hpp>
11 +#include <string>
12 +#include <vector>
13 +#include <iostream>
14 +
15 +using std::string;
16 +using std::vector;
17 +
18 +#define kB 0.00198717 // Boltzmann constant in kcal/mol/K
19 +#define ZERO_C_IN_KELVIN 273.15 // Zero degrees C in Kelvin
20 +#define AVOGADRO 6.022e23 // Avogadro's number
21 +
22 +typedef float energy_t;
23 +
24 +class DPtable2
25 +{
26 + public:
27 + DPtable2() : V_(), N_(0) {}
28 + void resize(int n)
29 + {
30 + N_ = n;
31 + V_.resize(N_ * (N_ + 1) / 2 + (N_ + 1));
32 + }
33 + void fill(const float& v) { std::fill(V_.begin(), V_.end(), v); }
34 + float& operator()(int i, int j) { return V_[index(i, j)]; }
35 + const float& operator()(int i, int j) const { return V_[index(i, j)]; }
36 +
37 + private:
38 + int index(int i, int j) const
39 + {
40 + assert(j <= N_);
41 + return j == i - 1 ? N_ * (N_ + 1) / 2 + i : i * N_ + j - i * (1 + i) / 2;
42 + }
43 + std::vector<float> V_;
44 + int N_;
45 +};
46 +
47 +class DPtable4
48 +{
49 + public:
50 + DPtable4() : V_(), N_(0) {}
51 + void resize(int n)
52 + {
53 + N_ = n;
54 + std::cout << V_.max_size() << " - " << N_ << " - " << sizeof(float) * static_cast<unsigned long>(N_) * (N_ - 1) * (N_ - 2) * (N_ - 3) / 2 / 3 / 4 << std::endl;
55 + V_.resize(static_cast<unsigned long>(N_) * (N_ - 1) * (N_ - 2) * (N_ - 3) / 2 / 3 / 4); // This number can be HUGE
56 + std::cout << "c'est toi qui bad_allocque ?" << std::endl;
57 + }
58 + void fill(const float& v) { std::fill(V_.begin(), V_.end(), v); }
59 + float& operator()(int i, int d, int e, int j) { return V_[index(i, d, e, j)]; }
60 + const float& operator()(int i, int d, int e, int j) const { return V_[index(i, d, e, j)]; }
61 +
62 + private:
63 + int index(int h, int r, int m, int s) const
64 + {
65 + int n = N_;
66 + int h2 = h * h;
67 + int h3 = h2 * h;
68 + int h4 = h3 * h;
69 + int m2 = m * m;
70 + int n2 = n * n;
71 + int n3 = n2 * n;
72 + int r2 = r * r;
73 + int r3 = r2 * r;
74 + assert(h <= r);
75 + assert(r <= m);
76 + assert(m <= s);
77 + assert(s <= N_);
78 + return (h == r && m == s) ? V_.size() - 1 :
79 + (
80 + -24 - 50 * h - 35 * h2 - 10 * h3 - h4 - 36 * m - 12 * m2 + 12 * n + 70 * h * n +
81 + 30 * h2 * n + 4 * h3 * n + 24 * m * n - 12 * n2 - 30 * h * n2 - 6 * h2 * n2 + 4 * h * n3 +
82 + 44 * r - 48 * n * r + 12 * n2 * r + 24 * r2 - 12 * n * r2 + 4 * r3 + 24 * s) /
83 + 24;
84 + }
85 + std::vector<float> V_;
86 + int N_;
87 +};
88 +
89 +class DPtableX
90 +{
91 + public:
92 + DPtableX() : V_(), N_(0), D_(0) {}
93 + void resize(int d, int n)
94 + {
95 + N_ = n;
96 + D_ = d;
97 + int max_sz = 0;
98 + for (int i = d; i < d + 3; ++i) max_sz = std::max(max_sz, (N_ - i) * (i - 5) * (i - 1) * (i - 2) / 2);
99 + V_.resize(max_sz);
100 + }
101 + void fill(const float& v) { std::fill(V_.begin(), V_.end(), v); }
102 + float& operator()(int i, int d, int e, int s) { return V_[index(i, d, e, s)]; }
103 + const float& operator()(int i, int d, int e, int s) const { return V_[index(i, d, e, s)]; }
104 + void swap(DPtableX& x)
105 + {
106 + std::swap(V_, x.V_);
107 + std::swap(N_, x.N_);
108 + std::swap(D_, x.D_);
109 + }
110 +
111 + private:
112 + int index(int i, int h1, int m1, int s) const
113 + {
114 + int d = D_;
115 + int d1d2 = (d - 1) * (d - 2);
116 + int d5 = d - 5;
117 + int h1_i_1 = h1 - i - 1;
118 + assert(i + d < N_);
119 + assert(d - 6 >= s);
120 + assert(i < h1);
121 + return i * d5 * d1d2 / 2 + s * d1d2 / 2 + h1_i_1 * (d - 1) - h1_i_1 * (h1 - i) / 2 + m1 - h1 - 1;
122 + }
123 +
124 + std::vector<float> V_;
125 + int N_;
126 + int D_;
127 +};
128 +
129 +class Nupack
130 +{
131 + public:
132 + Nupack();
133 + void load_sequence(const string& s);
134 + void load_parameters_fm363(const vector<float>& v);
135 + void load_default_parameters(/*int which*/);
136 + bool load_parameters(const char* filename);
137 + void dump_parameters(std::ostream& os) const;
138 + float calculate_partition_function();
139 + void calculate_posterior();
140 + void get_posterior(vector<float>& bp, vector<int>& offset) const;
141 + void get_posterior(vector<float>& bp1, vector<float>& bp2, vector<int>& offset) const;
142 +
143 + private:
144 + void fastiloops(int i, int j, DPtable4& Qg, DPtableX& Qx, DPtableX& Qx2);
145 + void fastiloops_pr(int i, int j, DPtable4& Qg, DPtableX& Qx, DPtableX& Qx2, DPtable4& Pg, DPtableX& Px, DPtableX& Px2);
146 + energy_t score_hairpin(int i, int j) const;
147 + energy_t score_loop(int l) const;
148 + energy_t score_interior(int i, int d, int e, int j, bool pk) const;
149 + energy_t score_interior_mismatch(int i, int j) const;
150 + energy_t score_interior_mismatch(int i, int j, int k, int l) const;
151 + energy_t score_interior_asymmetry(int l1, int l2) const;
152 + energy_t score_multiloop(bool pk) const;
153 + energy_t score_multiloop_paired(int n, bool pk) const;
154 + energy_t score_multiloop_unpaired(int n, bool pk) const;
155 + energy_t score_at_penalty(int i, int j) const;
156 + energy_t score_dangle(int i, int j) const;
157 + energy_t score_pk() const;
158 + energy_t score_pk_multiloop() const;
159 + energy_t score_pk_pk() const;
160 + energy_t score_pk_paired(int n) const;
161 + energy_t score_pk_unpaired(int n) const;
162 + energy_t score_pk_band(int n) const;
163 + int base(char x) const;
164 + bool allow_paired(int i, int j) const;
165 + bool wc_pair(int i, int j) const;
166 + int pair_type(int i, int j) const;
167 + int pair_type(int i) const;
168 +
169 + vector<int> base_map;
170 + boost::multi_array<int, 2> pair_map;
171 + vector<int> seq;
172 + int N;
173 + float RT;
174 + DPtable2 Q;
175 + DPtable2 Qb;
176 + DPtable2 Qm;
177 + DPtable2 Qp;
178 + DPtable2 Qz;
179 + DPtable4 Qg;
180 + DPtable4 Qgl;
181 + DPtable4 Qgr;
182 + DPtable4 Qgls;
183 + DPtable4 Qgrs;
184 + DPtable2 P;
185 + DPtable2 Pb;
186 + DPtable2 Pm;
187 + DPtable2 Pp;
188 + DPtable2 Pz;
189 + DPtable2 Pbg;
190 + DPtable4 Pg;
191 + DPtable4 Pgl;
192 + DPtable4 Pgr;
193 + DPtable4 Pgls;
194 + DPtable4 Pgrs;
195 +
196 + // energy parameters
197 + energy_t hairpin37[30];
198 + energy_t bulge37[30];
199 + energy_t interior37[30];
200 + energy_t stack37[6][6];
201 + energy_t int11_37[6][6][4][4];
202 + energy_t int21_37[6][4][4][6][4];
203 + energy_t int22_37[6][6][4][4][4][4];
204 + energy_t dangle3_37[6][4];
205 + energy_t dangle5_37[6][4];
206 + energy_t triloop37[4][4][4][4][4];
207 + energy_t tloop37[4][4][4][4][4][4];
208 + energy_t mismatch_hairpin37[4][4][6];
209 + energy_t mismatch_interior37[4][4][6];
210 + energy_t asymmetry_penalty[4];
211 + energy_t polyC_penalty, polyC_slope, polyC_int;
212 + energy_t at_penalty;
213 + energy_t multiloop_penalty; // alpha1
214 + energy_t multiloop_paired_penalty; // alpha2
215 + energy_t multiloop_unpaired_penalty; // alpha3
216 + energy_t pk_penalty; // beta1
217 + energy_t pk_multiloop_penalty; // beta1m
218 + energy_t pk_pk_penalty; // beta1p
219 + energy_t pk_paired_penalty; // beta2
220 + energy_t pk_unpaired_penalty; // beta3
221 + energy_t pk_band_penalty;
222 + energy_t pk_stack_span;
223 + energy_t pk_interior_span;
224 + energy_t multiloop_penalty_pk;
225 + energy_t multiloop_paired_penalty_pk;
226 + energy_t multiloop_unpaired_penalty_pk;
227 + energy_t max_asymmetry;
228 + energy_t salt_correction;
229 + energy_t loop_greater30;
230 + energy_t hairpin_GGG;
231 + float intermolecular_initiation;
232 +};
1 +#include "rna.h"
2 +#include "nupack.h"
3 +#include <iostream>
4 +#include <string>
5 +#include <utility>
6 +#include <vector>
7 +
8 +using std::cout, std::cerr, std::endl;
9 +
10 +RNA::RNA(string name, string seq)
11 +{
12 + if (!check_seq(seq)) {
13 + cerr << "Unknown chars in input sequence. Please restrict to ACGTU." << endl;
14 + exit(EXIT_FAILURE);
15 + }
16 + name_ = name;
17 + seq_ = seq;
18 + format();
19 + n_ = seq_.size();
20 + cout << "\t>formatted sequence" << endl;
21 +
22 + /*define type_*/
23 + type_ = vector<vector<int>>(n_, vector<int>(n_));
24 + for (int i = 0; i < n_; i++) {
25 + for (int j = 0; j < n_; j++) {
26 + if (i < j) {
27 + std::stringstream ss;
28 + ss << seq_[i] << seq_[j];
29 + string str = ss.str();
30 + if (str.compare("AU") == 0) {
31 + type_[i][j] = 1;
32 + } else if (str.compare("CG") == 0) {
33 + type_[i][j] = 2;
34 +
35 + } else if (str.compare("GC") == 0) {
36 + type_[i][j] = 3;
37 + } else if (str.compare("GU") == 0) {
38 + type_[i][j] = 4;
39 + } else if (str.compare("UG") == 0) {
40 + type_[i][j] = 5;
41 + } else if (str.compare("UA") == 0) {
42 + type_[i][j] = 6;
43 + } else {
44 + type_[i][j] = 0;
45 + }
46 + } else {
47 + type_[i][j] = 0;
48 + }
49 + }
50 + }
51 + nBP_ = type_.size();
52 +
53 + /*define coord_*/
54 + for (int i = 0; i < n_; i++) {
55 + for (int j = 0; j < n_; j++) {
56 + if (i < j and type_[i][j] > 0) {
57 + if (i != 0 and i != n_ and j != 0 and j != n_) {
58 + if (type_[i - 1][j + 1] > 0 or type_[i + 1][j - 1] > 0) {
59 + coord_.push_back(std::make_pair(i, j));
60 + }
61 + } else if (i == 0 or j == n_) {
62 + if (type_[i + 1][j - 1] > 0) {
63 + coord_.push_back(std::make_pair(i, j));
64 + }
65 + }
66 + }
67 + }
68 + }
69 +
70 +
71 + /*define pij_*/
72 + vector<float> bp;
73 + vector<int> offset;
74 + Nupack nu;
75 + // nu.load_parameters("rna1999.dG");
76 + nu.load_default_parameters();
77 + cout << "\t>default parameters loaded (Serra and Turner, 1995)" << endl;
78 + nu.load_sequence(seq_);
79 + cout << "\t>computing pairing probabilities..." << endl;
80 + try {
81 + nu.calculate_partition_function();
82 + } catch (std::exception& e) {
83 + cerr << e.what() << endl;
84 + exit(EXIT_FAILURE);
85 + }
86 + nu.calculate_posterior();
87 + nu.get_posterior(bp, offset);
88 +
89 + pij_ = vector<vector<float>>(n_, vector<float>(n_));
90 + for (int i = 1; i <= n_; i++) {
91 + for (int j = 1; j <= n_; j++) {
92 + pij_[i - 1][j - 1] = bp[offset[i] + j];
93 + }
94 + }
95 + cout << "\t>pairing probabilities defined" << endl;
96 +}
97 +
98 +int RNA::find_coord(pair<int, int> p)
99 +{
100 + vector<pair<int, int>>::iterator it = find(coord_.begin(), coord_.end(), p);
101 + int r = -1;
102 + if (it != coord_.end()) r = distance(coord_.begin(), it);
103 + return r;
104 +}
105 +
106 +bool RNA::check_seq(string seq) // Checks if the sequences only contains ACGUT.
107 +{
108 + bool res = true;
109 + for (unsigned int i = 0; i < seq.size(); i++) {
110 + if (seq[i] != 'A' and seq[i] != 'U' and seq[i] != 'C' and seq[i] != 'G' and seq[i] != 'T') {
111 + res = false;
112 + break;
113 + }
114 + }
115 + return res;
116 +}
117 +
118 +void RNA::format()
119 +{
120 + for (unsigned int i = 0; i < seq_.size(); i++) {
121 + seq_[i] = toupper(seq_[i]);
122 + if (seq_[i] == 'T') {
123 + seq_[i] = 'U';
124 + break;
125 + }
126 + }
127 +}
1 +
2 +#ifndef DEF_RNA
3 +#define DEF_RNA
4 +
5 +#include <map>
6 +#include <string>
7 +#include <vector>
8 +
9 +using std::pair, std::string, std::vector, std::map;
10 +
11 +typedef struct Comp_ {
12 + pair<uint, uint> pos;
13 + int score;
14 + size_t k;
15 + Comp_(pair<int, int> p, int s) : pos(p), score(s) { k = 1 + pos.second - pos.first; }
16 +} Component;
17 +typedef struct {
18 + string atlas_id;
19 + vector<Component> comp;
20 + bool reversed;
21 +} Motif;
22 +
23 +class RNA
24 +{
25 + public:
26 + RNA();
27 + RNA(string name, string seq);
28 +
29 + int get_n_();
30 + string get_name_();
31 + string get_seq_();
32 + vector<vector<int>> get_type_();
33 + int get_type(int i, int j);
34 + int get_type(int i);
35 + vector<pair<int, int>> get_coord_();
36 + pair<int, int> get_coord(int i);
37 + int get_coordF(int i);
38 + int get_coordS(int i);
39 + int find_coord(pair<int, int>);
40 + vector<vector<float>> get_pij_();
41 + float get_pij(int i, int j);
42 + float get_pij(int i);
43 + int get_err_();
44 + uint get_RNA_length() const;
45 +
46 + bool check_seq(string seq);
47 + void format();
48 +
49 + private:
50 + string name_; /*name of the rna*/
51 + string seq_; /*sequence of the rna*/
52 + int n_; /*length of the rna*/
53 + vector<vector<int>> type_; /*vector of base pair types*/
54 + vector<pair<int, int>> coord_; /*vector of base pair coordinates*/
55 + vector<vector<float>> pij_; /*vector of probabilities*/
56 + uint nBP_; /*number of possible base pair*/
57 +};
58 +
59 +inline int RNA::get_n_() { return n_; }
60 +inline string RNA::get_name_() { return name_; }
61 +inline string RNA::get_seq_() { return seq_; }
62 +inline vector<vector<int>> RNA::get_type_() { return type_; }
63 +inline int RNA::get_type(int i, int j) { return type_[i][j]; }
64 +inline int RNA::get_type(int i) { return type_[get_coord(i).first][get_coord(i).second]; }
65 +inline vector<pair<int, int>> RNA::get_coord_() { return coord_; }
66 +inline pair<int, int> RNA::get_coord(int i) { return coord_[i]; }
67 +inline int RNA::get_coordF(int i) { return coord_[i].first; }
68 +inline int RNA::get_coordS(int i) { return coord_[i].second; }
69 +inline vector<vector<float>> RNA::get_pij_() { return pij_; }
70 +inline float RNA::get_pij(int i, int j) { return pij_[i][j]; }
71 +inline float RNA::get_pij(int i) { return pij_[get_coord(i).first][get_coord(i).second]; }
72 +inline uint RNA::get_RNA_length() const { return nBP_; }
73 +
74 +#endif
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
1 +<?xml version="1.0"?>
2 +<!DOCTYPE ipe SYSTEM "ipe.dtd">
3 +<ipe version="70206" creator="Ipe 7.2.7">
4 +<info created="D:20181012182334" modified="D:20181012182334"/>
5 +<ipestyle name="basic">
6 +<symbol name="arrow/arc(spx)">
7 +<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen">
8 +0 0 m
9 +-1 0.333 l
10 +-1 -0.333 l
11 +h
12 +</path>
13 +</symbol>
14 +<symbol name="arrow/farc(spx)">
15 +<path stroke="sym-stroke" fill="white" pen="sym-pen">
16 +0 0 m
17 +-1 0.333 l
18 +-1 -0.333 l
19 +h
20 +</path>
21 +</symbol>
22 +<symbol name="arrow/ptarc(spx)">
23 +<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen">
24 +0 0 m
25 +-1 0.333 l
26 +-0.8 0 l
27 +-1 -0.333 l
28 +h
29 +</path>
30 +</symbol>
31 +<symbol name="arrow/fptarc(spx)">
32 +<path stroke="sym-stroke" fill="white" pen="sym-pen">
33 +0 0 m
34 +-1 0.333 l
35 +-0.8 0 l
36 +-1 -0.333 l
37 +h
38 +</path>
39 +</symbol>
40 +<symbol name="mark/circle(sx)" transformations="translations">
41 +<path fill="sym-stroke">
42 +0.6 0 0 0.6 0 0 e
43 +0.4 0 0 0.4 0 0 e
44 +</path>
45 +</symbol>
46 +<symbol name="mark/disk(sx)" transformations="translations">
47 +<path fill="sym-stroke">
48 +0.6 0 0 0.6 0 0 e
49 +</path>
50 +</symbol>
51 +<symbol name="mark/fdisk(sfx)" transformations="translations">
52 +<group>
53 +<path fill="sym-fill">
54 +0.5 0 0 0.5 0 0 e
55 +</path>
56 +<path fill="sym-stroke" fillrule="eofill">
57 +0.6 0 0 0.6 0 0 e
58 +0.4 0 0 0.4 0 0 e
59 +</path>
60 +</group>
61 +</symbol>
62 +<symbol name="mark/box(sx)" transformations="translations">
63 +<path fill="sym-stroke" fillrule="eofill">
64 +-0.6 -0.6 m
65 +0.6 -0.6 l
66 +0.6 0.6 l
67 +-0.6 0.6 l
68 +h
69 +-0.4 -0.4 m
70 +0.4 -0.4 l
71 +0.4 0.4 l
72 +-0.4 0.4 l
73 +h
74 +</path>
75 +</symbol>
76 +<symbol name="mark/square(sx)" transformations="translations">
77 +<path fill="sym-stroke">
78 +-0.6 -0.6 m
79 +0.6 -0.6 l
80 +0.6 0.6 l
81 +-0.6 0.6 l
82 +h
83 +</path>
84 +</symbol>
85 +<symbol name="mark/fsquare(sfx)" transformations="translations">
86 +<group>
87 +<path fill="sym-fill">
88 +-0.5 -0.5 m
89 +0.5 -0.5 l
90 +0.5 0.5 l
91 +-0.5 0.5 l
92 +h
93 +</path>
94 +<path fill="sym-stroke" fillrule="eofill">
95 +-0.6 -0.6 m
96 +0.6 -0.6 l
97 +0.6 0.6 l
98 +-0.6 0.6 l
99 +h
100 +-0.4 -0.4 m
101 +0.4 -0.4 l
102 +0.4 0.4 l
103 +-0.4 0.4 l
104 +h
105 +</path>
106 +</group>
107 +</symbol>
108 +<symbol name="mark/cross(sx)" transformations="translations">
109 +<group>
110 +<path fill="sym-stroke">
111 +-0.43 -0.57 m
112 +0.57 0.43 l
113 +0.43 0.57 l
114 +-0.57 -0.43 l
115 +h
116 +</path>
117 +<path fill="sym-stroke">
118 +-0.43 0.57 m
119 +0.57 -0.43 l
120 +0.43 -0.57 l
121 +-0.57 0.43 l
122 +h
123 +</path>
124 +</group>
125 +</symbol>
126 +<symbol name="arrow/fnormal(spx)">
127 +<path stroke="sym-stroke" fill="white" pen="sym-pen">
128 +0 0 m
129 +-1 0.333 l
130 +-1 -0.333 l
131 +h
132 +</path>
133 +</symbol>
134 +<symbol name="arrow/pointed(spx)">
135 +<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen">
136 +0 0 m
137 +-1 0.333 l
138 +-0.8 0 l
139 +-1 -0.333 l
140 +h
141 +</path>
142 +</symbol>
143 +<symbol name="arrow/fpointed(spx)">
144 +<path stroke="sym-stroke" fill="white" pen="sym-pen">
145 +0 0 m
146 +-1 0.333 l
147 +-0.8 0 l
148 +-1 -0.333 l
149 +h
150 +</path>
151 +</symbol>
152 +<symbol name="arrow/linear(spx)">
153 +<path stroke="sym-stroke" pen="sym-pen">
154 +-1 0.333 m
155 +0 0 l
156 +-1 -0.333 l
157 +</path>
158 +</symbol>
159 +<symbol name="arrow/fdouble(spx)">
160 +<path stroke="sym-stroke" fill="white" pen="sym-pen">
161 +0 0 m
162 +-1 0.333 l
163 +-1 -0.333 l
164 +h
165 +-1 0 m
166 +-2 0.333 l
167 +-2 -0.333 l
168 +h
169 +</path>
170 +</symbol>
171 +<symbol name="arrow/double(spx)">
172 +<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen">
173 +0 0 m
174 +-1 0.333 l
175 +-1 -0.333 l
176 +h
177 +-1 0 m
178 +-2 0.333 l
179 +-2 -0.333 l
180 +h
181 +</path>
182 +</symbol>
183 +<pen name="heavier" value="0.8"/>
184 +<pen name="fat" value="1.2"/>
185 +<pen name="ultrafat" value="2"/>
186 +<symbolsize name="large" value="5"/>
187 +<symbolsize name="small" value="2"/>
188 +<symbolsize name="tiny" value="1.1"/>
189 +<arrowsize name="large" value="10"/>
190 +<arrowsize name="small" value="5"/>
191 +<arrowsize name="tiny" value="3"/>
192 +<color name="red" value="1 0 0"/>
193 +<color name="green" value="0 1 0"/>
194 +<color name="blue" value="0 0 1"/>
195 +<color name="yellow" value="1 1 0"/>
196 +<color name="orange" value="1 0.647 0"/>
197 +<color name="gold" value="1 0.843 0"/>
198 +<color name="purple" value="0.627 0.125 0.941"/>
199 +<color name="gray" value="0.745"/>
200 +<color name="brown" value="0.647 0.165 0.165"/>
201 +<color name="navy" value="0 0 0.502"/>
202 +<color name="pink" value="1 0.753 0.796"/>
203 +<color name="seagreen" value="0.18 0.545 0.341"/>
204 +<color name="turquoise" value="0.251 0.878 0.816"/>
205 +<color name="violet" value="0.933 0.51 0.933"/>
206 +<color name="darkblue" value="0 0 0.545"/>
207 +<color name="darkcyan" value="0 0.545 0.545"/>
208 +<color name="darkgray" value="0.663"/>
209 +<color name="darkgreen" value="0 0.392 0"/>
210 +<color name="darkmagenta" value="0.545 0 0.545"/>
211 +<color name="darkorange" value="1 0.549 0"/>
212 +<color name="darkred" value="0.545 0 0"/>
213 +<color name="lightblue" value="0.678 0.847 0.902"/>
214 +<color name="lightcyan" value="0.878 1 1"/>
215 +<color name="lightgray" value="0.827"/>
216 +<color name="lightgreen" value="0.565 0.933 0.565"/>
217 +<color name="lightyellow" value="1 1 0.878"/>
218 +<dashstyle name="dashed" value="[4] 0"/>
219 +<dashstyle name="dotted" value="[1 3] 0"/>
220 +<dashstyle name="dash dotted" value="[4 2 1 2] 0"/>
221 +<dashstyle name="dash dot dotted" value="[4 2 1 2 1 2] 0"/>
222 +<textsize name="large" value="\large"/>
223 +<textsize name="Large" value="\Large"/>
224 +<textsize name="LARGE" value="\LARGE"/>
225 +<textsize name="huge" value="\huge"/>
226 +<textsize name="Huge" value="\Huge"/>
227 +<textsize name="small" value="\small"/>
228 +<textsize name="footnote" value="\footnotesize"/>
229 +<textsize name="tiny" value="\tiny"/>
230 +<textstyle name="center" begin="\begin{center}" end="\end{center}"/>
231 +<textstyle name="itemize" begin="\begin{itemize}" end="\end{itemize}"/>
232 +<textstyle name="item" begin="\begin{itemize}\item{}" end="\end{itemize}"/>
233 +<gridsize name="4 pts" value="4"/>
234 +<gridsize name="8 pts (~3 mm)" value="8"/>
235 +<gridsize name="16 pts (~6 mm)" value="16"/>
236 +<gridsize name="32 pts (~12 mm)" value="32"/>
237 +<gridsize name="10 pts (~3.5 mm)" value="10"/>
238 +<gridsize name="20 pts (~7 mm)" value="20"/>
239 +<gridsize name="14 pts (~5 mm)" value="14"/>
240 +<gridsize name="28 pts (~10 mm)" value="28"/>
241 +<gridsize name="56 pts (~20 mm)" value="56"/>
242 +<anglesize name="90 deg" value="90"/>
243 +<anglesize name="60 deg" value="60"/>
244 +<anglesize name="45 deg" value="45"/>
245 +<anglesize name="30 deg" value="30"/>
246 +<anglesize name="22.5 deg" value="22.5"/>
247 +<opacity name="10%" value="0.1"/>
248 +<opacity name="30%" value="0.3"/>
249 +<opacity name="50%" value="0.5"/>
250 +<opacity name="75%" value="0.75"/>
251 +<tiling name="falling" angle="-60" step="4" width="1"/>
252 +<tiling name="rising" angle="30" step="4" width="1"/>
253 +</ipestyle>
254 +<page>
255 +<layer name="alpha"/>
256 +<view layers="alpha" active="alpha"/>
257 +<path layer="alpha" matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat">
258 +64 704 m
259 +320 704 l
260 +</path>
261 +<path matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat">
262 +96 704 m
263 +32.249 0 0 -32.249 128 708 160 704 a
264 +</path>
265 +<path matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat">
266 +208 704 m
267 +25.2982 0 0 -25.2982 232 696 256 704 a
268 +</path>
269 +<path matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat">
270 +128 704 m
271 +77.746 0 0 -77.746 200 674.667 272 704 a
272 +</path>
273 +<path matrix="1 0 0 1 256 64" stroke="black" pen="ultrafat">
274 +64 704 m
275 +320 704 l
276 +</path>
277 +<path matrix="1 0 0 1 256 64" stroke="black" pen="ultrafat">
278 +208 704 m
279 +25.2982 0 0 -25.2982 232 696 256 704 a
280 +</path>
281 +<path matrix="1 0 0 1 256 64" stroke="black" pen="ultrafat">
282 +128 704 m
283 +77.746 0 0 -77.746 200 674.667 272 704 a
284 +</path>
285 +<path matrix="1 0 0 1 256 -96" stroke="black" pen="ultrafat">
286 +64 704 m
287 +320 704 l
288 +</path>
289 +<path matrix="1 0 0 1 256 -96" stroke="black" pen="ultrafat">
290 +96 704 m
291 +32.249 0 0 -32.249 128 708 160 704 a
292 +</path>
293 +<text matrix="1 0 0 1 0 -16" transformations="translations" pos="64 688" stroke="black" type="label" width="202.242" height="17.213" depth="4.82" valign="baseline" size="Huge">structure $y$ with PK</text>
294 +<path stroke="black" pen="ultrafat" arrow="normal/normal">
295 +256 736 m
296 +304 752 l
297 +</path>
298 +<path stroke="black" pen="ultrafat" arrow="normal/normal">
299 +288 672 m
300 +320 640 l
301 +</path>
302 +<text matrix="1 0 0 1 32 -16" transformations="translations" pos="384 752" stroke="black" type="label" width="60.952" height="16.741" depth="4.02" valign="baseline" size="huge">level $y^1$</text>
303 +<text transformations="translations" pos="416 576" stroke="black" type="label" width="60.952" height="16.741" depth="4.02" valign="baseline" size="huge">level $y^2$</text>
304 +<text transformations="translations" pos="432 672" stroke="black" type="label" width="17.843" height="13.97" depth="1.57" valign="baseline" size="Huge">+</text>
305 +</page>
306 +</ipe>
No preview for this file type
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
1 -#ifndef COMPONENT_H
2 -#define COMPONENT_H
3 -
4 -#include <string>
5 -
6 -class Component
7 -{
8 - public:
9 - Component();
10 - Component(std::string& cons_seq, uint k);
11 - private:
12 - std::string _cons_seq;
13 - uint k;
14 -};
15 -
16 -
17 -istream& operator>>(istream& is, Component& m);
18 -ostream& operator<<(ostream& os, const Component& m);
19 -
20 -#endif
...\ No newline at end of file ...\ No newline at end of file
1 -#include "Motif.h"
...\ No newline at end of file ...\ No newline at end of file
1 -#ifndef MOTIF_H
2 -#define MOTIF_H
3 -
4 -#include <vector>
5 -#include <string>
6 -#include "Component.h"
7 -
8 -class Motif
9 -{
10 - public:
11 - Motif();
12 - Motif(std::string filename);
13 - ~Motif();
14 - std::vector<Component>& getComponents() const;
15 - Component& getComponent(uint k) const;
16 - std::string name;
17 -
18 - private:
19 - std::vector<Component> _comps;
20 -
21 -};
22 -
23 -istream& operator>>(istream& is, Motif& m);
24 -ostream& operator<<(ostream& os, const Motif& m);
25 -
26 -#endif
...\ No newline at end of file ...\ No newline at end of file
1 -#include "RNA.h"
2 -
3 -RNA::RNA() {}
4 -
5 -RNA::~RNA() {}
6 -
7 -RNA::RNA(std::string seq) : _seq(seq) { }
1 -#ifndef RNA_H
2 -#define RNA_H
3 -
4 -#include <string>
5 -
6 -class RNA {
7 - public:
8 - RNA();
9 - RNA(std::string);
10 - ~RNA();
11 - uint length() const;
12 - std::string str() const;
13 - // char seq(uint k) const;
14 - // bool isConsensus() const;
15 - // bool containsPseudoBases() const;
16 - private:
17 - std::string _seq;
18 - // bool _coding;
19 - // bool _consensus;
20 - // bool _containsPseudoNTs;
21 -};
22 -
23 -inline uint RNA::length() const { return _seq.length(); }
24 -inline std::string RNA::str() const { return _seq; }
25 -
26 -#endif
...\ No newline at end of file ...\ No newline at end of file
1 -#include "Motif.h"
2 -#include <cstdlib>
3 -#include <iostream>
4 -#include <unistd.h>
5 -#include <boost/program_options.hpp>
6 -#include <boost/filesystem.hpp>
7 -
8 -#include "RNA.h"
9 -
10 -using namespace std;
11 -namespace bpo = boost::program_options;
12 -namespace bf = boost::filesystem;
13 -
14 -bool checkMotifFolder(bf::path & folder)
15 -{
16 - if (not(bf::is_directory(folder) and bf::exists(folder))) return false;
17 - bf::directory_iterator end_itr;
18 - uint Ndesc = 0;
19 - for (bf::directory_iterator itr(folder); itr != end_itr; ++itr)
20 - {
21 - if (itr->path().leaf().string().find(string(".desc")) != std::string::npos)
22 - Ndesc++;
23 - }
24 - cout << "Found " << Ndesc << " .desc files in " << folder.string() << endl;
25 - if (Ndesc > 0)
26 - return true;
27 - return false;
28 -}
29 -
30 -int parseMotifs(bf::path & folder)
31 -{
32 - bf::directory_iterator end_itr;
33 - for (bf::directory_iterator itr(folder); itr != end_itr; ++itr)
34 - {
35 - if (itr->path().leaf().string().find(string(".desc")) != std::string::npos)
36 - {
37 -
38 - }
39 - }
40 -
41 -}
42 -
43 -int main(int argc, char** argv)
44 -{
45 - bf::path currentFolder(bf::current_path());
46 - bf::path motifFolderPath(currentFolder); // By default, searching here...
47 - bpo::options_description desc("Options");
48 - bpo::variables_map options_map;
49 - RNA querySequence;
50 -
51 - desc.add_options()
52 - ("help", "Print this help message")
53 - ("seq,s", bpo::value<string>()->required(), "RNA sequence to find motives in.")
54 - ("motifs,m", bpo::value<bf::path>(), "Path to folder of DESC files describing the motifs to be used")
55 - ;
56 - bpo::store(bpo::parse_command_line(argc, argv, desc), options_map);
57 - bpo::notify(options_map);
58 -
59 - if (options_map.count("help")) {
60 - cout << desc << endl;
61 - return EXIT_SUCCESS;
62 - }
63 - if (options_map.count("motifs")) {
64 - motifFolderPath = options_map["motifs"].as<bf::path>();
65 - if (not(checkMotifFolder(motifFolderPath))) { return EXIT_FAILURE; }
66 - }
67 - if (options_map.count("seq")) {
68 - querySequence = RNA(options_map["seq"].as<string>());
69 - }
70 - cout << "Working with sequence " << querySequence.str() << " (length " << querySequence.length() << ")." << endl;
71 -
72 - return EXIT_SUCCESS;
73 -}