Louis BECQUEY

Monoobjective resolution

# Prerequisites
*.d
# LaTeX temporary files
doc/*.toc
doc/*.bbl
doc/*.gz
# Compiled Object files
*.slo
*.lo
......
# Please set the following variables to the correct paths for JAR3D:
jar3dexec="/home/persalteas/Software/jar3dbin/jar3d_2014-12-11.jar"
ILmotifDir="/home/persalteas/Data/RNA/motifs/Matlab_results/IL/3.2/lib"
HLmotifDir="/home/persalteas/Data/RNA/motifs/Matlab_results/HL/3.2/lib"
\ No newline at end of file
# ------------------------------------------------
# Generic Makefile
# ------------------------------------------------
ICONCERT=/opt/ibm/ILOG/CPLEX_Studio_Community128/concert/include
ICPLEX=/opt/ibm/ILOG/CPLEX_Studio_Community128/cplex/include
LCONCERT=/opt/ibm/ILOG/CPLEX_Studio_Community128/concert/lib/x86-64_linux/static_pic/
LCPLEX=/opt/ibm/ILOG/CPLEX_Studio_Community128/cplex/lib/x86-64_linux/static_pic/
# project name (generate executable with this name)
TARGET = motifscan
TARGET = biominserter
CC = g++
# compiling flags here
CFLAGS = -I. -O3
CXXFLAGS = -std=c++17 -Wall -Wpedantic -Wextra
CFLAGS = -Icppsrc/ -I$(ICONCERT) -I$(ICPLEX) -O3
CXXFLAGS = -std=c++17 -Wall -Wpedantic -Wextra -Wno-ignored-attributes
LINKER = g++
# linking flags here
LDFLAGS = -lboost_system -lboost_filesystem -lboost_program_options
LDFLAGS = -lconcert -lilocplex -lcplex -lm -lpthread -ldl -lboost_system -lboost_filesystem -lboost_program_options -L$(LCONCERT) -L$(LCPLEX)
# change these to proper directories where each file should be
SRCDIR = src
SRCDIR = cppsrc
OBJDIR = obj
BINDIR = bin
......
# RNA_Motifs_Inserter
A C++ software interface to RNA motif databases, and algorithms to include known motifs in raw sequences.
This is a bi-objective integer programming algorithm.
It predicts the secondary structure of an RNA sequence with pieces of 3D information (non-canonical contacts) at some places,
by identifying zones that can fold like known motifs.
1/ How it works
===================================
INPUT:
- An RNA sequence (tested with sequences ~100 bases)
THEN
- Identifies possible 2D folds with RNAsubopt.
- Knowing the possible 2D folds, locates every possible unpaired loop (hairpin loop, internal loop, multiple junction...)
- Aligns each unpaired loop to the catalogue of models of known RNA motifs (The 3D Motif Atlas of the BGSU RNA group)
- Retrieves a list of potential motif-insertion-sites in the RNA sequence and uses them to define the constraints of the IP problem.
- Solve a bi-objective IP problem:
* Maximize the expected accuracy of the secondary structure,
* Maximize the number and size of motifs inserted in the structure.
OUTPUT:
- A set of secondary structures from the pareto front,
- The list of known motifs inserted in the corresponding structures (and the non-canonical contacts)
- (Optionally, lower score structures from k-Pareto sets.)
2/ Installation
==================================
- Download and install RNAsubopt from the ViennaRNA package (https://www.tbi.univie.ac.at/RNA/)
- Download and install IBM ILOG Cplex optimization studio (https://www.ibm.com/analytics/cplex-optimizer), free academic account required
- Download and install Java runtime (Tested with Java 10)
- Download and install the latest JAR3D executable "jar3d_releasedate.jar" and motif models in this folder (http://rna.bgsu.edu/data/jar3d/models/)
Note that for HL and ILs, only the latest version is required (not all the versions provided in the folders).
- Download and install a C++ compiler and building dependencies and utilities (g++ or clang, automake, libboost)
......
#include "MOIP.h"
#include "SecondaryStructure.h"
#include "rna.h"
#include <algorithm>
#include <boost/format.hpp>
#include <cfloat>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <utility>
#include <vector>
using std::cerr, std::cout, std::endl;
using std::make_pair;
using std::vector;
// Definition of the static member declared in class MOIP; starts at 0 and is
// assigned in main() from the hardware thread count.
uint MOIP::ncores = 0;
// Builds the decision variables of the integer program.
//
//  - one boolean y^u_v per candidate basepair (u, v) with u < length - 6 and
//    v >= u + 4; their positions in basepair_dv_ are recorded in index_of_yuv_;
//  - one boolean C^x_i per component i of each insertion site x; their
//    positions in insertion_dv_ are recorded in index_of_Cxip_, and the
//    position of each motif's first component in index_of_first_components.
//
// rna            the RNA to fold (copied into rna_)
// insertionSites the candidate motif insertion sites (copied into insertion_sites_)
MOIP::MOIP(const RNA& rna, const vector<Motif>& insertionSites)
: rna_(rna), insertion_sites_(insertionSites), beta_(1.0), theta_{1.0 / (2.0 + 1.0)}
{
    basepair_dv_  = IloNumVarArray(env_);
    insertion_dv_ = IloNumVarArray(env_);

    // Guard the "length - 6" bound: on an unsigned length it would wrap
    // around for sequences shorter than 6 nucleotides.
    const uint n      = rna_.get_RNA_length();
    const uint n_left = (n > 6) ? n - 6 : 0;

    // Add the y^u_v decision variables
    uint u, v, c = 0;
    index_of_yuv_ = vector<vector<size_t>>(n_left, vector<size_t>(0));
    for (u = 0; u < n_left; u++) {
        for (v = u + 4; v < n; v++)    // A basepair is possible iff v > u+3
        {
            index_of_yuv_[u].push_back(c);
            c++;
            // Large enough for two 10-digit unsigned values; snprintf never
            // writes past the buffer.
            char name[32];
            snprintf(name, sizeof(name), "y%u,%u", u, v);
            basepair_dv_.add(IloNumVar(env_, 0, 1, IloNumVar::Bool, name));    // A boolean whether u and v are paired
        }
    }

    // Add the Cx,i,p decision variables
    index_of_first_components.reserve(insertionSites.size());
    index_of_Cxip_ = vector<vector<size_t>>(0);
    index_of_Cxip_.reserve(insertionSites.size());
    size_t i = 0;    // index of the next variable appended to insertion_dv_
    for (const Motif& m : insertionSites) {
        index_of_first_components.push_back(i);
        index_of_Cxip_.push_back(vector<size_t>(0));
        for (const Component& comp : m.comp) {
            index_of_Cxip_.back().push_back(i);
            // One variable is appended per component, so the running index
            // must advance unconditionally to stay aligned with insertion_dv_.
            i++;
            char name[48];
            snprintf(
                name,
                sizeof(name),
                "C%d,%d-%u",
                static_cast<int>(index_of_Cxip_.size() - 1),
                static_cast<int>(index_of_Cxip_.back().size() - 1),
                comp.pos.first);
            insertion_dv_.add(IloNumVar(env_, 0, 1, IloNumVar::Bool, name));    // A boolean whether component i of motif x is inserted at position p
        }
    }

    cout << i + c << " decision variables are used !" << endl;
}
// Ends the CPLEX environment owned by this object.
MOIP::~MOIP()
{
    env_.end();
}
bool MOIP::is_undominated_yet(const SecondaryStructure& s)
{
for (uint i = 0; i < pareto_.size(); i++) {
if (pareto_[i] > s) return false;
}
return true;
}
void MOIP::solve_objective(int o, double min, double max)
{
IloModel model_ = IloModel(env_);
cout << "Solving objective function " << o << "..." << endl;
add_problem_constraints(model_);
switch (o) {
case 1: {
// Add the motif objective function
IloExpr obj1 = IloExpr(env_);
for (uint i = 0; i < insertion_sites_.size(); i++) {
IloNum n_compo_squared = IloNum(insertion_sites_[i].comp.size() * insertion_sites_[i].comp.size());
obj1 += n_compo_squared * insertion_dv_[index_of_first_components[i]];
}
model_.add(IloMinimize(env_, obj1));
} break;
case 2: {
// Add the likelihood objective function
}
}
IloCplex cplex_ = IloCplex(model_);
if (!cplex_.solve()) {
env_.error() << "\t>Failed to optimize LP." << endl;
throw(-1);
}
IloNumArray basepair_values(env_);
IloNumArray insertion_values(env_);
env_.out() << endl << "Solution status = " << cplex_.getStatus() << endl;
env_.out() << endl << "Objective value = " << cplex_.getObjValue() << endl;
cplex_.getValues(basepair_values, basepair_dv_);
env_.out() << endl << "Basepair Values = " << basepair_values << endl;
cplex_.getValues(insertion_values, basepair_dv_);
env_.out() << endl << "Insertion Values = " << insertion_values << endl;
// TODO : retrieve the secondary structure !!
}
// Adds the structural constraints of the problem to `model_`:
//   c1  : every nucleotide u < n - 6 takes part in at most one basepair;
//   c2  : constraint relating the pairings of u to those of u - 1 and u + 1
//         (forbids lonely basepairs), written for the 5' partner;
//   c2p : the same idea written for the 3' partner v;
//   c3  : constraint linking each motif component variable C(i, j) to the
//         basepair variables of the positions it covers.
// Every candidate pair is filtered through allowed_basepair() before its
// y(u, v) variable is used.
// NOTE(review): `n - 6` is computed on an unsigned value and wraps around for
// n < 6 — confirm that callers never pass such short sequences.
// NOTE(review): the IloExpr objects built here are never end()-ed; presumably
// they are released when the environment ends — confirm.
void MOIP::add_problem_constraints(const IloModel& model_)
{
// ensure there only is 0 or 1 pairing by nucleotide:
cout << "\t>ensuring there are 0 or 1 pairing by nucleotide..." << endl;
uint u, v;
uint n = rna_.get_RNA_length();
for (u = 0; u < n - 6; u++) {
IloExpr c1(env_);
// pairs where u is the larger index
for (v = 0; v < u; v++)
if (allowed_basepair(v, u)) c1 += y(v, u);
// pairs where u is the smaller index
for (v = u + 4; v < n; v++)
if (allowed_basepair(u, v)) c1 += y(u, v);
model_.add(c1 <= 1);
// cout << "\t>It worked for base " << u << " : " << (c1 <= 1) << endl;
}
// forbid lonely basepairs
cout << "\t>forbidding lonely basepairs..." << endl;
for (u = 0; u < n - 6; u++) {
IloExpr c2(env_); // for the case where s[u] is paired to s[v], v>u
c2 += 1;
// For u == 0, `u - 1` wraps to the largest uint; allowed_basepair() then
// returns false because that index is not below the RNA length.
for (v = u; v < n; v++)
if (allowed_basepair(u - 1, v)) c2 += y(u - 1, v);
for (v = u + 1; v < n; v++)
if (allowed_basepair(u, v)) c2 -= y(u, v);
for (v = u + 2; v < n; v++)
if (allowed_basepair(u + 1, v)) c2 += y(u + 1, v);
model_.add(c2 >= 1);
// cout << "\t>It worked for base " << u << " : " << (c2 >= 1) << endl;
}
for (v = 5; v < n; v++) {
IloExpr c2p(env_); // for the case where s[u] is paired to s[v], v<u
c2p += 1;
for (u = 1; u <= v - 2; u++)
if (allowed_basepair(u, v - 1)) c2p += y(u, v - 1);
for (u = 1; u <= v - 1; u++)
if (allowed_basepair(u, v)) c2p -= y(u, v);
// v + 1 may equal n here; allowed_basepair() rejects that index.
for (u = 1; u <= v; u++)
if (allowed_basepair(u, v + 1)) c2p += y(u, v + 1);
model_.add(c2p >= 1);
// cout << "\t>It worked for base " << u << " : " << (c2p >= 1) << endl;
}
// Forbid pairings inside every motif component if included
cout << "\t>forbidding basepairs inside included motif's components..." << endl;
for (size_t i = 0; i < insertion_sites_.size(); i++) {
Motif& x = insertion_sites_[i];
for (size_t j = 0; j < x.comp.size(); j++) {
Component& c = x.comp[j];
IloExpr c3(env_);
IloNum kxi = IloNum(c.k);
// kxi * C(i, j) + (pairings of the covered positions) <= kxi
c3 += kxi * C(i, j);
// NOTE(review): the loop stops before c.pos.second although
// Component::k counts both ends — confirm the intended range.
for (u = c.pos.first; u < c.pos.second; u++) {
for (v = 0; v < n; v++)
if (allowed_basepair(u, v)) c3 += y(u, v);
}
model_.add(c3 <= kxi);
}
}
// To be continued ...
}
// Extends the Pareto set inside the window [lambdaMin, lambdaMax] of the
// second objective. Only the selection of the already-known structures that
// fall in the window is implemented; the constraint generation and the
// recursive search are still commented out.
//
// Exits the program when lambdaMin >= lambdaMax.
void MOIP::extend_pareto(double lambdaMin, double lambdaMax)
{
    if (lambdaMin >= lambdaMax) {
        cerr << "lambdaMax < lambdaMin, something's wrong !" << endl;
        exit(EXIT_FAILURE);
    }

    // for any SecondaryStructure in pareto_ such that the value of the second
    // objective is between lambdaMin and lambdaMax
    // a DIFF() constraint and a mirror constraint is added
    for (uint i = 0; i < pareto_.size(); i++) {
        // get_objective_score() is 0-based, so the second objective is index 1
        // (index 2 would read past the two stored scores).
        const double score2 = pareto_[i].get_objective_score(1);

        // std::abs selects the floating-point overload; an unqualified abs()
        // may resolve to the C int version and truncate the difference.
        // DIFF()
        if (
        (std::abs(score2 - lambdaMin) < PRECISION or score2 > lambdaMin) and
        (std::abs(score2 - lambdaMax) < PRECISION or score2 < lambdaMax)) {
            // ip.add_bj_ct(pareto_[i]);
        }

        // mirror
        // if (
        //     (abs(pareto_[i].get_obj2M_() - lambdaMin) < PRECISION or pareto_[i].get_obj2M_() > lambdaMin) and
        //     (abs(pareto_[i].get_obj2M_() - lambdaMax) < PRECISION or pareto_[i].get_obj2M_() < lambdaMax)) {
        //     ip.add_bj_ct_M(pareto_[i]);
        // }
    }

    // SecondaryStructure s = solve_objective(1, lambdaMin, lambdaMax);

    // if (is_undominated_yet(s)) {
    //     // adding the SecondaryStructure s to the set pareto_
    //     add_solution(s);
    //     // run localPareto above the SecondaryStructure s
    //     extend_pareto(s.get_objective_score(2), lambdaMax);
    // }
}
// Returns the position of the y^u_v variable in basepair_dv_. The pair is
// unordered: the smaller index selects the row of index_of_yuv_, and the
// column is the larger index shifted by (smaller + 4).
size_t MOIP::get_yuv_index(size_t u, size_t v) const
{
    size_t low  = u;
    size_t high = v;
    if (!(u < v)) low = v;
    if (!(u > v)) high = v;
    const size_t column = high - 4 - low;
    return index_of_yuv_[low][column];
}
size_t MOIP::get_Cpxi_index(size_t x_i, size_t i_on_j) const { return index_of_Cxip_[x_i][i_on_j]; }
// Tells whether a y variable exists for the unordered pair (u, v).
// With a = min(u, v) and b = max(u, v), the pair is allowed iff
//   b - a >= 4,  a < length - 6  and  b < length.
bool MOIP::allowed_basepair(size_t u, size_t v) const
{
    const size_t a = (v > u) ? u : v;
    const size_t b = (v > u) ? v : u;
    const size_t n = rna_.get_RNA_length();

    if (b - a < 4) return false;
    // Written as "a + 6 >= n" rather than "a >= n - 6" so that sequences
    // shorter than 6 nucleotides cannot make the bound wrap around.
    if (a + 6 >= n) return false;
    if (b >= n) return false;
    return true;
}
\ No newline at end of file
#ifndef MOIP_H_
#define MOIP_H_
#define IL_STD
#include "SecondaryStructure.h"
#include "rna.h"
#include <ilcplex/ilocplex.h>
using std::vector;
// Tolerance used when comparing objective scores for equality.
const double PRECISION = 0.0001;
// Multi-objective integer program over the basepairs of an RNA and the
// insertion of known motifs. Owns a CPLEX environment (ended in the
// destructor), the decision variables and the set of solutions found so far.
class MOIP
{
    public:
    static uint ncores;    // number of cores made available to the solver (set by main)
    typedef enum { MIN, MAX } DirType;
    typedef enum { FR, LO, UP, DB, FX } BoundType;

    // Builds the decision variables for `rna` and the candidate `motifSites`.
    MOIP(const RNA& rna, const vector<Motif>& motifSites);
    ~MOIP(void);

    // The destructor ends env_; a copy would share the same environment
    // handle and end it a second time, so copying is disabled.
    MOIP(const MOIP&) = delete;
    MOIP& operator=(const MOIP&) = delete;

    void                      solve_objective(int o, double min, double max);
    uint                      get_n_solutions(void) const;
    const SecondaryStructure& solution(uint i) const;
    void                      extend_pareto(double lambdaMin, double lambdaMax);
    bool                      allowed_basepair(size_t u, size_t v) const;
    void                      add_solution(const SecondaryStructure& s);

    private:
    bool           is_undominated_yet(const SecondaryStructure& s);
    void           add_problem_constraints(const IloModel& model_);
    size_t         get_yuv_index(size_t u, size_t v) const;
    size_t         get_Cpxi_index(size_t x_i, size_t i_on_j) const;
    IloNumExprArg& y(size_t u, size_t v);    // Direct reference to y^u_v in basepair_dv_
    IloNumExprArg& C(size_t x, size_t i);    // Direct reference to C_p^xi in insertion_dv_

    RNA           rna_;                // RNA object
    vector<Motif> insertion_sites_;    // Potential Motif insertion sites

    const float beta_;               // beta parameter of the probability function
    double      lambdaMin_ = 0.0;    // minimum threshold value for the probability value
    double      lambdaMax_ = 0.0;    // maximum threshold value for the probability value
    int         vp_        = 0;      // vp_ variable for penalization of the probability score
    float       theta_;              // theta parameter for the probability function

    IloEnv         env_;             // environment CPLEX object
    IloNumVarArray basepair_dv_;     // Decision variables
    IloNumVarArray insertion_dv_;    // Decision variables

    vector<SecondaryStructure> pareto_;                      // Vector of results
    vector<vector<size_t>>     index_of_Cxip_;               // Stores the indexes of the Cxip in insertion_dv_
    vector<vector<size_t>>     index_of_yuv_;                // Stores the indexes of the y^u_v in basepair_dv_ in a complex way. Use get_yuv_index(u,v) to retrieve.
    vector<size_t>             index_of_first_components;    // Stores the indexes of Cx1p in insertion_dv_
};
// Appends a copy of s to the stored solutions.
inline void MOIP::add_solution(const SecondaryStructure& s)
{
    pareto_.push_back(s);
}

// Number of stored solutions.
inline uint MOIP::get_n_solutions(void) const
{
    return pareto_.size();
}

// Read-only access to the i-th stored solution (no bounds check).
inline const SecondaryStructure& MOIP::solution(uint i) const
{
    return pareto_[i];
}

// Decision variable of the basepair (u, v), looked up through get_yuv_index().
inline IloNumExprArg& MOIP::y(size_t u, size_t v)
{
    const size_t position = get_yuv_index(u, v);
    return basepair_dv_[position];
}

// Decision variable of component i of motif x, looked up through get_Cpxi_index().
inline IloNumExprArg& MOIP::C(size_t x, size_t i)
{
    const size_t position = get_Cpxi_index(x, i);
    return insertion_dv_[position];
}
#endif // MOIP_H_
\ No newline at end of file
#include "SecondaryStructure.h"
#include <boost/format.hpp>
// File-local tolerance below which two objective scores are treated as equal
// by the dominance operators defined in this file.
static const double PRECISION(0.0001);
// Stores copies of the objective scores, the decision-variable values and
// their coordinates, together with the RNA length.
SecondaryStructure::SecondaryStructure(const vector<double>& scores, const vector<bool>& decision_variables, VII coord, int RNAlength)
: objective_scores_(scores),
  dv_(decision_variables),
  coord_(coord),
  n_(RNAlength)
{
    // nothing else to do: every member is set by the initializer list
}
// Renders the structure in dot-bracket notation: a string of n_ characters,
// '.' for unpaired positions and '(' / ')' at the two ends of every basepair
// whose decision variable is true.
string SecondaryStructure::to_DBN(void) const
{
    string res(n_, '.');

    // dv_ and coord_ are indexed by candidate basepair, not by nucleotide:
    // iterate over the pairs (bounded by the shorter of the two containers)
    // instead of over the n_ positions of the sequence.
    const size_t n_pairs = (dv_.size() < coord_.size()) ? dv_.size() : coord_.size();
    for (size_t i = 0; i < n_pairs; i++) {
        if (!dv_[i]) continue;
        const int open  = coord_[i].first;
        const int close = coord_[i].second;
        // Ignore coordinates that do not fall inside the sequence.
        if (open < 0 || close < 0) continue;
        if (static_cast<size_t>(open) >= n_ || static_cast<size_t>(close) >= n_) continue;
        res[open]  = '(';
        res[close] = ')';
    }
    return res;
}
// One-line text form: dot-bracket string, then the first two objective
// scores with six decimals, separated by tabs.
string SecondaryStructure::to_string(void) const
{
    const string first_score  = boost::str(boost::format("%.6f") % objective_scores_[0]);
    const string second_score = boost::str(boost::format("%.6f") % objective_scores_[1]);

    string line = to_DBN();
    line += "\t";
    line += first_score;
    line += "\t";
    line += second_score;
    return line;
}
// Dominance test: true when s1 is at least as good as s2 on both objectives
// (scores 0 and 1, equality taken within PRECISION) and strictly better on at
// least one of them.
bool operator>(const SecondaryStructure& s1, const SecondaryStructure& s2)
{
    const double s11 = s1.get_objective_score(0);
    const double s12 = s1.get_objective_score(1);
    const double s21 = s2.get_objective_score(0);
    const double s22 = s2.get_objective_score(1);

    const bool strict1 = s11 > s21;
    const bool strict2 = s12 > s22;

    // When s1 is not strictly better, the gap is (s2 - s1) >= 0, so the
    // tolerance test needs no abs(): this avoids the unqualified abs() that
    // can resolve to the int overload and truncate the difference.
    const bool obj1 = strict1 || (s21 - s11) < PRECISION;
    const bool obj2 = strict2 || (s22 - s12) < PRECISION;

    return obj1 && obj2 && (strict1 || strict2);
}
// Mirror of the dominance test: true when s1 is at most as good as s2 on both
// objectives (scores 0 and 1, equality taken within PRECISION) and strictly
// worse on at least one of them.
bool operator<(const SecondaryStructure& s1, const SecondaryStructure& s2)
{
    const double s11 = s1.get_objective_score(0);
    const double s12 = s1.get_objective_score(1);
    const double s21 = s2.get_objective_score(0);
    const double s22 = s2.get_objective_score(1);

    const bool strict1 = s11 < s21;
    const bool strict2 = s12 < s22;

    // When s1 is not strictly smaller, the gap is (s1 - s2) >= 0, so the
    // tolerance test needs no abs(): this avoids the unqualified abs() that
    // can resolve to the int overload and truncate the difference.
    const bool obj1 = strict1 || (s11 - s21) < PRECISION;
    const bool obj2 = strict2 || (s12 - s22) < PRECISION;

    return obj1 && obj2 && (strict1 || strict2);
}
#ifndef __INC_IP_SOL__
#define __INC_IP_SOL__
#include "rna.h"
#include <string>
#include <vector>
using std::string;
using std::vector;
typedef vector<int> VI;
typedef vector<VI> VVI;
typedef vector<std::pair<int, int>> VII;
// One candidate solution of the integer program: the values of its objective
// functions, the values of the basepair decision variables with the
// coordinates they refer to, and the length of the RNA.
class SecondaryStructure
{
public:
// NOTE(review): declared here, but no definition is visible in this chunk.
SecondaryStructure(void);
// Copies the scores, the decision values and their coordinates; RNAlength is stored in n_.
SecondaryStructure(const vector<double>& scores, const vector<bool>& decision_variables, VII coord, int RNAlength);
// Returns objective_scores_[i] (0-based index).
double get_objective_score(int i) const;
const vector<bool>& get_decision_variables() const;
bool get_decision_value(int i) const;
VII get_coord() const;
int get_RNA_length() const;
// Writes objective_scores_[i - 1] (1-based index, unlike the getter).
void set_objective_score(int i, double s);
// Dot-bracket rendering of the paired positions.
string to_DBN() const;
// Dot-bracket rendering followed by the first two objective scores.
string to_string() const;
private:
vector<double> objective_scores_; // values of the different objective functions for that SecondaryStructure
vector<bool> dv_; // values of the decision variable of the integer program
vector<Motif> motif_info_; // information about known motifs in this secondary structure and their positions
VII coord_; // coordinates of the dv_. dv_[i] == true <==> coord_[i].first paired to coord_[i].second;
size_t n_; // length of the RNA
};
// return if this SecondaryStructure s1 dominates s2
bool operator>(const SecondaryStructure& s1, const SecondaryStructure& s2);
// return if this SecondaryStructure s2 dominates s1
bool operator<(const SecondaryStructure& s1, const SecondaryStructure& s2);
// Score of objective i, with i used directly as a 0-based index.
inline double SecondaryStructure::get_objective_score(int i) const
{
    return objective_scores_[i];
}

// Read-only view of all decision-variable values.
inline const vector<bool>& SecondaryStructure::get_decision_variables() const
{
    return dv_;
}

// Stores s as the score of objective i, with i taken as a 1-based index.
// NOTE(review): the getter above is 0-based — confirm which convention callers expect.
inline void SecondaryStructure::set_objective_score(int i, double s)
{
    const int slot = i - 1;
    objective_scores_[slot] = s;
}

// Copy of the coordinates attached to the decision variables.
inline VII SecondaryStructure::get_coord() const
{
    return coord_;
}

// Length of the RNA.
inline int SecondaryStructure::get_RNA_length() const
{
    return n_;
}

// Value of the i-th decision variable.
inline bool SecondaryStructure::get_decision_value(int i) const
{
    return dv_[i];
}
#endif
/***
Biominserter
Louis Becquey, starting from Audrey Legendre's code
nov 2018
***/
#include <algorithm>
#include <boost/algorithm/string.hpp>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <thread>
#include <unistd.h>
#include <vector>
#include "MOIP.h"
#include "fa.h"
#include "nupack.h"
using namespace std;
// Parses one comma-separated line of the insertion-sites file into a Motif.
// Fields, as read below:
//   [0] atlas id, [1] "True" when the motif is reversed, [2] score,
//   [3],[4] start/end of the first component,
//   [5],[6] start/end of the second component, or "-" in [5] when absent.
//
// Throws std::invalid_argument when the line has too few fields (std::stoi
// throws on non-numeric fields).
Motif parse_csv_line(string line)
{
    vector<string> tokens;
    boost::split(tokens, line, boost::is_any_of(","));

    // Fields 0..5 are read unconditionally below.
    if (tokens.size() < 6) {
        throw std::invalid_argument("malformed insertion site line (expected at least 6 fields): " + line);
    }

    Motif m;
    m.atlas_id = tokens[0];
    m.comp.push_back(Component(make_pair<int, int>(stoi(tokens[3]), stoi(tokens[4])), stoi(tokens[2])));
    if (tokens[5] != "-") {
        // A second component needs both of its bounds.
        if (tokens.size() < 7) {
            throw std::invalid_argument("malformed insertion site line (second component has no end position): " + line);
        }
        m.comp.push_back(Component(make_pair<int, int>(stoi(tokens[5]), stoi(tokens[6])), stoi(tokens[2])));
    }
    m.reversed = (tokens[1] == "True");
    return m;
}
// Entry point: biominserter sequence.fasta insertion.sites.csv
// Loads the first sequence of the FASTA file and the candidate insertion
// sites of the CSV file, builds the multi-objective problem and solves the
// first objective. Returns EXIT_FAILURE on bad arguments, unreadable or
// malformed input, or solver failure.
int main(int argc, char* argv[])
{
    // float time;
    // clock_t t1, t2;
    // t1 = clock();

    // Keep one core free, but never go below one: hardware_concurrency() may
    // return 0 when the value is unknown, and "0 - 1" would wrap around.
    const unsigned int hw = thread::hardware_concurrency();
    MOIP::ncores          = (hw > 1) ? hw - 1 : 1;

    if (argc != 3) {
        cerr << argc << " arguments specified !" << endl;
        cerr << "Please specify the following input files:" << endl;
        cerr << "biominserter sequence.fasta insertion.sites.csv" << endl;
        return EXIT_FAILURE;
    }

    const char* inputName = argv[1];
    const char* csvname   = argv[2];

    cout << "Reading input files..." << endl;
    if (access(inputName, F_OK) == -1) {
        cerr << inputName << " not found" << endl;
        return EXIT_FAILURE;
    }
    if (access(csvname, F_OK) == -1) {
        cerr << csvname << " not found" << endl;
        return EXIT_FAILURE;
    }

    // load fasta file
    list<Fasta> f;
    Fasta::load(f, inputName);
    if (f.empty()) {
        // Dereferencing begin() of an empty list is undefined behaviour.
        cerr << inputName << " does not contain any sequence" << endl;
        return EXIT_FAILURE;
    }
    list<Fasta>::iterator fa = f.begin();
    cout << "loading " << fa->name() << "..." << endl;
    RNA myRNA = RNA(fa->name(), fa->seq());
    cout << "\t>" << inputName << " successfuly loaded" << endl;

    // load CSV file
    string        line;
    ifstream      motifs(csvname);
    vector<Motif> posInsertionSites;
    getline(motifs, line);    // skip header
    try {
        while (getline(motifs, line)) {
            posInsertionSites.push_back(parse_csv_line(line));
        }
    } catch (const std::exception& e) {
        cerr << "Could not parse " << csvname << ": " << e.what() << endl;
        return EXIT_FAILURE;
    }
    cout << "\t>" << csvname << " successfuly loaded" << endl;

    // creating the Multi-Objective problem:
    MOIP myMOIP = MOIP(myRNA, posInsertionSites);    // using the constructor with arguments automatically defines the decision variables.

    // finding the best SecondaryStructures for each objective
    double max = myRNA.get_RNA_length();
    try {
        myMOIP.solve_objective(1, -max, max);
    } catch (IloCplex::Exception& e) {
        cerr << e << endl;
        return EXIT_FAILURE;
    } catch (...) {
        // solve_objective() throws a plain int when CPLEX cannot solve the model.
        cerr << "The solver failed to optimize objective 1." << endl;
        return EXIT_FAILURE;
    }
    // SecondaryStructure bestSSO1 = myMOIP.solve_objective(1, -max, max);
    // SecondaryStructure bestSSO2 = myMOIP.solve_objective(2, -max, max);
    // double             bestObj2 = bestSSO2.get_objective_score(2);

    // extend to the whole pareto set
    // myMOIP.add_solution(bestSSO1);
    // myMOIP.extend_pareto(bestObj2, max);

    // print the pareto set
    // cout << "Structure \t Free energy score \t Expected accuracy score" << endl;
    // for (uint i = 0; i < myMOIP.get_n_solutions(); i++) {
    //     cout << myMOIP.solution(i).to_string() << endl;
    // }
    // cout << endl;

    return EXIT_SUCCESS;
}
/*
* $Id$
*
* Copyright (C) 2010 Kengo Sato
*
* doublehis file comes from IPknot.
*
*/
#ifndef __INC_DP_TABLE_H__
#define __INC_DP_TABLE_H__
// Flat storage for a two-index table of doubles. Entries (i, j) are laid out
// row by row in a triangular arrangement; the entries with j == i - 1 live in
// an extra band of N + 1 cells placed after the triangle.
class DPtable2
{
    public:
    DPtable2() : V_(), N_(0) {}

    // Sets the dimension to n and sizes the storage accordingly.
    void resize(int n)
    {
        N_                 = n;
        const int triangle = N_ * (N_ + 1) / 2;
        const int band     = N_ + 1;
        V_.resize(triangle + band);
    }

    // Assigns v to every cell.
    void fill(const double& v) { std::fill(V_.begin(), V_.end(), v); }

    double& operator()(int i, int j)
    {
        const int pos = index(i, j);
        return V_[pos];
    }

    const double& operator()(int i, int j) const
    {
        const int pos = index(i, j);
        return V_[pos];
    }

    private:
    // Maps (i, j) to its position in V_.
    int index(int i, int j) const
    {
        assert(j <= N_);
        if (j == i - 1) {
            return N_ * (N_ + 1) / 2 + i;
        }
        return i * N_ + j - i * (1 + i) / 2;
    }

    std::vector<double> V_;
    int                 N_;
};
// Flat storage for a four-index table of doubles, addressed by (i, d, e, j)
// with i <= d <= e <= j <= N. The last cell of the storage is shared by every
// entry with i == d and e == j.
class DPtable4
{
    public:
    DPtable4() : V_(), N_(0) {}

    // Sets the dimension to n and allocates N(N-1)(N-2)(N-3)/24 cells.
    // The product is evaluated in 64 bits: in plain int it overflows as soon
    // as N exceeds about 216.
    void resize(int n)
    {
        N_                = n;
        const long long N = N_;
        V_.resize(static_cast<std::vector<double>::size_type>(N * (N - 1) * (N - 2) * (N - 3) / 2 / 3 / 4));
    }

    // Assigns v to every cell.
    void fill(const double& v) { std::fill(V_.begin(), V_.end(), v); }

    double&       operator()(int i, int d, int e, int j) { return V_[index(i, d, e, j)]; }
    const double& operator()(int i, int d, int e, int j) const { return V_[index(i, d, e, j)]; }

    private:
    // Maps (h, r, m, s) to its position in V_. The polynomial is evaluated in
    // 64 bits so that its intermediate terms (up to h * n^3) cannot overflow.
    int index(int h, int r, int m, int s) const
    {
        assert(h <= r);
        assert(r <= m);
        assert(m <= s);
        assert(s <= N_);

        if (h == r && m == s) {
            return static_cast<int>(V_.size() - 1);
        }

        const long long n  = N_;
        const long long H  = h;
        const long long R  = r;
        const long long M  = m;
        const long long S  = s;
        const long long h2 = H * H;
        const long long h3 = h2 * H;
        const long long h4 = h3 * H;
        const long long m2 = M * M;
        const long long n2 = n * n;
        const long long n3 = n2 * n;
        const long long r2 = R * R;
        const long long r3 = r2 * R;

        const long long poly =
        -24 - 50 * H - 35 * h2 - 10 * h3 - h4 - 36 * M - 12 * m2 + 12 * n + 70 * H * n +
        30 * h2 * n + 4 * h3 * n + 24 * M * n - 12 * n2 - 30 * H * n2 - 6 * h2 * n2 + 4 * H * n3 +
        44 * R - 48 * n * R + 12 * n2 * R + 24 * r2 - 12 * n * r2 + 4 * r3 + 24 * S;
        return static_cast<int>(poly / 24);
    }

    std::vector<double> V_;
    int                 N_;
};
// Flat storage for a table of doubles addressed by (i, d, e, s) for one
// fixed span D. Sized for the largest of the spans d, d + 1 and d + 2.
class DPtableX
{
    public:
    DPtableX() : V_(), N_(0), D_(0) {}

    // Records the span d and the dimension n, then sizes the storage for the
    // largest of the three spans d, d + 1, d + 2. Sizes are evaluated in 64
    // bits because the four-factor product overflows int for large n.
    void resize(int d, int n)
    {
        N_               = n;
        D_               = d;
        long long max_sz = 0;
        for (long long i = d; i < static_cast<long long>(d) + 3; ++i) {
            max_sz = std::max(max_sz, (N_ - i) * (i - 5) * (i - 1) * (i - 2) / 2);
        }
        V_.resize(static_cast<std::vector<double>::size_type>(max_sz));
    }

    // Assigns v to every cell.
    void fill(const double& v) { std::fill(V_.begin(), V_.end(), v); }

    double&       operator()(int i, int d, int e, int s) { return V_[index(i, d, e, s)]; }
    const double& operator()(int i, int d, int e, int s) const { return V_[index(i, d, e, s)]; }

    // Exchanges storage and dimensions with x.
    void swap(DPtableX& x)
    {
        std::swap(V_, x.V_);
        std::swap(N_, x.N_);
        std::swap(D_, x.D_);
    }

    private:
    // Maps (i, h1, m1, s) to its position in V_, using 64-bit intermediates.
    int index(int i, int h1, int m1, int s) const
    {
        const long long d      = D_;
        const long long d1d2   = (d - 1) * (d - 2);
        const long long d5     = d - 5;
        const long long h1_i_1 = h1 - i - 1;
        assert(i + d < N_);
        assert(d - 6 >= s);
        assert(i < h1);
        return static_cast<int>(
        i * d5 * d1d2 / 2 + s * d1d2 / 2 + h1_i_1 * (d - 1) - h1_i_1 * (h1 - i) / 2 + m1 - h1 - 1);
    }

    std::vector<double> V_;
    int                 N_;
    int                 D_;
};
#endif
/*
* $Id$
*
* Copyright (C) 2008-2010 Kengo Sato
*
* This file comes from Ipknot.
*/
#include "fa.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <cctype>
#include <cstring>
#include <cassert>
typedef unsigned int uint;
//static
// Reads every record of a FASTA-like file and appends it to `data`.
// A line starting with '>' opens a new record; a line whose first character
// belongs to "()[].?xle " (or is empty) is a structure line; any other line
// is a sequence line, of which the leading run of letters is kept.
// Lines that come before the first header are accumulated but dropped,
// because a record is only stored once it has a name.
//
// data  list receiving the records
// file  path of the file to read; an unreadable file yields no record
// Returns the size of `data` after loading.
unsigned int Fasta::load(std::list<Fasta>& data, const char* file){
  std::string line, name, seq, str;
  std::ifstream ifs(file);
  while (std::getline(ifs, line)) {
    if (line.empty()) continue;         // contributes nothing to seq or str

    if (line[0]=='>') {         // header
      if (!name.empty()) {
        assert(str.size()==0 || seq.size()==str.size());
        data.push_back(Fasta(name, seq, str));
      }

      name=line.substr(1);
      seq.clear();
      str.clear();
      continue;
    }

    if (std::strchr("()[].?xle ", line[0])==NULL) { // seq
      std::string::size_type i;
      for (i=0; i!=line.size(); ++i)
        // <cctype> functions need a value representable as unsigned char:
        // passing a negative plain char is undefined behaviour.
        if (!isalpha(static_cast<unsigned char>(line[i]))) break;
      seq+=line.substr(0, i);
    } else {
      std::string::size_type i;
      for (i=0; i!=line.size(); ++i)
        if (std::strchr("()[].?xle ", line[i])==NULL) break;
      str+=line.substr(0, i);
    }
  }

  if (!name.empty())
    data.push_back(Fasta(name, seq, str));

  return data.size();
}
/*
* $Id$
*
* Copyright (C) 2008-2010 Kengo Sato
*
* This file comes from Ipknot.
*/
#include <list>
#include <string>
using std::list;
using std::string;
// One FASTA record: a name, a sequence and an optional structure string.
// Copy, move and destruction are the compiler-generated ones (Rule of Zero):
// the hand-written copy operations only duplicated the defaults and prevented
// the implicit move operations from being generated.
class Fasta
{
    public:
    Fasta() : name_(), seq_(), str_() {}
    Fasta(const string& name, const string& seq, const string& str = "") : name_(name), seq_(seq), str_(str) {}

    const string& name() const { return name_; }
    const string& seq() const { return seq_; }
    string&       seq() { return seq_; }    // mutable access to the sequence
    const string& str() const { return str_; }
    unsigned int  size() const { return seq_.size(); }    // length of the sequence

    // Appends the records read from `file` to `data`; returns data.size().
    static unsigned int load(list<Fasta>& data, const char* file);

    private:
    string name_;
    string seq_;
    string str_;
};
This diff is collapsed. Click to expand it.
/*
* $Id$
*
* Copyright (C) 2010 Kengo Sato
*
* This file comes from IPknot.
*
*/
#include <boost/multi_array.hpp>
#include <string>
#include <vector>
#include <iostream>
using std::string;
using std::vector;
#define kB 0.00198717 // Boltzmann constant in kcal/mol/K
#define ZERO_C_IN_KELVIN 273.15 // Zero degrees C in Kelvin
#define AVOGADRO 6.022e23 // Avogadro's number
typedef float energy_t;
// Flat storage for a two-index table of floats. Entries (i, j) are laid out
// row by row in a triangular arrangement; the entries with j == i - 1 live in
// an extra band of N + 1 cells placed after the triangle.
class DPtable2
{
    public:
    DPtable2() : V_(), N_(0) {}

    // Sets the dimension to n and sizes the storage accordingly.
    void resize(int n)
    {
        N_                 = n;
        const int triangle = N_ * (N_ + 1) / 2;
        const int band     = N_ + 1;
        V_.resize(triangle + band);
    }

    // Assigns v to every cell.
    void fill(const float& v) { std::fill(V_.begin(), V_.end(), v); }

    float& operator()(int i, int j)
    {
        const int pos = index(i, j);
        return V_[pos];
    }

    const float& operator()(int i, int j) const
    {
        const int pos = index(i, j);
        return V_[pos];
    }

    private:
    // Maps (i, j) to its position in V_.
    int index(int i, int j) const
    {
        assert(j <= N_);
        if (j == i - 1) {
            return N_ * (N_ + 1) / 2 + i;
        }
        return i * N_ + j - i * (1 + i) / 2;
    }

    std::vector<float> V_;
    int                N_;
};
// Flat storage for a four-index table of floats, addressed by (i, d, e, j)
// with i <= d <= e <= j <= N. The last cell of the storage is shared by every
// entry with i == d and e == j.
class DPtable4
{
    public:
    DPtable4() : V_(), N_(0) {}

    // Sets the dimension to n and allocates N(N-1)(N-2)(N-3)/24 cells.
    // This number can be HUGE, so it is evaluated in 64 bits; a failed
    // allocation surfaces as std::bad_alloc from the vector.
    // (The debugging traces that used to be printed here were removed.)
    void resize(int n)
    {
        N_                = n;
        const long long N = N_;
        V_.resize(static_cast<std::vector<float>::size_type>(N * (N - 1) * (N - 2) * (N - 3) / 2 / 3 / 4));
    }

    // Assigns v to every cell.
    void fill(const float& v) { std::fill(V_.begin(), V_.end(), v); }

    float&       operator()(int i, int d, int e, int j) { return V_[index(i, d, e, j)]; }
    const float& operator()(int i, int d, int e, int j) const { return V_[index(i, d, e, j)]; }

    private:
    // Maps (h, r, m, s) to its position in V_. The polynomial is evaluated in
    // 64 bits so that its intermediate terms (up to h * n^3) cannot overflow.
    int index(int h, int r, int m, int s) const
    {
        assert(h <= r);
        assert(r <= m);
        assert(m <= s);
        assert(s <= N_);

        if (h == r && m == s) {
            return static_cast<int>(V_.size() - 1);
        }

        const long long n  = N_;
        const long long H  = h;
        const long long R  = r;
        const long long M  = m;
        const long long S  = s;
        const long long h2 = H * H;
        const long long h3 = h2 * H;
        const long long h4 = h3 * H;
        const long long m2 = M * M;
        const long long n2 = n * n;
        const long long n3 = n2 * n;
        const long long r2 = R * R;
        const long long r3 = r2 * R;

        const long long poly =
        -24 - 50 * H - 35 * h2 - 10 * h3 - h4 - 36 * M - 12 * m2 + 12 * n + 70 * H * n +
        30 * h2 * n + 4 * h3 * n + 24 * M * n - 12 * n2 - 30 * H * n2 - 6 * h2 * n2 + 4 * H * n3 +
        44 * R - 48 * n * R + 12 * n2 * R + 24 * r2 - 12 * n * r2 + 4 * r3 + 24 * S;
        return static_cast<int>(poly / 24);
    }

    std::vector<float> V_;
    int                N_;
};
// Flat storage for a table of floats addressed by (i, d, e, s) for one fixed
// span D. Sized for the largest of the spans d, d + 1 and d + 2.
class DPtableX
{
    public:
    DPtableX() : V_(), N_(0), D_(0) {}

    // Records the span d and the dimension n, then sizes the storage for the
    // largest of the three spans d, d + 1, d + 2. Sizes are evaluated in 64
    // bits because the four-factor product overflows int for large n.
    void resize(int d, int n)
    {
        N_               = n;
        D_               = d;
        long long max_sz = 0;
        for (long long i = d; i < static_cast<long long>(d) + 3; ++i) {
            max_sz = std::max(max_sz, (N_ - i) * (i - 5) * (i - 1) * (i - 2) / 2);
        }
        V_.resize(static_cast<std::vector<float>::size_type>(max_sz));
    }

    // Assigns v to every cell.
    void fill(const float& v) { std::fill(V_.begin(), V_.end(), v); }

    float&       operator()(int i, int d, int e, int s) { return V_[index(i, d, e, s)]; }
    const float& operator()(int i, int d, int e, int s) const { return V_[index(i, d, e, s)]; }

    // Exchanges storage and dimensions with x.
    void swap(DPtableX& x)
    {
        std::swap(V_, x.V_);
        std::swap(N_, x.N_);
        std::swap(D_, x.D_);
    }

    private:
    // Maps (i, h1, m1, s) to its position in V_, using 64-bit intermediates.
    int index(int i, int h1, int m1, int s) const
    {
        const long long d      = D_;
        const long long d1d2   = (d - 1) * (d - 2);
        const long long d5     = d - 5;
        const long long h1_i_1 = h1 - i - 1;
        assert(i + d < N_);
        assert(d - 6 >= s);
        assert(i < h1);
        return static_cast<int>(
        i * d5 * d1d2 / 2 + s * d1d2 / 2 + h1_i_1 * (d - 1) - h1_i_1 * (h1 - i) / 2 + m1 - h1 - 1);
    }

    std::vector<float> V_;
    int                N_;
    int                D_;
};
// Interface of the partition-function / posterior computation taken from
// IPknot (see the file header). Only declarations are visible here; the
// definitions live in another translation unit.
class Nupack
{
public:
Nupack();
// Loading of the sequence and of the energy parameters.
void load_sequence(const string& s);
void load_parameters_fm363(const vector<float>& v);
void load_default_parameters(/*int which*/);
bool load_parameters(const char* filename);
void dump_parameters(std::ostream& os) const;
// Computation entry points. RNA's constructor calls them in this order:
// calculate_partition_function(), calculate_posterior(), get_posterior().
float calculate_partition_function();
void calculate_posterior();
// Output: pairing values in `bp`, addressed as bp[offset[i] + j] with
// 1-based i and j (this is how RNA's constructor reads them).
void get_posterior(vector<float>& bp, vector<int>& offset) const;
void get_posterior(vector<float>& bp1, vector<float>& bp2, vector<int>& offset) const;
private:
void fastiloops(int i, int j, DPtable4& Qg, DPtableX& Qx, DPtableX& Qx2);
void fastiloops_pr(int i, int j, DPtable4& Qg, DPtableX& Qx, DPtableX& Qx2, DPtable4& Pg, DPtableX& Px, DPtableX& Px2);
// Scoring helpers, all returning an energy_t.
energy_t score_hairpin(int i, int j) const;
energy_t score_loop(int l) const;
energy_t score_interior(int i, int d, int e, int j, bool pk) const;
energy_t score_interior_mismatch(int i, int j) const;
energy_t score_interior_mismatch(int i, int j, int k, int l) const;
energy_t score_interior_asymmetry(int l1, int l2) const;
energy_t score_multiloop(bool pk) const;
energy_t score_multiloop_paired(int n, bool pk) const;
energy_t score_multiloop_unpaired(int n, bool pk) const;
energy_t score_at_penalty(int i, int j) const;
energy_t score_dangle(int i, int j) const;
energy_t score_pk() const;
energy_t score_pk_multiloop() const;
energy_t score_pk_pk() const;
energy_t score_pk_paired(int n) const;
energy_t score_pk_unpaired(int n) const;
energy_t score_pk_band(int n) const;
// Base and basepair classification helpers.
int base(char x) const;
bool allow_paired(int i, int j) const;
bool wc_pair(int i, int j) const;
int pair_type(int i, int j) const;
int pair_type(int i) const;
vector<int> base_map;
boost::multi_array<int, 2> pair_map;
vector<int> seq;
int N;
float RT;
// Dynamic-programming tables. NOTE(review): presumably the Q* tables hold
// partition-function terms and the P* tables the matching posterior
// terms — confirm against the implementation.
DPtable2 Q;
DPtable2 Qb;
DPtable2 Qm;
DPtable2 Qp;
DPtable2 Qz;
DPtable4 Qg;
DPtable4 Qgl;
DPtable4 Qgr;
DPtable4 Qgls;
DPtable4 Qgrs;
DPtable2 P;
DPtable2 Pb;
DPtable2 Pm;
DPtable2 Pp;
DPtable2 Pz;
DPtable2 Pbg;
DPtable4 Pg;
DPtable4 Pgl;
DPtable4 Pgr;
DPtable4 Pgls;
DPtable4 Pgrs;
// energy parameters
energy_t hairpin37[30];
energy_t bulge37[30];
energy_t interior37[30];
energy_t stack37[6][6];
energy_t int11_37[6][6][4][4];
energy_t int21_37[6][4][4][6][4];
energy_t int22_37[6][6][4][4][4][4];
energy_t dangle3_37[6][4];
energy_t dangle5_37[6][4];
energy_t triloop37[4][4][4][4][4];
energy_t tloop37[4][4][4][4][4][4];
energy_t mismatch_hairpin37[4][4][6];
energy_t mismatch_interior37[4][4][6];
energy_t asymmetry_penalty[4];
energy_t polyC_penalty, polyC_slope, polyC_int;
energy_t at_penalty;
energy_t multiloop_penalty; // alpha1
energy_t multiloop_paired_penalty; // alpha2
energy_t multiloop_unpaired_penalty; // alpha3
energy_t pk_penalty; // beta1
energy_t pk_multiloop_penalty; // beta1m
energy_t pk_pk_penalty; // beta1p
energy_t pk_paired_penalty; // beta2
energy_t pk_unpaired_penalty; // beta3
energy_t pk_band_penalty;
energy_t pk_stack_span;
energy_t pk_interior_span;
energy_t multiloop_penalty_pk;
energy_t multiloop_paired_penalty_pk;
energy_t multiloop_unpaired_penalty_pk;
energy_t max_asymmetry;
energy_t salt_correction;
energy_t loop_greater30;
energy_t hairpin_GGG;
float intermolecular_initiation;
};
#include "rna.h"
#include "nupack.h"
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using std::cout, std::cerr, std::endl;
// Builds an RNA from a name and a sequence:
//   1. validates the sequence (exits the program on an unknown character)
//      and normalises it with format();
//   2. fills type_[i][j] (i < j) with a code for the pair of bases
//      (AU=1, CG=2, GC=3, GU=4, UG=5, UA=6, anything else 0);
//   3. fills coord_ with the pairs (i, j) of non-zero type that can stack on
//      a neighbouring pair of non-zero type;
//   4. computes the pairing values pij_ with Nupack (exits the program if
//      the partition function computation throws).
RNA::RNA(string name, string seq)
{
    if (!check_seq(seq)) {
        cerr << "Unknown chars in input sequence. Please restrict to ACGTU." << endl;
        exit(EXIT_FAILURE);
    }
    name_ = name;
    seq_  = seq;
    format();
    n_ = seq_.size();
    cout << "\t>formatted sequence" << endl;

    /*define type_*/
    // Code of the basepair formed by bases a (5' side) and b (3' side).
    const auto pair_code = [](char a, char b) -> int {
        if (a == 'A' && b == 'U') return 1;
        if (a == 'C' && b == 'G') return 2;
        if (a == 'G' && b == 'C') return 3;
        if (a == 'G' && b == 'U') return 4;
        if (a == 'U' && b == 'G') return 5;
        if (a == 'U' && b == 'A') return 6;
        return 0;
    };
    // Every cell starts at 0; only the upper triangle (i < j) can be non-zero.
    type_ = vector<vector<int>>(n_, vector<int>(n_));
    for (int i = 0; i < n_; i++) {
        for (int j = i + 1; j < n_; j++) {
            type_[i][j] = pair_code(seq_[i], seq_[j]);
        }
    }
    nBP_ = type_.size();

    /*define coord_*/
    for (int i = 0; i < n_; i++) {
        for (int j = i + 1; j < n_; j++) {
            if (type_[i][j] <= 0) continue;
            // i < j guarantees that (i + 1, j - 1) is inside the table.
            const bool inner_pair = type_[i + 1][j - 1] > 0;
            if (i > 0 and j + 1 < n_) {
                // Interior pair: it may stack on the enclosing pair or on the enclosed one.
                if (type_[i - 1][j + 1] > 0 or inner_pair) {
                    coord_.push_back(std::make_pair(i, j));
                }
            } else if (inner_pair) {
                // Pair touching an end of the sequence: there is no enclosing
                // pair (i - 1 or j + 1 would fall outside the table), so only
                // the enclosed pair is considered.
                coord_.push_back(std::make_pair(i, j));
            }
        }
    }

    /*define pij_*/
    vector<float> bp;
    vector<int>   offset;
    Nupack        nu;
    // nu.load_parameters("rna1999.dG");
    nu.load_default_parameters();
    cout << "\t>default parameters loaded (Serra and Turner, 1995)" << endl;
    nu.load_sequence(seq_);
    cout << "\t>computing pairing probabilities..." << endl;
    try {
        nu.calculate_partition_function();
    } catch (std::exception& e) {
        cerr << e.what() << endl;
        exit(EXIT_FAILURE);
    }
    nu.calculate_posterior();
    nu.get_posterior(bp, offset);

    // The posterior is addressed with 1-based indices; pij_ is 0-based.
    pij_ = vector<vector<float>>(n_, vector<float>(n_));
    for (int i = 1; i <= n_; i++) {
        for (int j = 1; j <= n_; j++) {
            pij_[i - 1][j - 1] = bp[offset[i] + j];
        }
    }
    cout << "\t>pairing probabilities defined" << endl;
}
/**
 * Looks up a base pair in coord_.
 *
 * @param p  the (first, second) pair to search for
 * @return   index of the first matching entry of coord_, or -1 when absent
 */
int RNA::find_coord(pair<int, int> p)
{
    const int count = static_cast<int>(coord_.size());
    for (int idx = 0; idx < count; ++idx) {
        if (coord_[idx] == p) {
            return idx;
        }
    }
    return -1;
}
/**
 * Tells whether a sequence is made only of the characters A, C, G, U and T
 * (upper-case). An empty sequence is accepted.
 *
 * @param seq  sequence to validate
 * @return     true when no other character occurs in seq
 */
bool RNA::check_seq(string seq)
{
    // Position of the first character outside the allowed alphabet, if any.
    const string::size_type first_bad = seq.find_first_not_of("AUCGT");
    return first_bad == string::npos;
}
/**
 * Normalises seq_ in place: every character is upper-cased and every 'T'
 * is replaced by 'U'.
 *
 * The whole sequence is processed; stopping at the first 'T' would leave
 * the remaining characters untouched.
 */
void RNA::format()
{
    for (unsigned int i = 0; i < seq_.size(); i++) {
        // toupper expects a value representable as unsigned char.
        seq_[i] = static_cast<char>(toupper(static_cast<unsigned char>(seq_[i])));
        if (seq_[i] == 'T') {
            seq_[i] = 'U';
        }
    }
}
#ifndef DEF_RNA
#define DEF_RNA
#include <map>
#include <string>
#include <vector>
using std::pair, std::string, std::vector, std::map;
/*
 * A component: an interval of positions with a score.
 *   pos   : (first, second) positions, stored unsigned
 *   score : score given at construction
 *   k     : number of positions covered, 1 + pos.second - pos.first
 */
struct Comp_ {
    pair<uint, uint> pos;
    int              score;
    size_t           k;

    /* k is derived from pos, which is declared (and therefore initialised) before it. */
    Comp_(pair<int, int> p, int s) : pos(p), score(s), k(1 + pos.second - pos.first) {}
};
typedef Comp_ Component;
/*
 * A motif: an identifier, its list of components and an orientation flag.
 * Plain aggregate; `reversed` has no default value and must be set by the user.
 */
struct Motif {
    string            atlas_id; /* identifier of the motif */
    vector<Component> comp;     /* components of the motif */
    bool              reversed; /* orientation flag */
};
/*
 * An RNA sequence and the tables attached to it: base pair types,
 * retained base pair coordinates and pairing probabilities.
 *
 * Naming: get_xxx_() returns a copy of the whole member xxx_, while
 * get_xxx(...) returns a single element of it.
 */
class RNA
{
  public:
    /* --- construction --- */
    RNA();
    RNA(string name, string seq);

    /* --- sequence handling --- */
    bool check_seq(string seq); /* true when seq only holds A, C, G, U, T */
    void format();              /* normalises seq_ in place */

    /* --- basic attributes --- */
    int    get_n_();
    string get_name_();
    string get_seq_();
    uint   get_RNA_length() const; /* returns nBP_ */
    int    get_err_();             /* NOTE(review): no definition visible in this header */

    /* --- base pair types --- */
    vector<vector<int>> get_type_();
    int                 get_type(int i, int j); /* type of the pair (i, j) */
    int                 get_type(int i);        /* type of the i-th entry of coord_ */

    /* --- base pair coordinates --- */
    vector<pair<int, int>> get_coord_();
    pair<int, int>         get_coord(int i);
    int                    get_coordF(int i); /* first member of coord_[i] */
    int                    get_coordS(int i); /* second member of coord_[i] */
    int                    find_coord(pair<int, int>);

    /* --- pairing probabilities --- */
    vector<vector<float>> get_pij_();
    float                 get_pij(int i, int j); /* probability of the pair (i, j) */
    float                 get_pij(int i);        /* probability of the i-th entry of coord_ */

  private:
    string                 name_;  /*name of the rna*/
    string                 seq_;   /*sequence of the rna*/
    int                    n_;     /*length of the rna*/
    vector<vector<int>>    type_;  /*vector of base pair types*/
    vector<pair<int, int>> coord_; /*vector of base pair coordinates*/
    vector<vector<float>>  pij_;   /*vector of probabilities*/
    uint                   nBP_;   /*number of possible base pair*/
};
inline int RNA::get_n_() { return n_; }
inline string RNA::get_name_() { return name_; }
inline string RNA::get_seq_() { return seq_; }
inline vector<vector<int>> RNA::get_type_() { return type_; }
inline int RNA::get_type(int i, int j) { return type_[i][j]; }
inline int RNA::get_type(int i) { return type_[get_coord(i).first][get_coord(i).second]; }
inline vector<pair<int, int>> RNA::get_coord_() { return coord_; }
inline pair<int, int> RNA::get_coord(int i) { return coord_[i]; }
inline int RNA::get_coordF(int i) { return coord_[i].first; }
inline int RNA::get_coordS(int i) { return coord_[i].second; }
inline vector<vector<float>> RNA::get_pij_() { return pij_; }
inline float RNA::get_pij(int i, int j) { return pij_[i][j]; }
inline float RNA::get_pij(int i) { return pij_[get_coord(i).first][get_coord(i).second]; }
inline uint RNA::get_RNA_length() const { return nBP_; }
#endif
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
<?xml version="1.0"?>
<!DOCTYPE ipe SYSTEM "ipe.dtd">
<ipe version="70206" creator="Ipe 7.2.7">
<info created="D:20181012182334" modified="D:20181012182334"/>
<ipestyle name="basic">
<symbol name="arrow/arc(spx)">
<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen">
0 0 m
-1 0.333 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/farc(spx)">
<path stroke="sym-stroke" fill="white" pen="sym-pen">
0 0 m
-1 0.333 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/ptarc(spx)">
<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen">
0 0 m
-1 0.333 l
-0.8 0 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/fptarc(spx)">
<path stroke="sym-stroke" fill="white" pen="sym-pen">
0 0 m
-1 0.333 l
-0.8 0 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="mark/circle(sx)" transformations="translations">
<path fill="sym-stroke">
0.6 0 0 0.6 0 0 e
0.4 0 0 0.4 0 0 e
</path>
</symbol>
<symbol name="mark/disk(sx)" transformations="translations">
<path fill="sym-stroke">
0.6 0 0 0.6 0 0 e
</path>
</symbol>
<symbol name="mark/fdisk(sfx)" transformations="translations">
<group>
<path fill="sym-fill">
0.5 0 0 0.5 0 0 e
</path>
<path fill="sym-stroke" fillrule="eofill">
0.6 0 0 0.6 0 0 e
0.4 0 0 0.4 0 0 e
</path>
</group>
</symbol>
<symbol name="mark/box(sx)" transformations="translations">
<path fill="sym-stroke" fillrule="eofill">
-0.6 -0.6 m
0.6 -0.6 l
0.6 0.6 l
-0.6 0.6 l
h
-0.4 -0.4 m
0.4 -0.4 l
0.4 0.4 l
-0.4 0.4 l
h
</path>
</symbol>
<symbol name="mark/square(sx)" transformations="translations">
<path fill="sym-stroke">
-0.6 -0.6 m
0.6 -0.6 l
0.6 0.6 l
-0.6 0.6 l
h
</path>
</symbol>
<symbol name="mark/fsquare(sfx)" transformations="translations">
<group>
<path fill="sym-fill">
-0.5 -0.5 m
0.5 -0.5 l
0.5 0.5 l
-0.5 0.5 l
h
</path>
<path fill="sym-stroke" fillrule="eofill">
-0.6 -0.6 m
0.6 -0.6 l
0.6 0.6 l
-0.6 0.6 l
h
-0.4 -0.4 m
0.4 -0.4 l
0.4 0.4 l
-0.4 0.4 l
h
</path>
</group>
</symbol>
<symbol name="mark/cross(sx)" transformations="translations">
<group>
<path fill="sym-stroke">
-0.43 -0.57 m
0.57 0.43 l
0.43 0.57 l
-0.57 -0.43 l
h
</path>
<path fill="sym-stroke">
-0.43 0.57 m
0.57 -0.43 l
0.43 -0.57 l
-0.57 0.43 l
h
</path>
</group>
</symbol>
<symbol name="arrow/fnormal(spx)">
<path stroke="sym-stroke" fill="white" pen="sym-pen">
0 0 m
-1 0.333 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/pointed(spx)">
<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen">
0 0 m
-1 0.333 l
-0.8 0 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/fpointed(spx)">
<path stroke="sym-stroke" fill="white" pen="sym-pen">
0 0 m
-1 0.333 l
-0.8 0 l
-1 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/linear(spx)">
<path stroke="sym-stroke" pen="sym-pen">
-1 0.333 m
0 0 l
-1 -0.333 l
</path>
</symbol>
<symbol name="arrow/fdouble(spx)">
<path stroke="sym-stroke" fill="white" pen="sym-pen">
0 0 m
-1 0.333 l
-1 -0.333 l
h
-1 0 m
-2 0.333 l
-2 -0.333 l
h
</path>
</symbol>
<symbol name="arrow/double(spx)">
<path stroke="sym-stroke" fill="sym-stroke" pen="sym-pen">
0 0 m
-1 0.333 l
-1 -0.333 l
h
-1 0 m
-2 0.333 l
-2 -0.333 l
h
</path>
</symbol>
<pen name="heavier" value="0.8"/>
<pen name="fat" value="1.2"/>
<pen name="ultrafat" value="2"/>
<symbolsize name="large" value="5"/>
<symbolsize name="small" value="2"/>
<symbolsize name="tiny" value="1.1"/>
<arrowsize name="large" value="10"/>
<arrowsize name="small" value="5"/>
<arrowsize name="tiny" value="3"/>
<color name="red" value="1 0 0"/>
<color name="green" value="0 1 0"/>
<color name="blue" value="0 0 1"/>
<color name="yellow" value="1 1 0"/>
<color name="orange" value="1 0.647 0"/>
<color name="gold" value="1 0.843 0"/>
<color name="purple" value="0.627 0.125 0.941"/>
<color name="gray" value="0.745"/>
<color name="brown" value="0.647 0.165 0.165"/>
<color name="navy" value="0 0 0.502"/>
<color name="pink" value="1 0.753 0.796"/>
<color name="seagreen" value="0.18 0.545 0.341"/>
<color name="turquoise" value="0.251 0.878 0.816"/>
<color name="violet" value="0.933 0.51 0.933"/>
<color name="darkblue" value="0 0 0.545"/>
<color name="darkcyan" value="0 0.545 0.545"/>
<color name="darkgray" value="0.663"/>
<color name="darkgreen" value="0 0.392 0"/>
<color name="darkmagenta" value="0.545 0 0.545"/>
<color name="darkorange" value="1 0.549 0"/>
<color name="darkred" value="0.545 0 0"/>
<color name="lightblue" value="0.678 0.847 0.902"/>
<color name="lightcyan" value="0.878 1 1"/>
<color name="lightgray" value="0.827"/>
<color name="lightgreen" value="0.565 0.933 0.565"/>
<color name="lightyellow" value="1 1 0.878"/>
<dashstyle name="dashed" value="[4] 0"/>
<dashstyle name="dotted" value="[1 3] 0"/>
<dashstyle name="dash dotted" value="[4 2 1 2] 0"/>
<dashstyle name="dash dot dotted" value="[4 2 1 2 1 2] 0"/>
<textsize name="large" value="\large"/>
<textsize name="Large" value="\Large"/>
<textsize name="LARGE" value="\LARGE"/>
<textsize name="huge" value="\huge"/>
<textsize name="Huge" value="\Huge"/>
<textsize name="small" value="\small"/>
<textsize name="footnote" value="\footnotesize"/>
<textsize name="tiny" value="\tiny"/>
<textstyle name="center" begin="\begin{center}" end="\end{center}"/>
<textstyle name="itemize" begin="\begin{itemize}" end="\end{itemize}"/>
<textstyle name="item" begin="\begin{itemize}\item{}" end="\end{itemize}"/>
<gridsize name="4 pts" value="4"/>
<gridsize name="8 pts (~3 mm)" value="8"/>
<gridsize name="16 pts (~6 mm)" value="16"/>
<gridsize name="32 pts (~12 mm)" value="32"/>
<gridsize name="10 pts (~3.5 mm)" value="10"/>
<gridsize name="20 pts (~7 mm)" value="20"/>
<gridsize name="14 pts (~5 mm)" value="14"/>
<gridsize name="28 pts (~10 mm)" value="28"/>
<gridsize name="56 pts (~20 mm)" value="56"/>
<anglesize name="90 deg" value="90"/>
<anglesize name="60 deg" value="60"/>
<anglesize name="45 deg" value="45"/>
<anglesize name="30 deg" value="30"/>
<anglesize name="22.5 deg" value="22.5"/>
<opacity name="10%" value="0.1"/>
<opacity name="30%" value="0.3"/>
<opacity name="50%" value="0.5"/>
<opacity name="75%" value="0.75"/>
<tiling name="falling" angle="-60" step="4" width="1"/>
<tiling name="rising" angle="30" step="4" width="1"/>
</ipestyle>
<page>
<layer name="alpha"/>
<view layers="alpha" active="alpha"/>
<path layer="alpha" matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat">
64 704 m
320 704 l
</path>
<path matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat">
96 704 m
32.249 0 0 -32.249 128 708 160 704 a
</path>
<path matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat">
208 704 m
25.2982 0 0 -25.2982 232 696 256 704 a
</path>
<path matrix="1 0 0 1 -32 0" stroke="black" pen="ultrafat">
128 704 m
77.746 0 0 -77.746 200 674.667 272 704 a
</path>
<path matrix="1 0 0 1 256 64" stroke="black" pen="ultrafat">
64 704 m
320 704 l
</path>
<path matrix="1 0 0 1 256 64" stroke="black" pen="ultrafat">
208 704 m
25.2982 0 0 -25.2982 232 696 256 704 a
</path>
<path matrix="1 0 0 1 256 64" stroke="black" pen="ultrafat">
128 704 m
77.746 0 0 -77.746 200 674.667 272 704 a
</path>
<path matrix="1 0 0 1 256 -96" stroke="black" pen="ultrafat">
64 704 m
320 704 l
</path>
<path matrix="1 0 0 1 256 -96" stroke="black" pen="ultrafat">
96 704 m
32.249 0 0 -32.249 128 708 160 704 a
</path>
<text matrix="1 0 0 1 0 -16" transformations="translations" pos="64 688" stroke="black" type="label" width="202.242" height="17.213" depth="4.82" valign="baseline" size="Huge">structure $y$ with PK</text>
<path stroke="black" pen="ultrafat" arrow="normal/normal">
256 736 m
304 752 l
</path>
<path stroke="black" pen="ultrafat" arrow="normal/normal">
288 672 m
320 640 l
</path>
<text matrix="1 0 0 1 32 -16" transformations="translations" pos="384 752" stroke="black" type="label" width="60.952" height="16.741" depth="4.02" valign="baseline" size="huge">level $y^1$</text>
<text transformations="translations" pos="416 576" stroke="black" type="label" width="60.952" height="16.741" depth="4.02" valign="baseline" size="huge">level $y^2$</text>
<text transformations="translations" pos="432 672" stroke="black" type="label" width="17.843" height="13.97" depth="1.57" valign="baseline" size="Huge">+</text>
</page>
</ipe>
No preview for this file type
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
#ifndef COMPONENT_H
#define COMPONENT_H
#include <iosfwd>
#include <string>
/**
 * One component of a motif: a sequence string together with an unsigned
 * integer k.
 * NOTE(review): `_cons_seq` presumably holds a consensus sequence and `k`
 * its length or index; no definition is visible here, confirm against the
 * implementation.
 */
class Component
{
  public:
    Component();
    Component(std::string& cons_seq, unsigned int k);

  private:
    std::string  _cons_seq;
    unsigned int k;
};

// Stream operators for Component (declared here, defined elsewhere).
// The stream types are spelled with std:: because this header brings no
// using-declaration into scope.
std::istream& operator>>(std::istream& is, Component& m);
std::ostream& operator<<(std::ostream& os, const Component& m);
#endif
\ No newline at end of file
#include "Motif.h"
\ No newline at end of file
#ifndef MOTIF_H
#define MOTIF_H
#include <vector>
#include <string>
#include "Component.h"
/**
 * A named motif made of a list of Component objects.
 *
 * NOTE(review): getComponents() and getComponent() are const yet return
 * non-const references; their definitions are not visible here, so the
 * signatures are left untouched.
 */
class Motif
{
  public:
    Motif();
    Motif(std::string filename); // NOTE(review): presumably loads the motif from this file
    ~Motif();

    std::vector<Component>& getComponents() const;
    Component&              getComponent(unsigned int k) const;

    std::string name;

  private:
    std::vector<Component> _comps;
};

// Stream operators for Motif (declared here, defined elsewhere).
// The stream types are spelled with std:: because this header brings no
// using-declaration into scope.
std::istream& operator>>(std::istream& is, Motif& m);
std::ostream& operator<<(std::ostream& os, const Motif& m);
#endif
\ No newline at end of file
#include "RNA.h"
// Default construction: the sequence starts out empty.
RNA::RNA()
    : _seq()
{
}

// Nothing to release by hand: the string member cleans up after itself.
RNA::~RNA()
{
}

// Stores a copy of the given sequence.
RNA::RNA(std::string seq)
    : _seq(seq)
{
}
#ifndef RNA_H
#define RNA_H
#include <string>
/**
 * Thin wrapper around a nucleotide sequence stored as a string.
 */
class RNA
{
  public:
    RNA();
    RNA(std::string);
    ~RNA();

    /// Number of characters in the stored sequence.
    uint length() const
    {
        return _seq.length();
    }

    /// Copy of the stored sequence.
    std::string str() const
    {
        return _seq;
    }

    // Accessors kept commented out, as in the previous revision:
    // char seq(uint k) const;
    // bool isConsensus() const;
    // bool containsPseudoBases() const;

  private:
    std::string _seq;

    // Flags kept commented out, as in the previous revision:
    // bool _coding;
    // bool _consensus;
    // bool _containsPseudoNTs;
};
#endif
\ No newline at end of file
#include "Motif.h"
#include <cstdlib>
#include <iostream>
#include <unistd.h>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include "RNA.h"
using namespace std;
namespace bpo = boost::program_options;
namespace bf = boost::filesystem;
/**
 * Checks that `folder` is an existing directory holding at least one entry
 * whose file name contains ".desc", and prints how many were found.
 *
 * @param folder  directory to inspect
 * @return        true when at least one matching entry exists; false when the
 *                path is not an existing directory or holds no such entry
 */
bool checkMotifFolder(bf::path & folder)
{
    if (not(bf::is_directory(folder) and bf::exists(folder))) return false;

    const bf::directory_iterator end_itr;
    uint                         Ndesc = 0;
    for (bf::directory_iterator itr(folder); itr != end_itr; ++itr) {
        // filename() is the supported spelling of the deprecated leaf().
        const string entry_name = itr->path().filename().string();
        if (entry_name.find(".desc") != std::string::npos) Ndesc++;
    }
    cout << "Found " << Ndesc << " .desc files in " << folder.string() << endl;
    return Ndesc > 0;
}
/**
 * Walks `folder` looking for entries whose file name contains ".desc".
 * The parsing of each descriptor is not implemented yet.
 *
 * @param folder  directory to scan
 * @return        number of matching entries encountered; the function is
 *                declared int, so it must return a value on every path
 */
int parseMotifs(bf::path & folder)
{
    int                          n_desc = 0;
    const bf::directory_iterator end_itr;
    for (bf::directory_iterator itr(folder); itr != end_itr; ++itr) {
        // filename() is the supported spelling of the deprecated leaf().
        if (itr->path().filename().string().find(string(".desc")) != std::string::npos) {
            // TODO: parse the descriptor file.
            ++n_desc;
        }
    }
    return n_desc;
}
int main(int argc, char** argv)
{
bf::path currentFolder(bf::current_path());
bf::path motifFolderPath(currentFolder); // By default, searching here...
bpo::options_description desc("Options");
bpo::variables_map options_map;
RNA querySequence;
desc.add_options()
("help", "Print this help message")
("seq,s", bpo::value<string>()->required(), "RNA sequence to find motives in.")
("motifs,m", bpo::value<bf::path>(), "Path to folder of DESC files describing the motifs to be used")
;
bpo::store(bpo::parse_command_line(argc, argv, desc), options_map);
bpo::notify(options_map);
if (options_map.count("help")) {
cout << desc << endl;
return EXIT_SUCCESS;
}
if (options_map.count("motifs")) {
motifFolderPath = options_map["motifs"].as<bf::path>();
if (not(checkMotifFolder(motifFolderPath))) { return EXIT_FAILURE; }
}
if (options_map.count("seq")) {
querySequence = RNA(options_map["seq"].as<string>());
}
cout << "Working with sequence " << querySequence.str() << " (length " << querySequence.length() << ")." << endl;
return EXIT_SUCCESS;
}