Louis BECQUEY

Working "k-Pareto sets"

......@@ -9,7 +9,7 @@ TARGET = biominserter
CC = clang++
# compiling flags here
CFLAGS = -Icppsrc/ -I/usr/local/include -I$(ICONCERT) -I$(ICPLEX) -I$(INUPACK) -I$(IEIGEN) -g
CFLAGS = -Icppsrc/ -I/usr/local/include -I$(ICONCERT) -I$(ICPLEX) -I$(INUPACK) -I$(IEIGEN) -O3
CXXFLAGS = --std=c++17 -Wall -Wpedantic -Wextra -Wno-ignored-attributes -Wno-unused-variable
LINKER = clang++
......
This diff is collapsed. Click to expand it.
......@@ -14,21 +14,28 @@ class MOIP
{
public:
MOIP(void);
MOIP(const RNA& rna, const vector<Motif>& motifSites, float pthreshold, bool verbose);
MOIP(const RNA& rna, const vector<Motif>& motifSites, uint nsets, float pthreshold, bool verbose);
~MOIP(void);
SecondaryStructure solve_objective(int o, double min, double max);
SecondaryStructure solve_objective(int o);
uint get_n_solutions(void) const;
const SecondaryStructure& solution(uint i) const;
void extend_pareto(double lambdaMin, double lambdaMax);
void search_between(double lambdaMin, double lambdaMax);
bool allowed_basepair(size_t u, size_t v) const;
void add_solution(const SecondaryStructure& s);
void remove_solution(uint i);
static uint obj_to_solve_; // What objective do you prefer to solve in mono-objective portions of the algorithm ?
static double precision_; // decimals to keep in objective values, to avoid numerical issues. otherwise, solution with objective 5.0000000009 dominates solution with 5.0 =(
static double epsilon_;
private:
bool is_undominated_yet(const SecondaryStructure& s);
void define_problem_constraints(void);
size_t get_yuv_index(size_t u, size_t v) const;
size_t get_Cpxi_index(size_t x_i, size_t i_on_j) const;
bool is_undominated_yet(const SecondaryStructure& s);
void define_problem_constraints(void);
size_t get_yuv_index(size_t u, size_t v) const;
size_t get_Cpxi_index(size_t x_i, size_t i_on_j) const;
bool exists_vertical_outdated_labels(const SecondaryStructure& s) const;
bool exists_horizontal_outdated_labels(const SecondaryStructure& s) const;
IloNumExprArg& y(size_t u, size_t v); // Direct reference to y^u_v in basepair_dv_
IloNumExprArg& C(size_t x, size_t i); // Direct reference to C_p^xi in insertion_dv_
......@@ -38,6 +45,7 @@ class MOIP
RNA rna_; // RNA object
vector<Motif> insertion_sites_; // Potential Motif insertion sites
vector<SecondaryStructure> pareto_; // Vector of results
uint n_sets_; // number of Pareto sets to return
// Objectives related
float theta_; // theta parameter for the probability function
......
#include "SecondaryStructure.h"
#include "MOIP.h"
#include <algorithm>
#include <boost/format.hpp>
......@@ -13,7 +14,7 @@ SecondaryStructure::SecondaryStructure() {}
SecondaryStructure::SecondaryStructure(const RNA& rna)
: objective_scores_(vector<double>(2)), n_(rna.get_RNA_length()), nBP_(0), rna_(rna)
: objective_scores_(vector<double>(2)), n_(rna.get_RNA_length()), nBP_(0), k_(0), rna_(rna)
{
is_empty_structure = false;
}
......@@ -21,6 +22,7 @@ SecondaryStructure::SecondaryStructure(const RNA& rna)
SecondaryStructure::SecondaryStructure(bool empty) : rna_(RNA()) { is_empty_structure = empty; }
string SecondaryStructure::to_DBN(void) const
{
......@@ -175,16 +177,16 @@ bool operator>(const SecondaryStructure& s1, const SecondaryStructure& s2)
bool obj1 = false, obj2 = false, strict1 = false, strict2 = false;
if (s11 > s21) {
if (s11 - s21 > MOIP::precision_) {
strict1 = true;
obj1 = true;
} else if (s11 == s21) {
} else if (abs(s11 - s21) < MOIP::precision_) {
obj1 = true;
}
if (s12 > s22) {
if (s12 - s22 > MOIP::precision_) {
strict2 = true;
obj2 = true;
} else if (s12 == s22) {
} else if (abs(s12 - s22) < MOIP::precision_) {
obj2 = true;
}
......@@ -204,16 +206,16 @@ bool operator<(const SecondaryStructure& s1, const SecondaryStructure& s2)
bool obj1 = false, obj2 = false, strict1 = false, strict2 = false;
if (s11 < s21) {
if (MOIP::precision_ < s21 - s11) {
strict1 = true;
obj1 = true;
} else if (s11 == s21) {
} else if (abs(s11-s21) < MOIP::precision_) {
obj1 = true;
}
if (s12 < s22) {
if (MOIP::precision_ < s22 - s12) {
strict2 = true;
obj2 = true;
} else if (s12 == s22) {
} else if (abs(s12-s22) < MOIP::precision_) {
obj2 = true;
}
......@@ -232,20 +234,20 @@ bool operator>=(const SecondaryStructure& s1, const SecondaryStructure& s2)
bool obj1 = false, obj2 = false, strict1 = false, strict2 = false;
if (s11 > s21) {
if (s11 - s21 > MOIP::precision_) {
strict1 = true;
obj1 = true;
} else if (s11 == s21) {
} else if (abs(s11-s21) < MOIP::precision_) {
obj1 = true;
}
if (s12 > s22) {
if (s12 - s22 > MOIP::precision_) {
strict2 = true;
obj2 = true;
} else if (s12 == s22) {
} else if (abs(s12-s22) < MOIP::precision_) {
obj2 = true;
}
if ((obj1 && obj2 && (strict1 || strict2)) || ((s11 == s21 && s12 == s22))) {
if ((obj1 && obj2 && (strict1 || strict2)) || ((abs(s11-s21) < MOIP::precision_ && abs(s12-s22) < MOIP::precision_))) {
return true;
}
......@@ -261,20 +263,20 @@ bool operator<=(const SecondaryStructure& s1, const SecondaryStructure& s2)
bool obj1 = false, obj2 = false, strict1 = false, strict2 = false;
if (s11 < s21) {
if (MOIP::precision_ < s21 - s11) {
strict1 = true;
obj1 = true;
} else if (s11 == s21) {
} else if (abs(s11-s21) < MOIP::precision_) {
obj1 = true;
}
if (s12 < s22) {
if (MOIP::precision_ < s22 - s12) {
strict2 = true;
obj2 = true;
} else if (s12 == s22) {
} else if (abs(s12-s22) < MOIP::precision_) {
obj2 = true;
}
if ((obj1 && obj2 && (strict1 || strict2)) || ((s11 == s21 && s12 == s22))) {
if ((obj1 && obj2 && (strict1 || strict2)) || ((abs(s11-s21) < MOIP::precision_ && abs(s12-s22) < MOIP::precision_))) {
return true;
}
return false;
......@@ -289,6 +291,8 @@ bool operator==(const Component& c1, const Component& c2)
bool operator!=(const Component& c1, const Component& c2) { return not(c1 == c2); }
bool operator==(const Motif& m1, const Motif& m2)
{
if (m1.atlas_id != m2.atlas_id) return false;
......@@ -301,6 +305,8 @@ bool operator==(const Motif& m1, const Motif& m2)
bool operator!=(const Motif& m1, const Motif& m2) { return not(m1 == m2); }
bool operator==(const SecondaryStructure& s1, const SecondaryStructure& s2)
{
// Checks wether the secondary structures are exactly the same, including the inserted motifs.
......@@ -317,4 +323,11 @@ bool operator==(const SecondaryStructure& s1, const SecondaryStructure& s2)
for (uint i = 0; i < s1.get_n_motifs(); i++)
if (s1.motif_info_[i] != s2.motif_info_[i]) return false;
return true;
}
bool operator!=(const SecondaryStructure& s1, const SecondaryStructure& s2)
{
// Checks wether the secondary structures are different, including the inserted motifs.
return not(s1 == s2);
}
\ No newline at end of file
......
#ifndef __INC_IP_SOL__
#define __INC_IP_SOL__
#define IL_STD
#include "rna.h"
#include <ilconcert/ilomodel.h>
#include <ilcplex/ilocplex.h>
#include <iostream>
#include <string>
#include <vector>
......@@ -45,6 +49,8 @@ class SecondaryStructure
void insert_motif(const Motif& m);
double get_objective_score(int i) const;
void set_objective_score(int i, double s);
void set_pareto_set(uint k);
uint get_pareto_set(void) const;
uint get_n_motifs(void) const;
uint get_n_bp(void) const;
void print(void) const;
......@@ -56,9 +62,11 @@ class SecondaryStructure
vector<pair<uint, uint>> basepairs_; // values of the decision variable of the integer program
vector<Motif> motif_info_; // information about known motives in this secondary structure and their positions
size_t n_; // length of the RNA
size_t nBP_;
RNA rna_; // RNA object which is folded
size_t nBP_; // number of basepairs
uint k_; // Secondary Structure belongs to the kth Pareto set
RNA rna_; // RNA object which is folded
bool is_empty_structure; // Empty structure, returned when the solver does not find solutions anymore
IloConstraint forbid_this_; // Add it to a cplex model to forbid that solution
};
// return if this SecondaryStructure s1 dominates s2
......@@ -67,8 +75,9 @@ bool operator>=(const SecondaryStructure& s1, const SecondaryStructure& s2);
// return if this SecondaryStructure s2 dominates s1
bool operator<(const SecondaryStructure& s1, const SecondaryStructure& s2);
bool operator<=(const SecondaryStructure& s1, const SecondaryStructure& s2);
// return wether SecondaryStructures are identical
// return wether SecondaryStructures are identical or not
bool operator==(const SecondaryStructure& s1, const SecondaryStructure& s2);
bool operator!=(const SecondaryStructure& s1, const SecondaryStructure& s2);
// utilities to compare secondary structures:
bool operator==(const Motif& m1, const Motif& m2);
bool operator!=(const Motif& m1, const Motif& m2);
......@@ -81,5 +90,7 @@ inline double SecondaryStructure::get_objective_score(int i) const { return obje
inline void SecondaryStructure::set_objective_score(int i, double s) { objective_scores_[i - 1] = s; }
inline uint SecondaryStructure::get_n_motifs(void) const { return motif_info_.size(); }
inline uint SecondaryStructure::get_n_bp(void) const { return nBP_; }
inline uint SecondaryStructure::get_pareto_set(void) const { return k_; }
inline void SecondaryStructure::set_pareto_set(uint k) { k_ = k; }
#endif
......
......@@ -66,30 +66,28 @@ int main(int argc, char* argv[])
{
/* ARGUMENT CHECKING */
if (argc != 5) {
if (argc != 6) {
cerr << argc << " arguments specified !" << endl;
cerr << "Please specify the following input files:" << endl;
cerr << "biominserter sequence.fasta insertion.sites.csv prob_threshold verbose" << endl;
cerr << "biominserter sequence.fasta insertion.sites.csv prob_threshold verbose obj" << endl;
return EXIT_FAILURE;
}
/* VARIABLE DECLARATIONS */
const char* inputName = argv[1];
const char* csvname = argv[2];
bool verbose = (atoi(argv[4]) != 0);
string fastaname = string(inputName);
string basename = remove_ext(inputName, '.', '/');
float theta_p_threshold = atof(argv[3]);
list<Fasta> f;
list<Fasta>::iterator fa = f.begin();
string line;
ifstream motifs;
vector<Motif> posInsertionSites;
ofstream outfile;
SecondaryStructure bestSSO1, bestSSO2;
RNA myRNA;
MOIP myMOIP;
const char* inputName = argv[1];
const char* csvname = argv[2];
bool verbose = (atoi(argv[4]) != 0);
string basename = remove_ext(inputName, '.', '/');
float theta_p_threshold = atof(argv[3]);
list<Fasta> f;
string line;
ifstream motifs;
vector<Motif> posInsertionSites;
ofstream outfile;
SecondaryStructure bestSSO1, bestSSO2;
RNA myRNA;
MOIP::obj_to_solve_ = atoi(argv[5]);
/* FILE PARSING */
......@@ -100,6 +98,7 @@ int main(int argc, char* argv[])
return EXIT_FAILURE;
}
Fasta::load(f, inputName);
list<Fasta>::iterator fa = f.begin();
if (verbose) cout << "loading " << fa->name() << "..." << endl;
myRNA = RNA(fa->name(), fa->seq(), verbose);
if (verbose) cout << "\t>" << inputName << " successfuly loaded (" << myRNA.get_RNA_length() << " nt)" << endl;
......@@ -117,19 +116,31 @@ int main(int argc, char* argv[])
/* FIND K-PARETO SETS */
myMOIP = MOIP(myRNA, posInsertionSites, theta_p_threshold, verbose);
MOIP myMOIP = MOIP(myRNA, posInsertionSites, 1, theta_p_threshold, verbose);
try {
bestSSO1 = myMOIP.solve_objective(1, -__DBL_MAX__, __DBL_MAX__);
bestSSO2 = myMOIP.solve_objective(2, -__DBL_MAX__, __DBL_MAX__);
if (verbose) {
cout << "Best solution according to objective 1 :" << bestSSO1.to_string();
cout << "Best solution according to objective 2 :" << bestSSO2.to_string();
}
// extend to the whole pareto set
bestSSO1.set_pareto_set(1);
bestSSO2.set_pareto_set(1);
myMOIP.add_solution(bestSSO1);
myMOIP.add_solution(bestSSO2);
myMOIP.extend_pareto(bestSSO2.get_objective_score(1), bestSSO1.get_objective_score(1));
if (verbose) {
cout << endl << "Best solution according to objective 1 :" << bestSSO1.to_string() << endl;
cout << "Best solution according to objective 2 :" << bestSSO2.to_string() << endl;
}
// extend to the whole pareto set
if (MOIP::obj_to_solve_ == 1) {
if (verbose) cout << endl << "Solving obj1 on top of best solution 1." << endl;
myMOIP.search_between(bestSSO1.get_objective_score(2) + MOIP::epsilon_, bestSSO2.get_objective_score(2));
if (verbose) cout << endl << "Solving obj1 below best solution 1." << endl;
myMOIP.search_between(-__DBL_MAX__, bestSSO1.get_objective_score(2));
} else {
if (verbose) cout << endl << "Solving obj2 on top of best solution 2." << endl;
myMOIP.search_between(bestSSO2.get_objective_score(1) + MOIP::epsilon_, bestSSO1.get_objective_score(1));
if (verbose) cout << endl << "Solving obj2 below best solution 2." << endl;
myMOIP.search_between(-__DBL_MAX__, bestSSO2.get_objective_score(1));
}
} catch (IloAlgorithm::NotExtractedException& e) {
cerr << e << endl;
exit(EXIT_FAILURE);
......@@ -138,6 +149,22 @@ int main(int argc, char* argv[])
exit(EXIT_FAILURE);
}
/* REMOVE SOLUTIONS WITH TOO HIGH LABEL */
vector<size_t> to_remove;
if (verbose) cout << endl;
for (uint i = 0; i < myMOIP.get_n_solutions(); i++)
if (myMOIP.solution(i).get_pareto_set() > 1) { // Some solution is fromm a Pareto set of too high order
if (verbose)
cout << "Removing structure from Pareto set " << myMOIP.solution(i).get_pareto_set() << " : "
<< myMOIP.solution(i).to_string() << endl;
to_remove.push_back(i);
}
if (to_remove.size()) {
for (size_t i = to_remove.size() - 1; i != 0; i--) myMOIP.remove_solution(to_remove[i]);
myMOIP.remove_solution(to_remove[0]);
}
/* DISPLAY RESULTS */
// print the pareto set
......
......@@ -111,7 +111,7 @@ RNA::RNA(string name, string seq, bool verbose)
for (i = 0; i < length; i++) {
p_unpaired[i] = pairPr[(length + 1) * i + j];
double sum = 0.0;
for (j = 0; j < length; j++) sum += pij_(i, j);
for (j = 0; j < length; j++) sum += i < j ? pij_(i, j) : pij_(j, i);
printf("\t\t%d\tunpaired: %.4e\tpaired(pK+noPK): %.4e\tTotal: %f\n", i + 1, p_unpaired[i], sum, p_unpaired[i] + sum);
}
cout << "\t\t>pairing probabilities defined" << endl;
......