Louis BECQUEY

Nettoyage + command line tests

......@@ -6,8 +6,6 @@ from math import sqrt, ceil
import numpy as np
import matplotlib.pyplot as plt
log_path = "test.log"
log = open(log_path, 'a')
......@@ -112,7 +110,7 @@ def get_list_str_by_seq(name, estimator, function, list_str, true_str, modules):
max_mcc = get_mcc_structs_max(path_benchmark, name, estimator, function, extension, modules, true_str)
list_str.append(max_mcc)
# ================== Code from Louis Beckey Benchark.py ==============================
# ================== Code from Louis Becquey Benchark.py ==============================
def dbn_to_basepairs(structure):
parenthesis = []
brackets = []
......@@ -199,7 +197,7 @@ def f1_score(tp, tn, fp, fn):
def specificity(tp, tn, fp, fn):
return tn / (tn + fp)
# ================== Code from Louis Beckey Benchark.py ==============================
# ================== Code from Louis Becquey Benchark.py ==============================
#Get the best MCC value for all prediction of the results file of the sequence in argument
def get_mcc_structs_max(path_benchmark, sequence_id, estimator, function, extension, modules, true_structure):
......
......@@ -9,7 +9,7 @@ CC = g++
CFLAGS = -Icppsrc/ -I/usr/local/include -I$(CPLEX)/concert/include -I$(CPLEX)/cplex/include -g -O3
CXXFLAGS = --std=c++17 -Wall -Wpedantic -Wextra -Wno-deprecated-copy -Wno-ignored-attributes
LINKER = g++
LDFLAGS = -L$(CPLEX)/concert/lib/x86-64_linux/static_pic/ -L$(CPLEX)/cplex/lib/x86-64_linux/static_pic/ -lboost_system -lboost_filesystem -lboost_program_options -lgomp -lconcert -lilocplex -lcplex -lpthread -ldl -lRNA -lm
LDFLAGS = -Wno-free-nonheap-object -L$(CPLEX)/concert/lib/x86-64_linux/static_pic/ -L$(CPLEX)/cplex/lib/x86-64_linux/static_pic/ -lboost_system -lboost_filesystem -lboost_program_options -lgomp -lconcert -lilocplex -lcplex -lpthread -ldl -lRNA -lm
# change these to proper directories where each file should be
SRCDIR = cppsrc
......@@ -32,7 +32,7 @@ $(OBJECTS): $(OBJDIR)/%.o : $(SRCDIR)/%.cpp $(INCLUDES)
@echo -e "\033[00;32mCompiled "$<".\033[00m"
.PHONY: all
all: $(BINDIR)/$(TARGET) doc
all: $(BINDIR)/$(TARGET)
.PHONY: re
re: remove clean all
......
......@@ -87,11 +87,6 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
} else {
index_of_yuv_[u].push_back(rna_.get_RNA_length() * rna_.get_RNA_length() + 1);
}
/*for (u = 0; u < index_of_yuv_.size(); u++) {
for (v = 0; v < index_of_yuv_[u].size(); v++) {
cout << "["<< u << "]["<< v <<"]: " << index_of_yuv_[u][v] << endl;
}
}*/
if (verbose_) cout << endl;
// Add the x_i,j decision variables
......@@ -100,7 +95,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
index_of_xij_ = vector<vector<size_t>>(rna_.get_RNA_length() - 6, vector<size_t>(0));
for (u = 0; u < rna_.get_RNA_length() - 6; u++)
for (v = u + 4; v < rna_.get_RNA_length(); v++) // A basepair is possible if v > u+3
if (rna_.get_pij(u, v) > theta and rna_.get_pij(u + 1, v - 1) > theta) { // ou u-1 v+1 ??
if (rna_.get_pij(u, v) > theta and rna_.get_pij(u + 1, v - 1) > theta) { // or u-1 v+1 ??
if (verbose_) cout << u << '-' << v << " ";
index_of_xij_[u].push_back(c);
c++;
......@@ -110,11 +105,6 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
} else {
index_of_xij_[u].push_back(rna_.get_RNA_length() * rna_.get_RNA_length() + 1);
}
/*for (u = 0; u < index_of_xij_.size(); u++) {
for (v = 0; v < index_of_xij_[u].size(); v++) {
cout << "["<< u << "]["<< v <<"]: " << index_of_xij_[u][v] << endl;
}
}*/
if (verbose_) cout << endl;
// Look for insertions sites, then create the appropriate Cxip variables
......@@ -221,25 +211,25 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
thread_pool.push_back(thread(&Pool::infinite_loop_func, &pool));
// Read every RIN file and add it to the queue (iff valid)
char error;
char error;
for (auto it : recursive_directory_range(source_path))
{
if ((error = Motif::is_valid_RIN(it.path().string()))) // Returns error if RIN file is incorrect
{
if (verbose)
if ((error = Motif::is_valid_RIN(it.path().string()))) // Returns error if RIN file is incorrect
{
if (verbose)
{
cerr << "\t> Ignoring RIN " << it.path().stem();
switch (error)
{
case 'l': cerr << ", too short to be considered."; break;
case 'x': cerr << ", because not constraining the secondary structure."; break;
default: cerr << ", unknown reason";
default: cerr << ", unknown reason";
}
cerr << endl;
}
errors++;
errors++;
continue;
}
}
accepted++;
args_of_parallel_func args(it.path(), posInsertionSites_access);
inserted++;
......@@ -264,12 +254,12 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
size_t errors = 0;
// Read every JSON files
vector<pair<uint,char>> errors_id;
vector<pair<uint,char>> errors_id;
for (auto it : recursive_directory_range(source_path))
{
errors_id = Motif::is_valid_JSON(it.path().string());
if (!(errors_id.empty())) // Returns error if JSON file is incorrect
{
if (!(errors_id.empty())) // Returns error if JSON file is incorrect
{
for(uint j = 0; j < errors_id.size(); j++)
{
if(verbose) {
......@@ -293,7 +283,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
}
}
errors++;
}
}
accepted++;
args_of_parallel_func args(it.path(), posInsertionSites_access);
inserted++;
......@@ -306,7 +296,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
}
else
{
cout << "!!! Unknown module source" << endl;
cout << "Err: Unknown module source." << endl;
}
// Add the Cx,i,p decision variables
......@@ -1103,7 +1093,7 @@ void MOIP::allowed_motifs_from_rin(args_of_parallel_func arg_struct)
mutex& posInsertionSites_access = arg_struct.posInsertionSites_mutex;
std::ifstream motif;
string filepath = rinfile.string();
string filepath = rinfile.string();
vector<vector<Component>> vresults, r_vresults;
vector<string> component_sequences;
uint carnaval_id;
......@@ -1112,7 +1102,7 @@ void MOIP::allowed_motifs_from_rin(args_of_parallel_func arg_struct)
string reversed_rna = rna_.get_seq();
std::reverse(reversed_rna.begin(), reversed_rna.end());
filenumber = filepath.substr(filepath.find("Subfiles/")+9, filepath.find(".txt"));
filenumber = filepath.substr(filepath.find("Subfiles/")+9, filepath.find(".txt"));
carnaval_id = 1 + stoi(filenumber); // Start counting at 1 to be consistant with the website numbering
motif = std::ifstream(rinfile.string());
......@@ -1137,13 +1127,13 @@ void MOIP::allowed_motifs_from_rin(args_of_parallel_func arg_struct)
{
Motif temp_motif = Motif(v, rinfile, carnaval_id, false);
bool unprobable = false;
for (const Link& l : temp_motif.links_)
{
if (!allowed_basepair(l.nts.first,l.nts.second))
unprobable = true;
}
if (unprobable) continue;
bool unprobable = false;
for (const Link& l : temp_motif.links_)
{
if (!allowed_basepair(l.nts.first,l.nts.second))
unprobable = true;
}
if (unprobable) continue;
// Add it to the results vector
unique_lock<mutex> lock(posInsertionSites_access);
......@@ -1328,7 +1318,7 @@ void MOIP::allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pai
mutex& posInsertionSites_access = arg_struct.posInsertionSites_mutex;
std::ifstream motif;
string filepath = jsonfile.string();
string filepath = jsonfile.string();
vector<vector<Component>> vresults, r_vresults;
vector<string> component_sequences;
vector<string> component_contacts;
......
/***
Biorseo, Louis Becquey, nov 2018-August 2020
Special thanks to Lénaic Durand for working a lot on version 2.0
louis.becquey@univ-evry.fr
Biorseo, Louis Becquey, nov 2018-nov 2021
Special thanks to Lénaic Durand and Nathalie Bertrand for working a lot on version 2.0
louis.becquey@univ-evry.fr (likely expired email account, find me on the internet)
***/
#include <cmath>
......@@ -22,268 +22,262 @@ namespace po = boost::program_options;
string remove_ext(const char* mystr, char dot, char sep)
{
// COPYPASTA from stackoverflow
char *retstr, *lastdot, *lastsep;
// Error checks and allocate string.
if (mystr == nullptr) return nullptr;
if ((retstr = static_cast<char*>(malloc(strlen(mystr) + 1))) == nullptr) return nullptr;
// Make a copy and find the relevant characters.
strcpy(retstr, mystr);
lastdot = strrchr(retstr, dot);
lastsep = (sep == 0) ? nullptr : strrchr(retstr, sep);
// If it has an extension separator.
if (lastdot != nullptr) {
// and it's before the extenstion separator.
if (lastsep != nullptr) {
if (lastsep < lastdot) {
// then remove it.
*lastdot = '\0';
}
} else {
// Has extension separator with no path separator.
*lastdot = '\0';
}
}
// Return the modified string.
return string(retstr);
// COPYPASTA from stackoverflow
char *retstr, *lastdot, *lastsep;
// Error checks and allocate string.
if (mystr == nullptr) return nullptr;
if ((retstr = static_cast<char*>(malloc(strlen(mystr) + 1))) == nullptr) return nullptr;
// Make a copy and find the relevant characters.
strcpy(retstr, mystr);
lastdot = strrchr(retstr, dot);
lastsep = (sep == 0) ? nullptr : strrchr(retstr, sep);
// If it has an extension separator.
if (lastdot != nullptr) {
// and it's before the extenstion separator.
if (lastsep != nullptr) {
if (lastsep < lastdot) {
// then remove it.
*lastdot = '\0';
}
} else {
// Has extension separator with no path separator.
*lastdot = '\0';
}
}
// Return the modified string.
return string(retstr);
}
int main(int argc, char* argv[])
{
/* VARIABLE DECLARATIONS */
string inputName, outputName, motifs_path_name, basename;
bool verbose = false;
float theta_p_threshold;
char obj_function_nbr = 'B';
char mea_or_mfe = 'b';
list<Fasta> f;
ofstream outfile;
SecondaryStructure bestSSO1, bestSSO2;
RNA myRNA;
/* ARGUMENT CHECKING */
po::options_description desc("Options");
desc.add_options()
("help,h", "Print the help message")
("version", "Print the program version")
("seq,s", po::value<string>(&inputName)->required(), "Fasta file containing the RNA sequence")
("descfolder,d", po::value<string>(&motifs_path_name), "A folder containing modules in .desc format, as produced by Djelloul & Denise's catalog program")
("rinfolder,x", po::value<string>(&motifs_path_name), "A folder containing CaRNAval's RINs in .txt format, as produced by script transform_caRNAval_pickle.py")
("jsonfolder,a", po::value<string>(&motifs_path_name), "A folder containing the motif library of Isaure in .json format")
("jar3dcsv,j", po::value<string>(&motifs_path_name), "A file containing the output of JAR3D's search for motifs in the sequence, as produced by biorseo.py")
("bayespaircsv,b", po::value<string>(&motifs_path_name), "A file containing the output of BayesPairing's search for motifs in the sequence, as produced by biorseo.py")
("first-objective,c", po::value<unsigned int>(&MOIP::obj_to_solve_)->default_value(2), "Objective to solve in the mono-objective portions of the algorithm")
("output,o", po::value<string>(&outputName), "A file to summarize the computation results")
("theta,t", po::value<float>(&theta_p_threshold)->default_value(0.001), "Pairing probability threshold to consider or not the possibility of pairing")
("function,f", po::value<char>(&obj_function_nbr)->default_value('B'), "What objective function to use to include motifs: square of motif size in nucleotides like "
"RNA-MoIP (A), light motif size + high number of components (B), site score (C), light motif size + site score + high number of components (D)")
("MFE,e", "Use as function for objective 2 MFE (Minimum Free Energy)")
("MEA,a", "Use as function for objective 2 MEA (Maximum Expected Accuracy) it's also choose by default")
("disable-pseudoknots,n", "Add constraints forbidding the formation of pseudoknots")
("limit,l", po::value<unsigned int>(&MOIP::max_sol_nbr_)->default_value(500), "Intermediate number of solutions in the Pareto set above which we give up the calculation.")
("verbose,v", "Print what is happening to stdout");
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
basename = remove_ext(inputName.c_str(), '.', '/');
try {
po::store(po::parse_command_line(argc, argv, desc), vm); // can throw
if (vm.count("help") or vm.count("-h")) {
cout << "Biorseo, bio-objective integer linear programming framework to predict RNA secondary "
"structures by including known RNA modules."
<< endl
<< "developped by Louis Becquey (louis.becquey@univ-evry.fr), 2018-2020" << endl
<< endl
<< desc << endl;
return EXIT_SUCCESS;
}
if (vm.count("version")) {
cout << "Biorseo v2.0, dockerized, August 2020" << endl;
return EXIT_SUCCESS;
}
if (vm.count("MFE")) mea_or_mfe = 'a';
if (vm.count("MEA")) {
mea_or_mfe = 'b';
}
if (vm.count("verbose")) verbose = true;
if (vm.count("disable-pseudoknots")) MOIP::allow_pk_ = false;
if (!vm.count("jar3dcsv") and !vm.count("bayespaircsv") and !vm.count("descfolder") and !vm.count("rinfolder") and !vm.count("jsonfolder")) {
cerr << "\033[31mYou must provide at least one of --descfolder, --rinfolder, --jar3dcsv or --bayespaircsv.\033[0m See --help "
"for more information."
<< endl;
return EXIT_FAILURE;
}
if ((vm.count("-d") or vm.count("-x")) and (obj_function_nbr == 'C' or obj_function_nbr == 'D')) {
cerr << "\033[31mYou must provide --jar3dcsv or --bayespaircsv to use --function C or --function D.\033[0m See "
"--help for more information."
<< endl;
return EXIT_FAILURE;
}
po::notify(vm); // throws on error, so do after help in case there are any problems
} catch (po::error& e) {
cerr << "ERROR: \033[31m" << e.what() << "\033[0m" << endl;
cerr << desc << endl;
return EXIT_FAILURE;
}
MOIP::obj_function_nbr_ = obj_function_nbr;
MOIP::obj_function2_nbr_ = mea_or_mfe;
/* FILE PARSING */
// load fasta file
if (verbose) cout << "Reading input files..." << endl;
if (access(inputName.c_str(), F_OK) == -1) {
cerr << "\033[31m" << inputName << " not found\033[0m" << endl;
return EXIT_FAILURE;
}
Fasta::load(f, inputName.c_str());
list<Fasta>::iterator fa = f.begin();
if (verbose) cout << "loading " << fa->name() << "..." << endl;
myRNA = RNA(fa->name(), fa->seq(), verbose);
if (verbose) cout << "\t> " << inputName << " successfuly loaded (" << myRNA.get_RNA_length() << " nt)" << endl;
// load CSV file
if (access(motifs_path_name.c_str(), F_OK) == -1) {
cerr << "\033[31m" << motifs_path_name << " not found\033[0m" << endl;
return EXIT_FAILURE;
}
/* FIND PARETO SET */
string source;
if (vm.count("jar3dcsv"))
source = "jar3dcsv";
else if (vm.count("bayespaircsv"))
source = "bayespaircsv";
else if (vm.count("rinfolder"))
source = "rinfolder";
else if (vm.count("descfolder"))
source = "descfolder";
else
source = "jsonfolder";
MOIP myMOIP = MOIP(myRNA, source, motifs_path_name.c_str(), theta_p_threshold, verbose);
double min, max;
IloConstraintArray F(myMOIP.get_env());
// return 0;
//test
/*string best_score;
bool flag = false;*/
if (verbose)
cout << "Solving..." << endl;
try {
bestSSO1 = myMOIP.solve_objective(1, -__DBL_MAX__, __DBL_MAX__);
if (verbose) cout << endl;
bestSSO2 = myMOIP.solve_objective(2, -__DBL_MAX__, __DBL_MAX__);
if (verbose) {
cout << endl << "Best solution according to objective 1 :" << bestSSO1.to_string() << endl;
cout << "Best solution according to objective 2 :" << bestSSO2.to_string() << endl;
}
//test
/*string s = bestSSO1.to_string();
string delimiter = "\t";
best_score = bestSSO1.to_string().substr(s.find(delimiter) + delimiter.size());
best_score = best_score.substr(0, best_score.find(delimiter));*/
// extend the Pareto set on top
if (MOIP::obj_to_solve_ == 1) {
myMOIP.add_solution(bestSSO1);
min = bestSSO1.get_objective_score(2) + MOIP::precision_;
max = bestSSO2.get_objective_score(2) + MOIP::precision_;
if (verbose) cout << endl << "Solving obj1 on top of best solution 1." << endl;
} else {
myMOIP.add_solution(bestSSO2);
min = bestSSO2.get_objective_score(1) + MOIP::precision_;
max = bestSSO1.get_objective_score(1) + MOIP::precision_;
if (verbose) cout << endl << "Solving obj2 on top of best solution 2." << endl;
}
if (verbose)
cout << setprecision(-log10(MOIP::precision_) + 4) << "\nSolving objective function " << MOIP::obj_to_solve_ << ", on top of "
<< min << ": Obj" << 3 - MOIP::obj_to_solve_ << " being in [" << min << ", " << max << "]..." << endl;
myMOIP.search_between(min, max);
// extend the Pareto set below
if (MOIP::obj_to_solve_ == 1) {
if (verbose) cout << endl << "Solving obj1 below best solution 1." << endl;
min = -__DBL_MAX__;
max = bestSSO1.get_objective_score(2);
} else {
if (verbose) cout << endl << "Solving obj2 below best solution 2." << endl;
min = -__DBL_MAX__;
max = bestSSO2.get_objective_score(1);
}
if (verbose)
cout << setprecision(-log10(MOIP::precision_) + 4) << "\nSolving objective function " << MOIP::obj_to_solve_
<< ", below (or eq. to) " << max << ": Obj" << 3 - MOIP::obj_to_solve_ << " being in [" << min << ", "
<< max << "]..." << endl
<< "\t> Forbidding " << F.getSize() << " solutions found in [" << setprecision(10)
<< min - MOIP::precision_ << ", " << max + MOIP::precision_ << ']' << endl;
myMOIP.search_between(min, max);
} catch (IloCplex::Exception& e) {
cerr << "\033[31mCplex Exception: " << e.getMessage() << "\033[0m" << endl;
exit(EXIT_FAILURE);
}
/* DISPLAY RESULTS */
// print the pareto set
if (verbose) {
cout << endl << endl << "---------------------------------------------------------------" << endl;
cout << "Whole Pareto Set:" << endl;
for (uint i = 0; i < myMOIP.get_n_solutions(); i++) myMOIP.solution(i).print();
cout << endl;
cout << myMOIP.get_n_candidates() << " candidate insertion sites, " << myMOIP.get_n_solutions() << " solutions kept." << endl;
cout << "Best value for Motif insertion objective: " << bestSSO1.get_objective_score(1) << endl;
cout << "Best value for structure expected accuracy: " << bestSSO2.get_objective_score(2) << endl;
}
// Save it to file
if (vm.count("output")) {
if (verbose) cout << "Saving structures to " << outputName << "..." << endl;
outfile.open(outputName);
outfile << fa->name() << endl << fa->seq() << endl;
//cout << "----struc----" << endl << myMOIP.solution(0).to_string() << endl;
for (uint i = 0; i < myMOIP.get_n_solutions(); i++) {
outfile << myMOIP.solution(i).to_string() << endl << structure_with_contacts(myMOIP.solution(i)) << endl;
string str1 = myMOIP.solution(i).to_string();
//Check if the best score for obj2 is correctly include in the results
/*string delimiter = "\t";
string obj2 = str1.substr(str1.find(delimiter) + delimiter.size());
obj2 = obj2.substr(0, obj2.find(delimiter));
if (obj2.compare(best_score) == 0)
flag = true;*/
}
/*if (!flag)
cout << "\033[1m\033[31mBest score not find for " << outputName << " !\033[0m" << endl;
else
cout << "OK for "<< outputName << "!" << endl;*/
outfile.close();
}
/* QUIT */
return EXIT_SUCCESS;
/* VARIABLE DECLARATIONS */
string inputName, outputName, motifs_path_name, basename;
bool verbose = false;
float theta_p_threshold;
int obj_function_nbr, mychar;
char mea_or_mfe = 'b'; // a for MFE, b for MEA
list<Fasta> f;
ofstream outfile;
SecondaryStructure bestSSO1, bestSSO2;
RNA myRNA;
/* ARGUMENT CHECKING */
po::options_description desc("Options");
desc.add_options()
("help,h", "Print the help message")
("version", "Print the program version")
("seq,s", po::value<string>(&inputName)->required(), "Fasta file containing the RNA sequence")
("descfolder,d", po::value<string>(&motifs_path_name), "A folder containing modules in .desc format, as produced by Djelloul & Denise's catalog program")
("rinfolder,x", po::value<string>(&motifs_path_name), "A folder containing CaRNAval's RINs in .txt format, as produced by script transform_caRNAval_pickle.py")
("jsonfolder,a", po::value<string>(&motifs_path_name), "A folder containing a custom motif library in .json format")
("jar3dcsv,j", po::value<string>(&motifs_path_name), "A file containing the output of JAR3D's search for motifs in the sequence, as produced by biorseo.py")
("bayespaircsv,b", po::value<string>(&motifs_path_name), "A file containing the output of BayesPairing's search for motifs in the sequence, as produced by biorseo.py")
("first-objective,c", po::value<unsigned int>(&MOIP::obj_to_solve_)->default_value(2), "Objective to solve in the mono-objective portions of the algorithm")
("output,o", po::value<string>(&outputName), "A file to summarize the computation results")
("theta,t", po::value<float>(&theta_p_threshold)->default_value(1e-3, "0.001"), "Pairing probability threshold to consider or not the possibility of pairing")
("function,f", po::value<int>(&obj_function_nbr)->default_value('B'), "What objective function to use to include motifs: square of motif size in nucleotides like "
"RNA-MoIP (A), light motif size + high number of components (B), site score (C), light motif size + site score + high number of components (D)")
("plop,p", po::value<int>(&mychar), "Just a test")
("mfe,E", "Minimize stacking energies as second criteria (should lead to MFE structure)")
("mea,A", "(default) Maximize expected accuracy as second criteria (should lead to MEA structure)")
("disable-pseudoknots,n", "Add constraints forbidding the formation of pseudoknots")
("limit,l", po::value<unsigned int>(&MOIP::max_sol_nbr_)->default_value(500), "Intermediate number of solutions in the Pareto set above which we give up the calculation.")
("verbose,v", "Print what is happening to stdout");
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
basename = remove_ext(inputName.c_str(), '.', '/');
try {
po::store(po::parse_command_line(argc, argv, desc), vm); // can throw
if (vm.count("help") or vm.count("-h")) {
cout << "Biorseo, bio-objective integer linear programming framework to predict RNA secondary "
"structures by including known RNA modules."
<< endl
<< "developped by Louis Becquey, 2018-2021" << endl
<< "Lénaïc Durand, 2019" << endl
<< "Nathalie Bernard, 2021" << endl
<< endl
<< desc << endl;
return EXIT_SUCCESS;
}
if (vm.count("version")) {
cout << "Biorseo v2.1, dockerized, November 2021" << endl;
return EXIT_SUCCESS;
}
if (vm.count("mfe")) mea_or_mfe = 'a';
if (vm.count("mea")) mea_or_mfe = 'b';
if (vm.count("verbose")) verbose = true;
if (vm.count("disable-pseudoknots")) MOIP::allow_pk_ = false;
if (!vm.count("jar3dcsv") and !vm.count("bayespaircsv") and !vm.count("descfolder") and !vm.count("rinfolder") and !vm.count("jsonfolder")) {
cerr << "\033[31mYou must provide at least one of --descfolder, --rinfolder, --jsonfolder, --jar3dcsv or --bayespaircsv.\033[0m See --help "
"for more information."
<< endl;
return EXIT_FAILURE;
}
if ((vm.count("descfolder") or vm.count("jsonfolder") or vm.count("rinfolder")) and (obj_function_nbr == 'C' or obj_function_nbr == 'D')) {
cerr << "\033[31mYou must provide --jar3dcsv or --bayespaircsv to use --function C or --function D.\033[0m See "
"--help for more information."
<< endl;
return EXIT_FAILURE;
} else {
cout << "char: " << char(obj_function_nbr) << " defaulted: " << vm["function"].defaulted() << " plop: " << vm.count("plop") << " " << char(mychar) << endl;
}
po::notify(vm); // throws on error, so do after help in case there are any problems
} catch (po::error& e) {
cerr << "ERROR: \033[31m" << e.what() << "\033[0m" << endl;
cerr << desc << endl;
return EXIT_FAILURE;
}
MOIP::obj_function_nbr_ = obj_function_nbr;
MOIP::obj_function2_nbr_ = mea_or_mfe;
/* FILE PARSING */
// load fasta file
if (verbose) cout << "Reading input files..." << endl;
if (access(inputName.c_str(), F_OK) == -1) {
cerr << "\033[31m" << inputName << " not found\033[0m" << endl;
return EXIT_FAILURE;
}
Fasta::load(f, inputName.c_str());
list<Fasta>::iterator fa = f.begin();
if (verbose) cout << "loading " << fa->name() << "..." << endl;
myRNA = RNA(fa->name(), fa->seq(), verbose);
if (verbose) cout << "\t> " << inputName << " successfuly loaded (" << myRNA.get_RNA_length() << " nt)" << endl;
// load CSV file
if (access(motifs_path_name.c_str(), F_OK) == -1) {
cerr << "\033[31m" << motifs_path_name << " not found\033[0m" << endl;
return EXIT_FAILURE;
}
/* FIND PARETO SET */
string source;
if (vm.count("jar3dcsv"))
source = "jar3dcsv";
else if (vm.count("bayespaircsv"))
source = "bayespaircsv";
else if (vm.count("rinfolder"))
source = "rinfolder";
else if (vm.count("descfolder"))
source = "descfolder";
else if (vm.count("jsonfolder"))
source = "jsonfolder";
else
cerr << "ERR: no source of modules provided !" << endl;
return 0;
MOIP myMOIP = MOIP(myRNA, source, motifs_path_name.c_str(), theta_p_threshold, verbose);
double min, max;
IloConstraintArray F(myMOIP.get_env());
if (verbose)
cout << "Solving..." << endl;
try {
bestSSO1 = myMOIP.solve_objective(1, -__DBL_MAX__, __DBL_MAX__);
if (verbose) cout << endl;
bestSSO2 = myMOIP.solve_objective(2, -__DBL_MAX__, __DBL_MAX__);
if (verbose) {
cout << endl << "Best solution according to objective 1 :" << bestSSO1.to_string() << endl;
cout << "Best solution according to objective 2 :" << bestSSO2.to_string() << endl;
}
// extend the Pareto set on top
if (MOIP::obj_to_solve_ == 1) {
myMOIP.add_solution(bestSSO1);
min = bestSSO1.get_objective_score(2) + MOIP::precision_;
max = bestSSO2.get_objective_score(2) + MOIP::precision_;
if (verbose) cout << endl << "Solving obj1 on top of best solution 1." << endl;
} else {
myMOIP.add_solution(bestSSO2);
min = bestSSO2.get_objective_score(1) + MOIP::precision_;
max = bestSSO1.get_objective_score(1) + MOIP::precision_;
if (verbose) cout << endl << "Solving obj2 on top of best solution 2." << endl;
}
if (verbose)
cout << setprecision(-log10(MOIP::precision_) + 4) << "\nSolving objective function " << MOIP::obj_to_solve_ << ", on top of "
<< min << ": Obj" << 3 - MOIP::obj_to_solve_ << " being in [" << min << ", " << max << "]..." << endl;
myMOIP.search_between(min, max);
// extend the Pareto set below
if (MOIP::obj_to_solve_ == 1) {
if (verbose) cout << endl << "Solving obj1 below best solution 1." << endl;
min = -__DBL_MAX__;
max = bestSSO1.get_objective_score(2);
} else {
if (verbose) cout << endl << "Solving obj2 below best solution 2." << endl;
min = -__DBL_MAX__;
max = bestSSO2.get_objective_score(1);
}
if (verbose)
cout << setprecision(-log10(MOIP::precision_) + 4) << "\nSolving objective function " << MOIP::obj_to_solve_
<< ", below (or eq. to) " << max << ": Obj" << 3 - MOIP::obj_to_solve_ << " being in [" << min << ", "
<< max << "]..." << endl
<< "\t> Forbidding " << F.getSize() << " solutions found in [" << setprecision(10)
<< min - MOIP::precision_ << ", " << max + MOIP::precision_ << ']' << endl;
myMOIP.search_between(min, max);
} catch (IloCplex::Exception& e) {
cerr << "\033[31mCplex Exception: " << e.getMessage() << "\033[0m" << endl;
exit(EXIT_FAILURE);
}
/* DISPLAY RESULTS */
// print the pareto set
if (verbose) {
cout << endl << endl << "---------------------------------------------------------------" << endl;
cout << "Whole Pareto Set:" << endl;
for (uint i = 0; i < myMOIP.get_n_solutions(); i++) myMOIP.solution(i).print();
cout << endl;
cout << myMOIP.get_n_candidates() << " candidate insertion sites, " << myMOIP.get_n_solutions() << " solutions kept." << endl;
cout << "Best value for Motif insertion objective: " << bestSSO1.get_objective_score(1) << endl;
cout << "Best value for structure expected accuracy: " << bestSSO2.get_objective_score(2) << endl;
}
// Save it to file
if (vm.count("output")) {
if (verbose) cout << "Saving structures to " << outputName << "..." << endl;
outfile.open(outputName);
outfile << fa->name() << endl << fa->seq() << endl;
for (uint i = 0; i < myMOIP.get_n_solutions(); i++) {
outfile << myMOIP.solution(i).to_string() << endl << structure_with_contacts(myMOIP.solution(i)) << endl;
string str1 = myMOIP.solution(i).to_string();
// Check if the best score for obj2 is actually included in the results
// string delimiter = "\t";
// string obj2 = str1.substr(str1.find(delimiter) + delimiter.size());
// obj2 = obj2.substr(0, obj2.find(delimiter));
// if (obj2.compare(best_score) == 0)
// flag = true;*/
}
// if (!flag)
// cout << "\033[1m\033[31mBest score not found for " << outputName << " !\033[0m" << endl;
// else
// cout << "OK for "<< outputName << "!" << endl;
outfile.close();
}
/* QUIT */
return EXIT_SUCCESS;
}
......
No preview for this file type
__'CRYSTAL_STRUCTURE_OF_A_TIGHT-BINDING_GLUTAMINE_TRNA_BOUND_TO_GLUTAMINE_AMINOACYL_TRNA_SYNTHETASE_'_(PDB_00376)
GGGGUAUCGCCAAGCGGUAAGGCACCGGAUUCUGAUUCCGGAGGUCGAGGUUCGAAUCCUCGUACCCCAGCCA
(((((((((([..)))....(((.(((((.({..).))))).])))((((.(.}.).)))))))))))..... + JSON432 + JSON615 32.0000000 19.2280288
................****.................................................****
(((((((((([..)))....(((.((((((....).))))).])))((((.(...).)))))))))))..... + JSON169 + JSON432 + JSON615 48.0000000 19.2123884
................****..........****...................................****
(((((((.(([.....))..(((.(((((.({..).))))).])))((((.(.}.).)))))))))))..... + JSON322 + JSON478 52.0000000 18.3718588
............****................**...................................****
((((((((((...)))....(([.((((((....).)))))...))((((.(...).))))))))))).]... + JSON169 + JSON615 + JSON763 57.0000000 18.3583187
................****..........****.......***...........................**
(((((((((([..)))....(((.((((((....).))))).])))(((.........))))))))))..... + JSON169 + JSON386 + JSON432 + JSON615 64.0000000 18.2180422
................****..........****...............****................****
(((((([(((...))).)..(((.(((((.(...).))))).){))((((.(...).))))]})))))..... + JSON322 + JSON471 80.0000000 17.8060915
........**.....................***................*...........*........**
(((((([((([..))).)..((..((((((....).))))).]{))((((.(...).))))]})))))..... + JSON169 + JSON432 + JSON471 96.0000000 17.7717258
........**............*.......****......................*.....*......****
(((((.((((...)))....(((.(((((.[.....))))).){))((((.(..]).)))))})))))..... + JSON154 + JSON432 + JSON471 105.0000000 17.5888835
........**......**.............****.....................*.....*......****
(((((.(((([..)))....((..((((((....).))))).][))((((.(...).)))))])))))..... + JSON169 + JSON432 + JSON471 + JSON615 112.0000000 17.5772109
........**......****..*.......****......................*.....*......****
(((((.((((...)))....(((.((((([[.....))))).){))(((.....]]..))))})))))..... + JSON154 + JSON386 + JSON432 + JSON471 121.0000000 16.5955778
........**......**.............****..............****...*.....*......****
(((((.(((([..)))....((..((((((....).))))).][))(((.........))))])))))..... + JSON169 + JSON386 + JSON432 + JSON471 + JSON615 128.0000000 16.5828646
........**......****..*.......****...............****...*.....*......****
(((((.(.(([.........{)).(((((.(...).))))).][(}[[[[.[..)].]]]])])))))..... + JSON322 + JSON471 + JSON478 + JSON615 132.0000000 15.2549277
........**..********...........***................*...........*......****
(((((((.............(...((((((....).)))))....)(((.........))))))))))..... + JSON169 + JSON386 + JSON432 + JSON432 + JSON473 + JSON478 + JSON615 141.0000000 14.8831569
........************.**.......****.......****....****...............*****
((((((..(((.........))).(((((.......)))))....(((((.(...).)))))))))))..... + JSON38 2916.0000000 14.7452026
*******.*******......*****.....******............................********
((((((..(((.........))).(((((.......)))))....((((.........))))))))))..... + JSON38 + JSON386 2932.0000000 13.7508564
*******.*******......*****.....******............****............********
((((((..(((.........))).(((((.......)))))....((....(...)....))))))))..... + JSON38 + JSON473 2941.0000000 11.8254010
*******.*******......*****.....******..........****...*..........********
(((((((.............(...((((((....).)))))....)(((.........))))))))))..... + JSON169 + JSON386 + JSON432 + JSON432 + JSON473 + JSON478 + JSON615 141.0000000 14.8831569
........************.**.......****.......****....****..*.............****
(((((.(.(([.........{)).(((((.(...).))))).][(}[[[[.[..)].]]]])])))))..... + JSON322 + JSON471 + JSON478 + JSON615 132.0000000 15.2549277
........**..********...........***......................*.....*......****
(((((.(((([..)))....((..((((((....).))))).][))((((.(...).)))))])))))..... + JSON169 + JSON432 + JSON471 + JSON615 112.0000000 17.5772109
........**......****..*.......****................*...........*......****
(((((.((((...)))....(((.(((((.[.....))))).){))((((.(..]).)))))})))))..... + JSON154 + JSON432 + JSON471 105.0000000 17.5888835
........**......**.............****...............*...........*......****
(((((([((([..))).)..((..((((((....).))))).]{))((((.(...).))))]})))))..... + JSON169 + JSON432 + JSON471 96.0000000 17.7717258
........**............*.......****................*...........*......****
(((((([(((...))).)..(((.(((((.(...).))))).){))((((.(...).))))]})))))..... + JSON322 + JSON471 80.0000000 17.8060915
........**.....................***......................*.....*........**
(((((((((([..)))....(((.(((((.({..).))))).])))((((.(.}.).)))))))))))..... + JSON322 + JSON615 32.0000000 19.2280288
................****............**.....................................**
__'CRYSTAL_STRUCTURE_OF_A_TIGHT-BINDING_GLUTAMINE_TRNA_BOUND_TO_GLUTAMINE_AMINOACYL_TRNA_SYNTHETASE_'_(PDB_00376)
GGGGUAUCGCCAAGCGGUAAGGCACCGGAUUCUGAUUCCGGAGGUCGAGGUUCGAAUCCUCGUACCCCAGCCA
(((((((((([..)))....(((.(((((.({..).))))).])))((((.(.}.).)))))))))))..... + JSON322 + JSON615 1.5000000 19.2280288
................****............**.....................................**
(((((((((([..)))....(((.(((((.(...).))))).])))((((.(...).)))))))))))..... + JSON322 + JSON615 1.5000200 19.2269926
................****............**.....................................**
(((((((.(([.....))..(((.(((((.({..).))))).])))((((.(.}.).)))))))))))..... + JSON322 + JSON478 1.7737056 18.3718588
............****................**...................................****
((((((((((...)))....(([.((((((....).)))))...))((((.(...).))))))))))).]... + JSON169 + JSON615 + JSON763 1.8613531 18.3583187
................****..........****.......***...........................**
.((((((((([..)))....(((.(((((.({..).))))).])))((((.(.}.).))))))))))...... + JSON322 + JSON432 + JSON615 2.0000000 18.2455939
................****............**.................................******
(((((((((({..)))....(((.((((([([..).))))).})))(((....].]..))))))))))..... + JSON322 + JSON386 + JSON615 2.0000200 18.2347086
................****............**...............****..................**
(((((([(((...))).)..(((.(((((.(...).))))).){))((((.(...).))))]})))))..... + JSON322 + JSON471 3.0000000 17.8060915
........**.....................***......................*.....*........**
(((((.((((...)))....(((.(((((.(...).))))).)[))((((.(...).)))))])))))..... + JSON322 + JSON471 + JSON615 3.5000000 17.6115766
........**......****...........***......................*.....*........**
.((((.((((...)))....(((.(((((.(...).))))).)[))((((.(...).)))))]))))...... + JSON322 + JSON432 + JSON471 + JSON615 4.0000000 16.6291417
........**......****...........***......................*.....*....******
.((((.((((...)))....(((.((((([(...).))))).){))(((......]..))))}))))...... + JSON322 + JSON386 + JSON432 + JSON471 + JSON615 4.5000000 15.6358360
........**......****...........***...............****...*.....*....******
(((((.(.(([.........{)).(((((<(...).))))).][(}[[[.....)>..]]])])))))..... + JSON322 + JSON386 + JSON471 + JSON478 + JSON615 4.7737056 14.2616220
........**..********...........***...............****...*.....*......****
.((((.((((...)))....(((.(((((.(...).)))))[){))(....(...)...]))}))))...... + JSON322 + JSON432 + JSON471 + JSON473 + JSON615 4.8613531 13.7313706
........**......****...........***.............****...*.*.....*....******
.((((.((.....[[)....(]].((((([(...).)))))..{[)(((.....]]..))))}))))...... + JSON322 + JSON386 + JSON432 + JSON432 + JSON471 + JSON615 5.0000000 13.7177100
........****.**.****...........***...............****...*.....*....******
(((((.(.((..........[)).(((((.(...).)))))(({(][....[..)]..))])})))))..... + JSON322 + JSON471 + JSON473 + JSON478 + JSON615 5.1350587 12.3759713
........**..********...........***.............****.....*.....*.....*****
.((((.....(..[[)....(]]..(((([(...).))))...{[)(((.....]]..))).}))))...... + JSON322 + JSON386 + JSON432 + JSON471 + JSON615 + JSON632 5.2737056 11.9911280
......****...**.****...**......***...............****...*.....*....******
.((((.((.....[[)....(]].(((((.(...).)))))[[{.)(....(...)..]]))}))))...... + JSON322 + JSON432 + JSON432 + JSON471 + JSON473 + JSON615 5.3613531 11.8157013
........****.**.****...........***.............****...*.*.....*....******
.((....(.....[[)....(]].((((([(...).))))).{{[)(((.....]]..))).}.}))...... + JSON322 + JSON386 + JSON432 + JSON432 + JSON471 + JSON615 + JSON928 5.5000000 11.2444565
...****.****.**.****...........***...............****...*.....*....******
.(........(..[[)....(]].((((([(...).))))).{{[)(((.....]]..))).}..})...... + JSON322 + JSON386 + JSON432 + JSON433 + JSON471 + JSON615 + JSON632 5.7737056 10.1268257
..********...**.****...........***...............****...*.....***..******
.((....(.....[[)....(]].(((((.(...).)))))[[{[)(....(..])..]]).}..))...... + JSON322 + JSON432 + JSON432 + JSON471 + JSON473 + JSON615 + JSON928 5.8613531 9.3610813
...****.****.**.****...........***.............****.....*.....**...******
.((....(.....[[)....(]].((.[.[(...)....)).{{[)(((.....]].]))).}.}))...... + JSON156 + JSON322 + JSON386 + JSON432 + JSON432 + JSON471 + JSON615 + JSON928 6.0000000 8.2749650
...****.****.**.****...........***.****..........****...*.....*....******
.(........(..[[)....(]].(((((.(...).)))))[[{.)(....(...)..]]).}...)...... + JSON322 + JSON432 + JSON433 + JSON471 + JSON473 + JSON615 + JSON632 6.1350587 8.2098083
..********...**.****...........***.............****...*.*.....***..******
.(........(..[[)....(]].((.[.[(...)....)).{{[)(((.....]].]))).}..})...... + JSON156 + JSON322 + JSON386 + JSON432 + JSON433 + JSON471 + JSON615 + JSON632 6.2737056 7.1573341
..********...**.****...........***.****..........****...*.....***..******
.((....(.....[[)....(]].((.[..(...)....)){{<[)(....(..]).]}}).>..))...... + JSON156 + JSON322 + JSON432 + JSON432 + JSON471 + JSON473 + JSON615 + JSON928 6.3613531 6.3915897
...****.****.**.****...........***.****........****.....*.....**...******
.(........(..[[)....(]].((.[..(...)....)){{<.)(....(...).]}}).>...)...... + JSON156 + JSON322 + JSON432 + JSON433 + JSON471 + JSON473 + JSON615 + JSON632 6.6350587 5.2403168
..********...**.****...........***.****........****...*.*.....***..******
.((((.((((...)))....((..(((((.({..).)))))[[<))(....(.}.)..]]))>))))...... + JSON322 + JSON432 + JSON471 + JSON473 + JSON615 4.8613531 13.7314599
........**......****..*.........**.............****...*.*.....*....******
.((((.((((...)))....(((.(((((.(...).))))).)[))((((.(...).)))))]))))...... + JSON322 + JSON432 + JSON471 + JSON615 4.0000000 16.6291417
........**......****...........***................*...........*....******
(((((.((((...)))....(((.(((((.(...).))))).)[))((((.(...).)))))])))))..... + JSON322 + JSON471 + JSON615 3.5000000 17.6115766
........**......****...........***................*...........*........**
(((((([(((...))).)..(((.(((((.(...).))))).){))((((.(...).))))]})))))..... + JSON322 + JSON471 3.0000000 17.8060915
........**.....................***................*...........*........**
__'CRYSTAL_STRUCTURE_OF_A_TIGHT-BINDING_GLUTAMINE_TRNA_BOUND_TO_GLUTAMINE_AMINOACYL_TRNA_SYNTHETASE_'_(PDB_00376)
GGGGUAUCGCCAAGCGGUAAGGCACCGGAUUCUGAUUCCGGAGGUCGAGGUUCGAAUCCUCGUACCCCAGCCA
(((((((((([..)))....(((.(((((.({..).))))).])))((((.(.}.).)))))))))))..... 0.0000000 19.2280288
.........................................................................
(((((((((([..)))....(((.(((((.({..).))))).])))((((.(.}.).)))))))))))..... + JSON322 0.0000000 19.2280288
................................**.....................................**