Louis BECQUEY

Biokop mode

...@@ -57,7 +57,7 @@ MOIP::MOIP() {} ...@@ -57,7 +57,7 @@ MOIP::MOIP() {}
57 57
58 MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool verbose) : verbose_{verbose}, rna_(rna) 58 MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool verbose) : verbose_{verbose}, rna_(rna)
59 { 59 {
60 - if (!exists(source_path)) 60 + if (obj_function2_nbr_ != 'c' and !exists(source_path))
61 { 61 {
62 cerr << "ERR: Hmh, i can't find this: " << source_path << endl; 62 cerr << "ERR: Hmh, i can't find this: " << source_path << endl;
63 exit(EXIT_FAILURE); 63 exit(EXIT_FAILURE);
...@@ -73,14 +73,14 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool ...@@ -73,14 +73,14 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
73 73
74 // Add the y^u_v decision variables 74 // Add the y^u_v decision variables
75 if (verbose_) cout << "\t> Legal basepairs : "; 75 if (verbose_) cout << "\t> Legal basepairs : ";
76 - uint u, v, c = 0; 76 + uint u, v, cy = 0;
77 index_of_yuv_ = vector<vector<size_t>>(rna_.get_RNA_length() - 6, vector<size_t>(0)); 77 index_of_yuv_ = vector<vector<size_t>>(rna_.get_RNA_length() - 6, vector<size_t>(0));
78 for (u = 0; u < rna_.get_RNA_length() - 6; u++) 78 for (u = 0; u < rna_.get_RNA_length() - 6; u++)
79 for (v = u + 4; v < rna_.get_RNA_length(); v++) // A basepair is possible if v > u+3 79 for (v = u + 4; v < rna_.get_RNA_length(); v++) // A basepair is possible if v > u+3
80 if (rna_.get_pij(u, v) > theta) { 80 if (rna_.get_pij(u, v) > theta) {
81 if (verbose_) cout << u << '-' << v << " "; 81 if (verbose_) cout << u << '-' << v << " ";
82 - index_of_yuv_[u].push_back(c); 82 + index_of_yuv_[u].push_back(cy);
83 - c++; 83 + cy++;
84 char name[15]; 84 char name[15];
85 sprintf(name, "y%d,%d", u, v); 85 sprintf(name, "y%d,%d", u, v);
86 basepair_dv_.add(IloNumVar(env_, 0, 1, IloNumVar::Bool, name)); // A boolean whether u and v are paired 86 basepair_dv_.add(IloNumVar(env_, 0, 1, IloNumVar::Bool, name)); // A boolean whether u and v are paired
...@@ -91,14 +91,14 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool ...@@ -91,14 +91,14 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
91 91
92 // Add the x_i,j decision variables 92 // Add the x_i,j decision variables
93 if (verbose_) cout << "\t> The possible stacks of two base pairs (i,j) and (i+1,j-1) : "; 93 if (verbose_) cout << "\t> The possible stacks of two base pairs (i,j) and (i+1,j-1) : ";
94 - c = 0; 94 + uint cx = 0;
95 index_of_xij_ = vector<vector<size_t>>(rna_.get_RNA_length() - 6, vector<size_t>(0)); 95 index_of_xij_ = vector<vector<size_t>>(rna_.get_RNA_length() - 6, vector<size_t>(0));
96 for (u = 0; u < rna_.get_RNA_length() - 6; u++) 96 for (u = 0; u < rna_.get_RNA_length() - 6; u++)
97 for (v = u + 4; v < rna_.get_RNA_length(); v++) // A basepair is possible if v > u+3 97 for (v = u + 4; v < rna_.get_RNA_length(); v++) // A basepair is possible if v > u+3
98 if (rna_.get_pij(u, v) > theta and rna_.get_pij(u + 1, v - 1) > theta) { // or u-1 v+1 ?? 98 if (rna_.get_pij(u, v) > theta and rna_.get_pij(u + 1, v - 1) > theta) { // or u-1 v+1 ??
99 if (verbose_) cout << u << '-' << v << " "; 99 if (verbose_) cout << u << '-' << v << " ";
100 - index_of_xij_[u].push_back(c); 100 + index_of_xij_[u].push_back(cx);
101 - c++; 101 + cx++;
102 char name[15]; 102 char name[15];
103 sprintf(name, "x%d,%d", u, v); 103 sprintf(name, "x%d,%d", u, v);
104 stacks_dv_.add(IloNumVar(env_, 0, 1, IloNumVar::Bool, name)); // A boolean whether (u,v) and (u+1,v-1) are a stack 104 stacks_dv_.add(IloNumVar(env_, 0, 1, IloNumVar::Bool, name)); // A boolean whether (u,v) and (u+1,v-1) are a stack
...@@ -109,7 +109,6 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool ...@@ -109,7 +109,6 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
109 109
110 // Look for insertions sites, then create the appropriate Cxip variables 110 // Look for insertions sites, then create the appropriate Cxip variables
111 insertion_sites_ = vector<Motif>(); 111 insertion_sites_ = vector<Motif>();
112 -
113 if (verbose_) cout << "\t> Looking for insertion sites..." << endl; 112 if (verbose_) cout << "\t> Looking for insertion sites..." << endl;
114 113
115 if (source == "csvfile") 114 if (source == "csvfile")
...@@ -295,7 +294,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool ...@@ -295,7 +294,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
295 294
296 if (verbose_) cout << "\t> " << insertion_sites_.size() << " candidate motifs on " << accepted + errors << " (" << errors << " ignored motifs), after applying probability threshold of " << theta << endl; 295 if (verbose_) cout << "\t> " << insertion_sites_.size() << " candidate motifs on " << accepted + errors << " (" << errors << " ignored motifs), after applying probability threshold of " << theta << endl;
297 } 296 }
298 - else 297 + else if (obj_function2_nbr_ != 'c')
299 { 298 {
300 cout << "Err: Unknown module source." << endl; 299 cout << "Err: Unknown module source." << endl;
301 } 300 }
...@@ -328,7 +327,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool ...@@ -328,7 +327,7 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
328 } 327 }
329 } 328 }
330 329
331 - if (verbose_) cout << "\t> " << c << " + " << i << " (yuv + Cpxi) decision variables are used." << endl; 330 + if (verbose_) cout << "\t> " << cy << " + " << cx << " + " << i << " (yuv + xuv + Cpxi) decision variables are used." << endl;
332 331
333 // Adding the problem's constraints 332 // Adding the problem's constraints
334 model_ = IloModel(env_); 333 model_ = IloModel(env_);
...@@ -340,36 +339,50 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool ...@@ -340,36 +339,50 @@ MOIP::MOIP(const RNA& rna, string source, string source_path, float theta, bool
340 // exit(1); 339 // exit(1);
341 // } 340 // }
342 341
343 - // Define the motif objective function:
344 - obj1 = IloExpr(env_);
345 - for (uint i = 0; i < insertion_sites_.size(); i++) {
346 - IloNum sum_k = 0;
347 -
348 - switch (obj_function_nbr_) {
349 - case 'A':
350 - // RNA MoIP style
351 - for (const Component& c : insertion_sites_[i].comp) sum_k += c.k;
352 - obj1 += IloNum(sum_k * sum_k) * insertion_dv_[index_of_first_components[i]];
353 - break;
354 -
355 - case 'B':
356 - // everything but the Jar3D/Bayespairing score
357 - for (const Component& c : insertion_sites_[i].comp) sum_k += c.k;
358 - obj1 += IloNum(insertion_sites_[i].comp.size() / log2(sum_k)) * insertion_dv_[index_of_first_components[i]];
359 - break;
360 -
361 - case 'C':
362 - // Weighted by the JAR3D or BayesPairing score only:
363 - obj1 += IloNum(insertion_sites_[i].score_) * insertion_dv_[index_of_first_components[i]];
364 - break;
365 342
366 - case 'D': 343 + // Define the objective functions
367 - // everything 344 + obj1 = IloExpr(env_);
368 - for (const Component& c : insertion_sites_[i].comp) sum_k += c.k; 345 + if (obj_function2_nbr_ != 'c') {
369 - obj1 += IloNum(insertion_sites_[i].comp.size() * insertion_sites_[i].score_ / log2(sum_k)) * 346 + // Define the motif objective function:
370 - insertion_dv_[index_of_first_components[i]]; 347 + for (uint i = 0; i < insertion_sites_.size(); i++) {
371 - break; 348 + IloNum sum_k = 0;
349 +
350 + switch (obj_function_nbr_) {
351 + case 'A':
352 + // RNA MoIP style
353 + for (const Component& c : insertion_sites_[i].comp) sum_k += c.k;
354 + obj1 += IloNum(sum_k * sum_k) * insertion_dv_[index_of_first_components[i]];
355 + break;
356 +
357 + case 'B':
358 + // everything but the Jar3D/Bayespairing score
359 + for (const Component& c : insertion_sites_[i].comp) sum_k += c.k;
360 + obj1 += IloNum(insertion_sites_[i].comp.size() / log2(sum_k)) * insertion_dv_[index_of_first_components[i]];
361 + break;
362 +
363 + case 'C':
364 + // Weighted by the JAR3D or BayesPairing score only:
365 + obj1 += IloNum(insertion_sites_[i].score_) * insertion_dv_[index_of_first_components[i]];
366 + break;
367 +
368 + case 'D':
369 + // everything
370 + for (const Component& c : insertion_sites_[i].comp) sum_k += c.k;
371 + obj1 += IloNum(insertion_sites_[i].comp.size() * insertion_sites_[i].score_ / log2(sum_k)) *
372 + insertion_dv_[index_of_first_components[i]];
373 + break;
374 + }
375 + }
376 + } else {
377 + // user passed both --mea and --mfe, this is Biokop mode, obj1 will be MEA
378 + if (verbose_) cout << "Running in Biokop mode: MEA versus MFE, ignoring motifs." << endl;
379 + for (size_t u = 0; u < rna_.get_RNA_length() - 6; u++) {
380 + for (size_t v = u + 4; v < rna_.get_RNA_length(); v++) {
381 + if (allowed_basepair(u, v)) obj1 += (IloNum(rna_.get_pij(u, v)) * y(u, v));
382 + }
372 } 383 }
384 + // set obj_function2_nbr_ to a so that obj2 is set to MFE just here after.
385 + obj_function2_nbr_ = 'a';
373 } 386 }
374 387
375 //Stacking energy parameter matrix 388 //Stacking energy parameter matrix
...@@ -446,7 +459,7 @@ void MOIP::define_problem_constraints(string& source) ...@@ -446,7 +459,7 @@ void MOIP::define_problem_constraints(string& source)
446 } 459 }
447 460
448 // Ensure that the stacking of (i,j) and (i+1,j-1) exists if and only if the pairing (i,j) and (i+1, j-1) exist 461 // Ensure that the stacking of (i,j) and (i+1,j-1) exists if and only if the pairing (i,j) and (i+1, j-1) exist
449 - if (verbose_) cout << "\t> ensuring that the stacks are possible..." << endl; 462 + if (verbose_) cout << "\t> ensuring that the stacks are correct..." << endl;
450 for (u = 0; u < n - 5; u++) { 463 for (u = 0; u < n - 5; u++) {
451 for (v = u + 4; v < n; v++) { 464 for (v = u + 4; v < n; v++) {
452 if (allowed_basepair(u, v) and allowed_basepair(u + 1, v - 1)) { 465 if (allowed_basepair(u, v) and allowed_basepair(u + 1, v - 1)) {
...@@ -459,7 +472,7 @@ void MOIP::define_problem_constraints(string& source) ...@@ -459,7 +472,7 @@ void MOIP::define_problem_constraints(string& source)
459 model_.add(IloNum(2) * x(u, v) <= c7_1); 472 model_.add(IloNum(2) * x(u, v) <= c7_1);
460 if (verbose_) cout << "\t\t" << (2 * x(u,v) <= c7_1) << endl; 473 if (verbose_) cout << "\t\t" << (2 * x(u,v) <= c7_1) << endl;
461 model_.add(x(u, v) >= c7_2); 474 model_.add(x(u, v) >= c7_2);
462 - if (verbose_) cout << "\t\t" << (x(u, v) >= c7_2) << endl << endl; 475 + if (verbose_) cout << "\t\t" << (x(u, v) >= c7_2) << endl;
463 } 476 }
464 } 477 }
465 } 478 }
......
...@@ -61,7 +61,7 @@ int main(int argc, char* argv[]) ...@@ -61,7 +61,7 @@ int main(int argc, char* argv[])
61 bool verbose = false; 61 bool verbose = false;
62 float theta_p_threshold; 62 float theta_p_threshold;
63 char obj_function_nbr; 63 char obj_function_nbr;
64 - char mea_or_mfe = 'b'; // a for MFE, b for MEA 64 + char mea_or_mfe = 'b'; // a for MFE, b for MEA, c for both and no motifs
65 list<Fasta> f; 65 list<Fasta> f;
66 ofstream outfile; 66 ofstream outfile;
67 SecondaryStructure bestSSO1, bestSSO2; 67 SecondaryStructure bestSSO1, bestSSO2;
...@@ -121,6 +121,7 @@ int main(int argc, char* argv[]) ...@@ -121,6 +121,7 @@ int main(int argc, char* argv[])
121 po::notify(vm); // throws on error, so do after help in case there are any problems 121 po::notify(vm); // throws on error, so do after help in case there are any problems
122 if (vm.count("mfe")) mea_or_mfe = 'a'; 122 if (vm.count("mfe")) mea_or_mfe = 'a';
123 if (vm.count("mea")) mea_or_mfe = 'b'; 123 if (vm.count("mea")) mea_or_mfe = 'b';
124 + if (vm.count("mea") and vm.count("mfe")) mea_or_mfe = 'c';
124 if (vm.count("verbose")) verbose = true; 125 if (vm.count("verbose")) verbose = true;
125 if (vm.count("disable-pseudoknots")) MOIP::allow_pk_ = false; 126 if (vm.count("disable-pseudoknots")) MOIP::allow_pk_ = false;
126 } catch (po::error& e) { 127 } catch (po::error& e) {
...@@ -146,12 +147,6 @@ int main(int argc, char* argv[]) ...@@ -146,12 +147,6 @@ int main(int argc, char* argv[])
146 myRNA = RNA(fa->name(), fa->seq(), verbose); 147 myRNA = RNA(fa->name(), fa->seq(), verbose);
147 if (verbose) cout << "\t> " << inputName << " successfuly loaded (" << myRNA.get_RNA_length() << " nt)" << endl; 148 if (verbose) cout << "\t> " << inputName << " successfuly loaded (" << myRNA.get_RNA_length() << " nt)" << endl;
148 149
149 - // check motif folder exists
150 - if (access(motifs_path_name.c_str(), F_OK) == -1) {
151 - cerr << "\033[31m" << motifs_path_name << " not found\033[0m" << endl;
152 - return EXIT_FAILURE;
153 - }
154 -
155 string source; 150 string source;
156 Motif::delay = 1; 151 Motif::delay = 1;
157 if (vm.count("rinfolder")) 152 if (vm.count("rinfolder"))
...@@ -164,8 +159,16 @@ int main(int argc, char* argv[]) ...@@ -164,8 +159,16 @@ int main(int argc, char* argv[])
164 source = "jsonfolder"; 159 source = "jsonfolder";
165 else if (vm.count("pre-placed")) 160 else if (vm.count("pre-placed"))
166 source = "csvfile"; 161 source = "csvfile";
167 - else 162 + else if (mea_or_mfe != 'c') {
168 cerr << "ERR: no source of modules provided !" << endl; 163 cerr << "ERR: no source of modules provided !" << endl;
164 + return EXIT_FAILURE;
165 + }
166 +
167 + // check motif folder exists
168 + if (mea_or_mfe != 'c' and access(motifs_path_name.c_str(), F_OK) == -1) {
169 + cerr << "\033[31m" << motifs_path_name << " not found\033[0m" << endl;
170 + return EXIT_FAILURE;
171 + }
169 172
170 /* FIND PARETO SET */ 173 /* FIND PARETO SET */
171 174
......
...@@ -100,7 +100,6 @@ RNA::RNA(string name, string seq, bool verbose) ...@@ -100,7 +100,6 @@ RNA::RNA(string name, string seq, bool verbose)
100 // Free memory allocated by ViennaRNA 100 // Free memory allocated by ViennaRNA
101 free(save); 101 free(save);
102 } 102 }
103 -
104 else cerr << "NULL result returned by vrna_pfl_fold" << endl; 103 else cerr << "NULL result returned by vrna_pfl_fold" << endl;
105 } 104 }
106 105
......