Louis BECQUEY

Big cleanup for JSON format support

This diff is collapsed. Click to expand it.
...@@ -40,12 +40,9 @@ re: remove clean all ...@@ -40,12 +40,9 @@ re: remove clean all
40 .PHONY: clean 40 .PHONY: clean
41 clean: 41 clean:
42 $(rm) $(OBJECTS) 42 $(rm) $(OBJECTS)
43 - $(rm) doc/supplementary_material.bbl doc/supplementary_material.blg doc/supplementary_material.synctex.gz doc/supplementary_material.log doc/supplementary_material.aux
44 - $(rm) doc/main_bioinformatics.bbl doc/main_bioinformatics.blg doc/main_bioinformatics.synctex.gz doc/main_bioinformatics.log doc/main_bioinformatics.aux doc/OUP_First_SBk_Bot_8401-eps-converted-to.pdf
45 @echo -e "\033[00;32mCleanup completed.\033[00m" 43 @echo -e "\033[00;32mCleanup completed.\033[00m"
46 44
47 .PHONY: remove 45 .PHONY: remove
48 remove: 46 remove:
49 @$(rm) $(BINDIR)/$(TARGET) 47 @$(rm) $(BINDIR)/$(TARGET)
50 - @$(rm) doc/main_bioinformatics.pdf doc/supplementary_material.pdf
51 @echo -e "\033[00;32mExecutable and docs removed!\033[00m" 48 @echo -e "\033[00;32mExecutable and docs removed!\033[00m"
......
This diff is collapsed. Click to expand it.
...@@ -12,12 +12,19 @@ ...@@ -12,12 +12,19 @@
12 12
13 using std::vector; 13 using std::vector;
14 14
15 -typedef struct args_ { 15 +typedef struct argsf_ {
16 path motif_file; 16 path motif_file;
17 std::mutex& posInsertionSites_mutex; 17 std::mutex& posInsertionSites_mutex;
18 - args_(path motif_file_, mutex& mutex_) : motif_file(motif_file_), posInsertionSites_mutex(mutex_) {} 18 + argsf_(path motif_file_, mutex& mutex_)
19 - } args_of_parallel_func; 19 + : motif_file(motif_file_), posInsertionSites_mutex(mutex_) {}
20 + } file_and_mutex;
20 21
22 +typedef struct argsm_ {
23 + json_elem motif;
24 + std::mutex& posInsertionSites_mutex;
25 + argsm_(json_elem& motif_, mutex& mutex_)
26 + : motif(motif_), posInsertionSites_mutex(mutex_) {}
27 + } motif_and_mutex;
21 28
22 class MOIP 29 class MOIP
23 { 30 {
...@@ -56,9 +63,9 @@ class MOIP ...@@ -56,9 +63,9 @@ class MOIP
56 63
57 bool exists_vertical_outdated_labels(const SecondaryStructure& s) const; 64 bool exists_vertical_outdated_labels(const SecondaryStructure& s) const;
58 bool exists_horizontal_outdated_labels(const SecondaryStructure& s) const; 65 bool exists_horizontal_outdated_labels(const SecondaryStructure& s) const;
59 - void allowed_motifs_from_desc(args_of_parallel_func arg_struct); 66 + void allowed_motifs_from_desc(file_and_mutex arg_struct);
60 - void allowed_motifs_from_rin(args_of_parallel_func arg_struct); 67 + void allowed_motifs_from_rin(file_and_mutex arg_struct);
61 - void allowed_motifs_from_json(args_of_parallel_func arg_struct, vector<pair<uint, char>> errors_id); 68 + void allowed_motifs_from_json(motif_and_mutex arg_struct);
62 69
63 bool verbose_; // Should we print things ? 70 bool verbose_; // Should we print things ?
64 71
...@@ -79,7 +86,6 @@ class MOIP ...@@ -79,7 +86,6 @@ class MOIP
79 vector<vector<size_t>> index_of_Cxip_; // Stores the indexes of the Cxip in insertion_dv_ 86 vector<vector<size_t>> index_of_Cxip_; // Stores the indexes of the Cxip in insertion_dv_
80 vector<size_t> index_of_first_components; // Stores the indexes of Cx1p in insertion_dv_ 87 vector<size_t> index_of_first_components; // Stores the indexes of Cx1p in insertion_dv_
81 vector<vector<size_t>> index_of_yuv_; // Stores the indexes of the y^u_v in basepair_dv_ 88 vector<vector<size_t>> index_of_yuv_; // Stores the indexes of the y^u_v in basepair_dv_
82 -
83 vector<vector<size_t>> index_of_xij_; //Stores the indexes of the xij variables (BioKop) in stacks_dv_ 89 vector<vector<size_t>> index_of_xij_; //Stores the indexes of the xij variables (BioKop) in stacks_dv_
84 }; 90 };
85 91
......
This diff is collapsed. Click to expand it.
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
7 #include <vector> 7 #include <vector>
8 #include <filesystem> 8 #include <filesystem>
9 #include "rna.h" 9 #include "rna.h"
10 +#include "json.hpp"
10 11
11 using boost::filesystem::path; 12 using boost::filesystem::path;
12 using std::pair; 13 using std::pair;
...@@ -14,6 +15,8 @@ using std::string; ...@@ -14,6 +15,8 @@ using std::string;
14 using std::vector; 15 using std::vector;
15 using std::mutex; 16 using std::mutex;
16 17
18 +typedef enum { RNA3DMOTIF = 1, CSV = 2, CARNAVAL = 3, JSON = 4 } source_type;
19 +typedef nlohmann::detail::iter_impl<nlohmann::basic_json<> > json_elem;
17 20
18 21
19 typedef struct Comp_ { 22 typedef struct Comp_ {
...@@ -42,15 +45,14 @@ class Motif ...@@ -42,15 +45,14 @@ class Motif
42 public: 45 public:
43 Motif(void); 46 Motif(void);
44 Motif(string csv_line); 47 Motif(string csv_line);
45 - Motif(const vector<Component>& v, string PDB); 48 + Motif(const vector<Component>& v, string name);
46 - Motif(const vector<Component>& v, string id, size_t contacts, double tx_occurrences); 49 + Motif(const vector<Component>& v, string name, string& struc);
47 Motif(const vector<Component>& v, path rinfile, uint id, bool reversed); 50 Motif(const vector<Component>& v, path rinfile, uint id, bool reversed);
51 + // Motif(string path, int id); //full path to biorseo/data/modules/RIN/Subfiles/
48 52
49 -
50 - Motif(string path, int id); //full path to biorseo/data/modules/RIN/Subfiles/
51 static char is_valid_RIN(const string& rinfile); 53 static char is_valid_RIN(const string& rinfile);
52 static char is_valid_DESC(const string& descfile); 54 static char is_valid_DESC(const string& descfile);
53 - static vector<pair<uint,char>> is_valid_JSON(const string& jsonfile); 55 + static char is_valid_JSON(const json_elem& i);
54 56
55 string pos_string(void) const; 57 string pos_string(void) const;
56 string sec_struct(void) const; 58 string sec_struct(void) const;
...@@ -64,39 +66,27 @@ class Motif ...@@ -64,39 +66,27 @@ class Motif
64 double tx_occurrences_; 66 double tx_occurrences_;
65 double score_; 67 double score_;
66 bool reversed_; 68 bool reversed_;
69 + static uint delay;
70 + // delay is the minimal shift between end of a component and beginning of the next.
71 + // For regular loop motifs, it should be at least 5 (because hairpins cannot be of size smaller than 5).
72 + // For the general case, it could be zero, but solutions will look dirty...
73 + // Higher values reduce combinatorial explosion of potential insertion sites.
67 74
68 private: 75 private:
69 - string carnaval_id; // if source = CARNAVAL 76 + string id_;
70 - string atlas_id; // if source = RNAMOTIFATLAS 77 + source_type source_;
71 - string PDBID; // if source = RNA3DMOTIF
72 - string contacts_id; // if source = CONTACTS
73 - bool is_model_; // Whether the motif is a model or an extracted module from a 3D structure
74 - enum { RNA3DMOTIF = 1, RNAMOTIFATLAS = 2, CARNAVAL = 3, CONTACTS = 4 } source_;
75 }; 78 };
76 79
77 bool is_desc_insertible(const string& descfile, const string& rna); 80 bool is_desc_insertible(const string& descfile, const string& rna);
78 -bool is_rin_insertible(const string& rinfile, const string& rna); 81 +bool check_motif_ss(string);
79 -bool is_json_insertible(const string& jsonfile, const string& rna); 82 +bool check_motif_sequence(string);
80 83
81 vector<Motif> load_txt_folder(const string& path, const string& rna, bool verbose); 84 vector<Motif> load_txt_folder(const string& path, const string& rna, bool verbose);
82 vector<Motif> load_desc_folder(const string& path, const string& rna, bool verbose); 85 vector<Motif> load_desc_folder(const string& path, const string& rna, bool verbose);
83 vector<Motif> load_csv(const string& path); 86 vector<Motif> load_csv(const string& path);
84 vector<Motif> load_json_folder(const string& path, const string& rna, bool verbose); 87 vector<Motif> load_json_folder(const string& path, const string& rna, bool verbose);
85 88
86 -vector<vector<Component>> find_next_ones_in(string rna, uint offset, vector<string>& vc); 89 +vector<vector<Component>> find_next_ones_in(string rna, uint offset, vector<string> vc);
87 -vector<vector<Component>> json_find_next_ones_in(string rna, uint offset, vector<string>& vc);
88 -
89 -// utilities for Json motifs
90 -size_t count_nucleotide(string&);
91 -size_t count_delimiter(string&);
92 -size_t count_contacts(string&);
93 -string check_motif_sequence(string);
94 -bool checkSecondaryStructure(string);
95 -vector<Link> build_motif_pairs(string&, vector<Component>&);
96 -uint find_max_occurrences(string&);
97 -uint find_max_sequence(string&);
98 -vector<string> find_components(string&, string);
99 -vector<uint> find_contacts(vector<string>&, vector<Component>&);
100 90
101 // utilities to compare secondary structures: 91 // utilities to compare secondary structures:
102 bool operator==(const Motif& m1, const Motif& m2); 92 bool operator==(const Motif& m1, const Motif& m2);
......
...@@ -3,16 +3,12 @@ ...@@ -3,16 +3,12 @@
3 #include <algorithm> 3 #include <algorithm>
4 #include <boost/format.hpp> 4 #include <boost/format.hpp>
5 5
6 -#define RESET "\033[0m"
7 -#define RED "\033[31m" /* Red */
8 -
9 using std::abs; 6 using std::abs;
10 using std::cout; 7 using std::cout;
11 using std::endl; 8 using std::endl;
12 9
13 SecondaryStructure::SecondaryStructure() {} 10 SecondaryStructure::SecondaryStructure() {}
14 11
15 -
16 SecondaryStructure::SecondaryStructure(const RNA& rna) 12 SecondaryStructure::SecondaryStructure(const RNA& rna)
17 : objective_scores_(vector<double>(2)), n_(rna.get_RNA_length()), nBP_(0), rna_(rna) 13 : objective_scores_(vector<double>(2)), n_(rna.get_RNA_length()), nBP_(0), rna_(rna)
18 { 14 {
...@@ -21,8 +17,6 @@ SecondaryStructure::SecondaryStructure(const RNA& rna) ...@@ -21,8 +17,6 @@ SecondaryStructure::SecondaryStructure(const RNA& rna)
21 17
22 SecondaryStructure::SecondaryStructure(bool empty) : rna_(RNA()) { is_empty_structure = empty; } 18 SecondaryStructure::SecondaryStructure(bool empty) : rna_(RNA()) { is_empty_structure = empty; }
23 19
24 -
25 -
26 string SecondaryStructure::to_DBN(void) const 20 string SecondaryStructure::to_DBN(void) const
27 { 21 {
28 22
...@@ -100,26 +94,6 @@ string SecondaryStructure::to_DBN(void) const ...@@ -100,26 +94,6 @@ string SecondaryStructure::to_DBN(void) const
100 return res; 94 return res;
101 } 95 }
102 96
103 -string structure_with_contacts(const SecondaryStructure& ss) {
104 - string sequence = ss.rna_.get_seq();
105 - string construct = "";
106 - bool flag;
107 - for (uint i = 0; i < sequence.size(); i++) {
108 - flag = false;
109 - for (const Motif& m : ss.motif_info_) {
110 - for (uint j = 0; j < m.pos_contacts.size(); j++) {
111 - if (m.pos_contacts[j] == i) flag = true;
112 - }
113 - }
114 - if (flag) {
115 - construct += "*";
116 - } else {
117 - construct += ".";
118 - }
119 - }
120 - return construct;
121 -}
122 -
123 string SecondaryStructure::to_string(void) const 97 string SecondaryStructure::to_string(void) const
124 { 98 {
125 string s; 99 string s;
...@@ -141,35 +115,11 @@ void SecondaryStructure::set_basepair(uint i, uint j) ...@@ -141,35 +115,11 @@ void SecondaryStructure::set_basepair(uint i, uint j)
141 115
142 void SecondaryStructure::insert_motif(const Motif& m) { motif_info_.push_back(m); } 116 void SecondaryStructure::insert_motif(const Motif& m) { motif_info_.push_back(m); }
143 117
144 -void colored_contacts(string sequence, vector<Motif> motif_info_) {
145 - bool flag;
146 - for (uint i = 0; i < sequence.size(); i++) {
147 - flag = false;
148 - for (const Motif& m : motif_info_) {
149 - for (uint j = 0; j < m.pos_contacts.size(); j++) {
150 - if (m.pos_contacts[j] == i) flag = true;
151 - }
152 - }
153 - if (flag) {
154 - cout << RED << sequence[i] << RESET;
155 - } else {
156 - cout << sequence[i];
157 - }
158 - }
159 -}
160 -
161 void SecondaryStructure::print(void) const 118 void SecondaryStructure::print(void) const
162 { 119 {
163 - cout << endl; 120 + cout << endl << '\t' << rna_.get_seq() << endl;
164 - cout << '\t';
165 - colored_contacts(rna_.get_seq(), motif_info_);
166 - //rna_.get_seq()
167 - cout << endl;
168 string ss = to_string(); 121 string ss = to_string();
169 - cout << '\t'; 122 + cout << '\t' << ss << endl;
170 - colored_contacts(ss, motif_info_);
171 - //cout << ss;
172 - cout << endl;
173 for (const Motif& m : motif_info_) { 123 for (const Motif& m : motif_info_) {
174 uint i = 0; 124 uint i = 0;
175 cout << '\t'; 125 cout << '\t';
...@@ -324,7 +274,6 @@ bool operator<=(const SecondaryStructure& s1, const SecondaryStructure& s2) ...@@ -324,7 +274,6 @@ bool operator<=(const SecondaryStructure& s1, const SecondaryStructure& s2)
324 return false; 274 return false;
325 } 275 }
326 276
327 -
328 bool operator==(const SecondaryStructure& s1, const SecondaryStructure& s2) 277 bool operator==(const SecondaryStructure& s1, const SecondaryStructure& s2)
329 { 278 {
330 // Checks whether the secondary structures are exactly the same, including the inserted motifs. 279 // Checks whether the secondary structures are exactly the same, including the inserted motifs.
......
...@@ -57,7 +57,4 @@ inline void SecondaryStructure::set_objective_score(int i, double s) { objecti ...@@ -57,7 +57,4 @@ inline void SecondaryStructure::set_objective_score(int i, double s) { objecti
57 inline uint SecondaryStructure::get_n_motifs(void) const { return motif_info_.size(); } 57 inline uint SecondaryStructure::get_n_motifs(void) const { return motif_info_.size(); }
58 inline uint SecondaryStructure::get_n_bp(void) const { return nBP_; } 58 inline uint SecondaryStructure::get_n_bp(void) const { return nBP_; }
59 59
60 -string structure_with_contacts(const SecondaryStructure& ss);
61 -
62 -
63 #endif // SECONDARY_STRUCTURE_ 60 #endif // SECONDARY_STRUCTURE_
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -79,12 +79,10 @@ int main(int argc, char* argv[]) ...@@ -79,12 +79,10 @@ int main(int argc, char* argv[])
79 ("jsonfolder,j", po::value<string>(&motifs_path_name), "A folder containing a custom motif library in .json format") 79 ("jsonfolder,j", po::value<string>(&motifs_path_name), "A folder containing a custom motif library in .json format")
80 ("pre-placed,x", po::value<string>(&motifs_path_name), "A CSV file providing motif insertion sites obtained with another tool.") 80 ("pre-placed,x", po::value<string>(&motifs_path_name), "A CSV file providing motif insertion sites obtained with another tool.")
81 ("function,f", po::value<char>(&obj_function_nbr)->default_value('B'), 81 ("function,f", po::value<char>(&obj_function_nbr)->default_value('B'),
82 - "(A, B, C, D, E or F) Objective function to score module insertions:\n" 82 + "(A, B, C, or D) Objective function to score module insertions:\n"
83 " (A) insert big modules\n (B) light, high-order modules\n" 83 " (A) insert big modules\n (B) light, high-order modules\n"
84 " (C) well-scored modules\n (D) light, high-order, well-scored\n modules\n" 84 " (C) well-scored modules\n (D) light, high-order, well-scored\n modules\n"
85 - " (E, F) insert big modules with many\n contacts with proteins, different\n ponderations.\n" 85 + " C and D require position scores\n provided by --pre-placed.\n")
86 - " C and D require position scores\n provided by --pre-placed.\n"
87 - " E and F require protein-contact\n information and should be\n used only with --jsonfolder.")
88 ("mfe,E", "Minimize stacking energies\n (leads to MFE extimator)") 86 ("mfe,E", "Minimize stacking energies\n (leads to MFE extimator)")
89 ("mea,A", "(default) Maximize expected accuracy\n (leads to MEA estimator)") 87 ("mea,A", "(default) Maximize expected accuracy\n (leads to MEA estimator)")
90 ("first-objective,c", po::value<unsigned int>(&MOIP::obj_to_solve_)->default_value(2), 88 ("first-objective,c", po::value<unsigned int>(&MOIP::obj_to_solve_)->default_value(2),
...@@ -108,8 +106,8 @@ int main(int argc, char* argv[]) ...@@ -108,8 +106,8 @@ int main(int argc, char* argv[])
108 << "Bio-objective integer linear programming framework to predict RNA secondary structures by including known RNA modules." << endl 106 << "Bio-objective integer linear programming framework to predict RNA secondary structures by including known RNA modules." << endl
109 << "Developped by Louis Becquey, 2018-2021\nLénaïc Durand, 2019\nNathalie Bernard, 2021" << endl << endl 107 << "Developped by Louis Becquey, 2018-2021\nLénaïc Durand, 2019\nNathalie Bernard, 2021" << endl << endl
110 << "Usage:\tYou must provide:\n\t1) a FASTA input file with -s," << endl 108 << "Usage:\tYou must provide:\n\t1) a FASTA input file with -s," << endl
111 - << "\t2) a module type with --rna3dmotifs, --carnaval, --contacts or --pre-placed," << endl 109 + << "\t2) a module type with --rna3dmotifs, --carnaval, --json or --pre-placed," << endl
112 - << "\t3) one module-based scoring function with --func A, B, C, D, E or F," << endl 110 + << "\t3) one module-based scoring function with --func A, B, C, or D" << endl
113 << "\t4) one energy-based scoring function with --mfe or --mea," << endl 111 << "\t4) one energy-based scoring function with --mfe or --mea," << endl
114 << "\t5) how to display results: in console (-v), or in a result file (-o)." << endl 112 << "\t5) how to display results: in console (-v), or in a result file (-o)." << endl
115 << endl 113 << endl
...@@ -154,13 +152,14 @@ int main(int argc, char* argv[]) ...@@ -154,13 +152,14 @@ int main(int argc, char* argv[])
154 return EXIT_FAILURE; 152 return EXIT_FAILURE;
155 } 153 }
156 154
157 -
158 - /* FIND PARETO SET */
159 string source; 155 string source;
156 + Motif::delay = 1;
160 if (vm.count("rinfolder")) 157 if (vm.count("rinfolder"))
161 source = "rinfolder"; 158 source = "rinfolder";
162 - else if (vm.count("descfolder")) 159 + else if (vm.count("descfolder")) {
163 source = "descfolder"; 160 source = "descfolder";
161 + Motif::delay = 5;
162 + }
164 else if (vm.count("jsonfolder")) 163 else if (vm.count("jsonfolder"))
165 source = "jsonfolder"; 164 source = "jsonfolder";
166 else if (vm.count("pre-placed")) 165 else if (vm.count("pre-placed"))
...@@ -168,6 +167,8 @@ int main(int argc, char* argv[]) ...@@ -168,6 +167,8 @@ int main(int argc, char* argv[])
168 else 167 else
169 cerr << "ERR: no source of modules provided !" << endl; 168 cerr << "ERR: no source of modules provided !" << endl;
170 169
170 + /* FIND PARETO SET */
171 +
171 MOIP myMOIP = MOIP(myRNA, source, motifs_path_name.c_str(), theta_p_threshold, verbose); 172 MOIP myMOIP = MOIP(myRNA, source, motifs_path_name.c_str(), theta_p_threshold, verbose);
172 double min, max; 173 double min, max;
173 IloConstraintArray F(myMOIP.get_env()); 174 IloConstraintArray F(myMOIP.get_env());
...@@ -243,11 +244,8 @@ int main(int argc, char* argv[]) ...@@ -243,11 +244,8 @@ int main(int argc, char* argv[])
243 outfile.open(outputName); 244 outfile.open(outputName);
244 outfile << fa->name() << endl << fa->seq() << endl; 245 outfile << fa->name() << endl << fa->seq() << endl;
245 246
246 - for (uint i = 0; i < myMOIP.get_n_solutions(); i++) { 247 + for (uint i = 0; i < myMOIP.get_n_solutions(); i++)
247 - outfile << myMOIP.solution(i).to_string() << endl << structure_with_contacts(myMOIP.solution(i)) << endl; 248 + outfile << myMOIP.solution(i).to_string() << endl;
248 - string str1 = myMOIP.solution(i).to_string();
249 -
250 - }
251 outfile.close(); 249 outfile.close();
252 } 250 }
253 251
......
No preview for this file type
...@@ -50,6 +50,7 @@ RNA::RNA(string name, string seq, bool verbose) ...@@ -50,6 +50,7 @@ RNA::RNA(string name, string seq, bool verbose)
50 int max_bp_span = 100; 50 int max_bp_span = 100;
51 float cutoff = 1e-6; 51 float cutoff = 1e-6;
52 vrna_ep_t* results = vrna_pfl_fold(cseq, window_size, max_bp_span, cutoff); 52 vrna_ep_t* results = vrna_pfl_fold(cseq, window_size, max_bp_span, cutoff);
53 + vrna_ep_t* save = results; // keep the pointer to free it later
53 54
54 if (results != NULL) 55 if (results != NULL)
55 { 56 {
...@@ -96,6 +97,8 @@ RNA::RNA(string name, string seq, bool verbose) ...@@ -96,6 +97,8 @@ RNA::RNA(string name, string seq, bool verbose)
96 } 97 }
97 } 98 }
98 99
100 + // Free memory allocated by ViennaRNA
101 + free(save);
99 } 102 }
100 103
101 else cerr << "NULL result returned by vrna_pfl_fold" << endl; 104 else cerr << "NULL result returned by vrna_pfl_fold" << endl;
......