Nathalie BERNARD

Scripts corrigés pour le matching unique et la suppression des pdbs

...@@ -24,7 +24,7 @@ def run_test(cmd, log): ...@@ -24,7 +24,7 @@ def run_test(cmd, log):
24 log.flush() 24 log.flush()
25 rc = process.poll() 25 rc = process.poll()
26 26
27 -def create_command_E(name): 27 +def create_command_E(name, estimator):
28 #cmd = ("python3 /mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/biorseo.py -i " + 28 #cmd = ("python3 /mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/biorseo.py -i " +
29 cmd = ("python3 /local/local/BiorseoNath/biorseo.py -i " + 29 cmd = ("python3 /local/local/BiorseoNath/biorseo.py -i " +
30 "/local/local/BiorseoNath/data/fasta/" + 30 "/local/local/BiorseoNath/data/fasta/" +
...@@ -32,12 +32,12 @@ def create_command_E(name): ...@@ -32,12 +32,12 @@ def create_command_E(name):
32 "-O results/ " + 32 "-O results/ " +
33 "--contacts " + 33 "--contacts " +
34 "--patternmatch " + 34 "--patternmatch " +
35 - "--func E --MFE -v " + 35 + "--func E --" + estimator + " -v " +
36 "--biorseo-dir /local/local/BiorseoNath " + 36 "--biorseo-dir /local/local/BiorseoNath " +
37 "--modules-path /local/local/BiorseoNath/data/modules/ISAURE/Motifs_derniere_version ") 37 "--modules-path /local/local/BiorseoNath/data/modules/ISAURE/Motifs_derniere_version ")
38 return cmd 38 return cmd
39 39
40 -def create_command_F(name): 40 +def create_command_F(name, estimator):
41 #cmd = ("python3 /mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/biorseo.py -i " + 41 #cmd = ("python3 /mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/biorseo.py -i " +
42 cmd = ("python3 /local/local/BiorseoNath/biorseo.py -i " + 42 cmd = ("python3 /local/local/BiorseoNath/biorseo.py -i " +
43 "/local/local/BiorseoNath/data/fasta/" + 43 "/local/local/BiorseoNath/data/fasta/" +
...@@ -45,7 +45,7 @@ def create_command_F(name): ...@@ -45,7 +45,7 @@ def create_command_F(name):
45 "-O results/ " + 45 "-O results/ " +
46 "--contacts " + 46 "--contacts " +
47 "--patternmatch " + 47 "--patternmatch " +
48 - "--func F --MFE -v " + 48 + "--func F --" + estimator + " -v " +
49 "--biorseo-dir /local/local/BiorseoNath " + 49 "--biorseo-dir /local/local/BiorseoNath " +
50 "--modules-path /local/local/BiorseoNath/data/modules/ISAURE/Motifs_derniere_version ") 50 "--modules-path /local/local/BiorseoNath/data/modules/ISAURE/Motifs_derniere_version ")
51 return cmd 51 return cmd
...@@ -238,7 +238,7 @@ def set_axis_style(ax, labels): ...@@ -238,7 +238,7 @@ def set_axis_style(ax, labels):
238 ax.set_xlim(0.25, len(labels) + 0.75) 238 ax.set_xlim(0.25, len(labels) + 0.75)
239 ax.set_xlabel('Sample name') 239 ax.set_xlabel('Sample name')
240 240
241 -def visualization_best_mcc(list_struct2d, list_contacts, function, color, lines_color): 241 +def visualization_best_mcc(list_struct2d, list_contacts, estimator, function, color, lines_color):
242 242
243 np_struct2d = np.array(list_struct2d) 243 np_struct2d = np.array(list_struct2d)
244 np_contacts = np.array(list_contacts) 244 np_contacts = np.array(list_contacts)
...@@ -268,7 +268,7 @@ def visualization_best_mcc(list_struct2d, list_contacts, function, color, lines_ ...@@ -268,7 +268,7 @@ def visualization_best_mcc(list_struct2d, list_contacts, function, color, lines_
268 268
269 for v in violins['bodies']: 269 for v in violins['bodies']:
270 v.set_facecolor(color) 270 v.set_facecolor(color)
271 - plt.savefig('visualisation_16_06_MFE_' + function + '.png', bbox_inches='tight') 271 + plt.savefig('visualisation_16_06_' + estimator + '_' + function + '.png', bbox_inches='tight')
272 272
273 def get_list_structs_contacts(path_benchmark, estimator, function): 273 def get_list_structs_contacts(path_benchmark, estimator, function):
274 myfile = open(path_benchmark, "r") 274 myfile = open(path_benchmark, "r")
...@@ -333,7 +333,7 @@ def visualization_all_mcc(path_benchmark, estimator, function): ...@@ -333,7 +333,7 @@ def visualization_all_mcc(path_benchmark, estimator, function):
333 333
334 plt.figure(figsize=(25,4),dpi=200) 334 plt.figure(figsize=(25,4),dpi=200)
335 plt.xticks(rotation=90) 335 plt.xticks(rotation=90)
336 - plt.boxplot(data) 336 + plt.boxplot(data, medianprops=dict(color='black'))
337 for i in range(absciss): 337 for i in range(absciss):
338 y =data[i] 338 y =data[i]
339 x = np.random.normal(1 + i, 0.04, size=len(y)) 339 x = np.random.normal(1 + i, 0.04, size=len(y))
...@@ -356,7 +356,7 @@ def visualization_all_mcc(path_benchmark, estimator, function): ...@@ -356,7 +356,7 @@ def visualization_all_mcc(path_benchmark, estimator, function):
356 356
357 plt.figure(figsize=(25, 4), dpi=200) 357 plt.figure(figsize=(25, 4), dpi=200)
358 plt.xticks(rotation=90) 358 plt.xticks(rotation=90)
359 - plt.boxplot(data) 359 + plt.boxplot(data, medianprops=dict(color='black'))
360 for i in range(absciss): 360 for i in range(absciss):
361 y = data[i] 361 y = data[i]
362 x = np.random.normal(1 + i, 0.04, size=len(y)) 362 x = np.random.normal(1 + i, 0.04, size=len(y))
...@@ -372,53 +372,90 @@ def visualization_all_mcc(path_benchmark, estimator, function): ...@@ -372,53 +372,90 @@ def visualization_all_mcc(path_benchmark, estimator, function):
372 #cmd1 = ("cppsrc/Scripts/countPattern") 372 #cmd1 = ("cppsrc/Scripts/countPattern")
373 #cmd2 = ("cppsrc/Scripts/deletePdb") 373 #cmd2 = ("cppsrc/Scripts/deletePdb")
374 374
375 -"""myfile = open("data/modules/ISAURE/Motifs_version_initiale/benchmark.txt", "r") 375 +myfile = open("data/modules/ISAURE/Motifs_version_initiale/benchmark.txt", "r")
376 name = myfile.readline() 376 name = myfile.readline()
377 contacts = myfile.readline() 377 contacts = myfile.readline()
378 seq = myfile.readline() 378 seq = myfile.readline()
379 structure2d = myfile.readline() 379 structure2d = myfile.readline()
380 380
381 -list_struct2d_E = [] 381 +list_struct2d_E_MFE = []
382 -list_contacts_E = [] 382 +list_contacts_E_MFE = []
383 -list_struct2d_F = [] 383 +list_struct2d_F_MFE = []
384 -list_contacts_F = [] 384 +list_contacts_F_MFE = []
385 -countE = 0 385 +
386 -countF = 0 386 +list_struct2d_E_MEA = []
387 +list_contacts_E_MEA = []
388 +list_struct2d_F_MEA = []
389 +list_contacts_F_MEA = []
390 +
391 +countE_MFE = 0
392 +countF_MFE = 0
393 +
394 +countE_MEA = 0
395 +countF_MEA = 0
387 while seq: 396 while seq:
388 name = name[6:].strip() 397 name = name[6:].strip()
389 print(name) 398 print(name)
390 - run_test(cmd2 + " " + name + ".fa", log) 399 +
391 - print(cmd2 + " " + name + ".fa") 400 + cmd2 = ("cppsrc/Scripts/deletePdb " + name)
392 401
393 - cmd3 = create_command_E(name) 402 + cmd3 = create_command_E(name, 'MFE')
394 os.system(cmd3) 403 os.system(cmd3)
395 404
396 file_path = "results/test_" + name + ".json_pmE_MFE" 405 file_path = "results/test_" + name + ".json_pmE_MFE"
397 if os.path.isfile(file_path): 406 if os.path.isfile(file_path):
398 - tabE = write_mcc_in_file_E(name, contacts, structure2d) 407 + tabE_MFE = write_mcc_in_file_E(name, contacts, structure2d)
399 - list_contacts_E.append(tabE[0]) 408 + list_contacts_E_MFE.append(tabE_MFE[0])
400 - list_struct2d_E.append(tabE[1]) 409 + list_struct2d_E_MFE.append(tabE_MFE[1])
401 - countE = countE + 1 410 + countE_MFE = countE_MFE + 1
402 411
403 - cmd3 = create_command_F(name) 412 + cmd3 = create_command_F(name, 'MFE')
404 os.system(cmd3) 413 os.system(cmd3)
405 414
406 file_path = "results/test_" + name + ".json_pmF_MFE" 415 file_path = "results/test_" + name + ".json_pmF_MFE"
407 if os.path.isfile(file_path): 416 if os.path.isfile(file_path):
408 - tabF = write_mcc_in_file_F(name, contacts, structure2d) 417 + tabF_MFE = write_mcc_in_file_F(name, contacts, structure2d)
409 - list_contacts_F.append(tabF[0]) 418 + list_contacts_F_MFE.append(tabF_MFE[0])
410 - list_struct2d_F.append(tabF[1]) 419 + list_struct2d_F_MFE.append(tabF_MFE[1])
411 - countF = countF + 1 420 + countF_MFE = countF_MFE + 1
421 +
422 + cmd3 = create_command_E(name, 'MEA')
423 + os.system(cmd3)
424 +
425 + file_path = "results/test_" + name + ".json_pmE_MEA"
426 + if os.path.isfile(file_path):
427 + tabE_MEA = write_mcc_in_file_E(name, contacts, structure2d)
428 + list_contacts_E_MEA.append(tabE_MEA[0])
429 + list_struct2d_E_MEA.append(tabE_MEA[1])
430 + countE_MEA = countE_MEA + 1
431 +
432 + cmd3 = create_command_F(name, 'MEA')
433 + os.system(cmd3)
434 +
435 + file_path = "results/test_" + name + ".json_pmF_MEA"
436 + if os.path.isfile(file_path):
437 + tabF_MEA = write_mcc_in_file_F(name, contacts, structure2d)
438 + list_contacts_F_MEA.append(tabF_MEA[0])
439 + list_struct2d_F_MEA.append(tabF_MEA[1])
440 + countF_MEA = countF_MEA + 1
412 441
413 name = myfile.readline() 442 name = myfile.readline()
414 contacts = myfile.readline() 443 contacts = myfile.readline()
415 seq = myfile.readline() 444 seq = myfile.readline()
416 structure2d = myfile.readline() 445 structure2d = myfile.readline()
417 446
418 -visualization_best_mcc(list_struct2d_E, list_contacts_E, 'E', 'red', '#900C3F')
419 -visualization_best_mcc(list_struct2d_F, list_contacts_F, 'F', 'blue', '#0900FF')
420 -print("countE: " + str(countE) + "\n")
421 -print("countF: " + str(countF) + "\n")
422 -myfile.close()"""
423 -path_benchmark = "data/modules/ISAURE/Motifs_version_initiale/benchmark.txt"
424 -visualization_all_mcc(path_benchmark,'MEA', 'F')
...\ No newline at end of file ...\ No newline at end of file
447 +visualization_best_mcc(list_struct2d_E_MFE, list_contacts_E_MFE, 'MFE', 'E', 'red', '#900C3F')
448 +visualization_best_mcc(list_struct2d_F_MFE, list_contacts_F_MFE, 'MFE', 'F', 'blue', '#0900FF')
449 +visualization_best_mcc(list_struct2d_E_MEA, list_contacts_E_MEA, 'MEA', 'E', 'red', '#900C3F')
450 +visualization_best_mcc(list_struct2d_F_MEA, list_contacts_F_MEA, 'MEA', 'F', 'blue', '#0900FF')
451 +
452 +print("countE_MFE: " + str(countE_MFE) + "\n")
453 +print("countF_MFE: " + str(countF_MFE) + "\n")
454 +print("countE_MEA: " + str(countE_MEA) + "\n")
455 +print("countF_MEA: " + str(countF_MEA) + "\n")
456 +myfile.close()
457 +#path_benchmark = "data/modules/ISAURE/Motifs_version_initiale/benchmark.txt"
458 +#visualization_all_mcc(path_benchmark,'MEA', 'F')
459 +#visualization_all_mcc(path_benchmark,'MEA', 'E')
460 +#visualization_all_mcc(path_benchmark,'MFE', 'E')
461 +#visualization_all_mcc(path_benchmark,'MFE', 'F')
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -12,45 +12,31 @@ ...@@ -12,45 +12,31 @@
12 using namespace std; 12 using namespace std;
13 using json = nlohmann::json; 13 using json = nlohmann::json;
14 14
15 -void delete_redundant_pdb(const string& jsonlibrary, const string& fasta, const string& jsonoutfile) { 15 +void delete_redundant_pdb(const string& jsonlibrary, const string& name, const string& jsonoutfile) {
16 std::ifstream lib(jsonlibrary); 16 std::ifstream lib(jsonlibrary);
17 17
18 std::ofstream outfile (jsonoutfile); 18 std::ofstream outfile (jsonoutfile);
19 json new_motif; 19 json new_motif;
20 json new_id; 20 json new_id;
21 json js = json::parse(lib); 21 json js = json::parse(lib);
22 -
23 - std::ifstream file(fasta);
24 - string pdb, seq;
25 - std::getline(file, pdb);
26 - std::getline(file, seq);
27 22
28 for (auto it = js.begin(); it != js.end(); ++it) { 23 for (auto it = js.begin(); it != js.end(); ++it) {
29 string id = it.key(); 24 string id = it.key();
30 vector<string> list_pdbs; 25 vector<string> list_pdbs;
31 bool is_added = true; 26 bool is_added = true;
32 27
33 - //cout << "id: " << id << endl;
34 for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) { 28 for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
35 - string test = it2.key(); 29 + string field = it2.key();
36 30
37 - if (!test.compare("pdb")) { 31 + if (!field.compare("pdb")) {
38 vector<string> tab = it2.value(); 32 vector<string> tab = it2.value();
39 list_pdbs = tab; 33 list_pdbs = tab;
40 - /*set<set<string>>::iterator iit;
41 - set<string>::iterator iit2;
42 - for(iit = list_pfams.begin(); iit != list_pfams.end(); iit++) {
43 - for (iit2 = iit->begin(); iit2 != iit->end(); ++iit2) {
44 - cout << *iit2 << endl;
45 - }
46 - cout << endl << endl;
47 - }*/
48 } else { 34 } else {
49 - new_id[test] = it2.value(); 35 + new_id[field] = it2.value();
50 } 36 }
51 } 37 }
52 - 38 +
53 - if (count(list_pdbs.begin(), list_pdbs.end(), pdb.substr(6,pdb.size()))) { 39 + if (count(list_pdbs.begin(), list_pdbs.end(), name.substr(0, name.size()-2))) {
54 is_added = false; 40 is_added = false;
55 } 41 }
56 if (is_added) { 42 if (is_added) {
...@@ -66,10 +52,9 @@ void delete_redundant_pdb(const string& jsonlibrary, const string& fasta, const ...@@ -66,10 +52,9 @@ void delete_redundant_pdb(const string& jsonlibrary, const string& fasta, const
66 int main(int argc, char** argv) 52 int main(int argc, char** argv)
67 { 53 {
68 string jsonlibrary = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_final.json"; 54 string jsonlibrary = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_final.json";
69 - string fasta = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/fasta/";
70 string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; 55 string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json";
71 - fasta = fasta + argv[1]; 56 + string name = argv[1];
72 - delete_redundant_pdb(jsonlibrary, fasta, out); 57 + delete_redundant_pdb(jsonlibrary, name, out);
73 return 0; 58 return 0;
74 } 59 }
75 60
......
...@@ -15,10 +15,19 @@ using json = nlohmann::json; ...@@ -15,10 +15,19 @@ using json = nlohmann::json;
15 That script will remove from the library all the pattern that match ONLY with the sequence from which it comes from. 15 That script will remove from the library all the pattern that match ONLY with the sequence from which it comes from.
16 */ 16 */
17 17
18 -vector<string> get_list_pdb_benchmark(const string& benchmark) { 18 +struct data {
19 + string pdb;
20 + string seq_pdb;
21 + string id;
22 + string cmp;
23 +};
24 +typedef struct data data;
25 +
26 +
27 +vector<data> get_list_pdb_benchmark(const string& benchmark) {
19 28
20 fstream bm(benchmark); 29 fstream bm(benchmark);
21 - vector<string> list_pdb; 30 + vector<data> list_pdb_seq;
22 if (bm.is_open()) { 31 if (bm.is_open()) {
23 string name; 32 string name;
24 string sequence; 33 string sequence;
...@@ -26,17 +35,20 @@ vector<string> get_list_pdb_benchmark(const string& benchmark) { ...@@ -26,17 +35,20 @@ vector<string> get_list_pdb_benchmark(const string& benchmark) {
26 string contacts; 35 string contacts;
27 36
28 while (getline(bm, name)) { 37 while (getline(bm, name)) {
38 + data d;
29 int size = name.size(); 39 int size = name.size();
30 name = name.substr(5,size-6); 40 name = name.substr(5,size-6);
31 - list_pdb.push_back(name);
32 -
33 getline(bm, sequence); 41 getline(bm, sequence);
42 + d.pdb = name;
43 + d.seq_pdb = sequence;
44 + list_pdb_seq.push_back(d);
45 +
34 getline(bm, structure); 46 getline(bm, structure);
35 getline(bm, contacts); 47 getline(bm, contacts);
36 } 48 }
37 bm.close(); 49 bm.close();
38 } 50 }
39 - return list_pdb; 51 + return list_pdb_seq;
40 } 52 }
41 53
42 string trim(string str) { 54 string trim(string str) {
...@@ -45,101 +57,118 @@ string trim(string str) { ...@@ -45,101 +57,118 @@ string trim(string str) {
45 return str; 57 return str;
46 } 58 }
47 59
48 -string find_id_pattern(string& pdb_pattern, const string& benchmark) { 60 +data find_id_pattern(string& pdb_pattern, const string& benchmark) {
49 - vector<string> l = get_list_pdb_benchmark(benchmark); 61 + vector<data> l = get_list_pdb_benchmark(benchmark);
50 - for (string pdb_bm : l) { 62 + int size = l.size();
51 - int size = pdb_bm.size(); 63 +
52 - string cmp = pdb_bm.substr(0, size-2); 64 + for (data d : l) {
65 + string cmp = d.pdb;
66 + cmp = cmp.substr(0, d.pdb.size()-2);
53 if (!cmp.compare(pdb_pattern)) { 67 if (!cmp.compare(pdb_pattern)) {
54 - return pdb_bm; 68 + return d;
55 } 69 }
56 } 70 }
57 - return string(); 71 + return data();
58 } 72 }
59 73
60 -vector<pair<string, string>> find_id(const string& bibli, const string& benchmark) { 74 +vector<data> find_id(const string& bibli, const string& benchmark) {
61 ifstream lib(bibli); 75 ifstream lib(bibli);
62 json js = json::parse(lib); 76 json js = json::parse(lib);
63 77
64 - vector<pair<string, string>> association; 78 + //nam seq_bm et id seq_id
79 + vector<data> association;
65 80
66 for (auto it = js.begin(); it != js.end(); ++it) { 81 for (auto it = js.begin(); it != js.end(); ++it) {
67 string id = it.key(); 82 string id = it.key();
83 + data d;
84 +
68 for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) { 85 for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) {
69 string field = it2.key(); 86 string field = it2.key();
87 + string seq;
70 if (!field.compare("pdb")) { 88 if (!field.compare("pdb")) {
71 int n = js[id][field].size(); 89 int n = js[id][field].size();
72 for (int i = 0; i < n ; i++) { 90 for (int i = 0; i < n ; i++) {
73 ostringstream stream; 91 ostringstream stream;
74 stream << js[id][field][i]; 92 stream << js[id][field][i];
75 string pdb = trim(stream.str()); 93 string pdb = trim(stream.str());
76 - string pdb_complete = find_id_pattern(pdb, benchmark); 94 +
77 - if (!(pdb_complete.empty())) { 95 + d = find_id_pattern(pdb, benchmark);
78 - pair<string, string> p; 96 + }
79 - p.first = pdb_complete; 97 + }
80 - p.second = id; 98 +
81 - association.push_back(p); 99 + if (!field.compare("sequence")) {
82 - } 100 + seq = it2.value();
101 +
102 + if (!(d.pdb.empty())) {
103 + d.id = id;
104 + d.cmp = seq;
105 + association.push_back(d);
83 } 106 }
84 } 107 }
85 } 108 }
86 } 109 }
87 lib.close(); 110 lib.close();
111 + cout << association.size() << endl;
88 return association; 112 return association;
89 } 113 }
90 114
91 -bool does_it_match(const string& result, const string& id_motif) { 115 +bool does_it_match(const string& seq, const string& seq_motif) {
92 - ifstream f_res(result); 116 + size_t found = seq_motif.find("&");
93 - if (f_res.is_open()) { 117 + size_t size = seq_motif.size();
94 - string name; 118 + vector<string> list_cmp;
95 - string seq; 119 + if (found != std::string::npos) {
96 - string struc; 120 + int count = 1;
97 - string contacts; 121 +
122 + string cmp = seq_motif.substr(0, found);
123 + list_cmp.push_back(cmp);
124 + while(found != std::string::npos) {
125 + size_t begin = found;
126 + found = seq_motif.find("&", found + 1);
127 + cmp = seq_motif.substr(begin+1, found-begin-1);
128 + list_cmp.push_back(cmp);
129 + count++;
130 + }
98 131
99 - getline(f_res, name); 132 + found = seq.find(list_cmp[0]);
100 - getline(f_res, seq); 133 + int count2 = 1;
101 - while (getline(f_res, struc)) { 134 + while((found != std::string::npos) && (count2 < count)) {
102 - string motif_json = "JSON" + id_motif + " +"; 135 + size_t begin = found;
103 - if(struc.find(motif_json, 0) != string::npos) { 136 + found = seq.find(list_cmp[count2], found + 1);
104 - return true; 137 + count2++;
105 - } 138 + }
106 - motif_json = "JSON" + id_motif + "\n"; 139 +
107 - if(struc.find(motif_json, 0) != string::npos) { 140 + if(count == count2) {
108 - return true; 141 + return true;
109 - } 142 + }
110 - getline(f_res,contacts); 143 +
144 + } else {
145 + found = seq.find(seq_motif);
146 + if (found != std::string::npos) {
147 + return true;
111 } 148 }
112 - f_res.close();
113 } 149 }
114 return false; 150 return false;
115 } 151 }
116 152
117 vector<string> select_not_motif(const string& bibli, const string& benchmark) { 153 vector<string> select_not_motif(const string& bibli, const string& benchmark) {
118 vector<string> selection; 154 vector<string> selection;
119 - vector<pair<string, string>> association = find_id(bibli, benchmark); 155 + vector<data> association = find_id(bibli, benchmark);
120 - vector<string> list_bm = get_list_pdb_benchmark(benchmark);
121 -
122 - string path_begin = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/test_";
123 - string path_MFE_F = ".json_pmF_MEA";
124 156
125 - for (pair<string, string> p : association) { 157 + for (data d : association) {
126 - string id_motif = p.second; 158 + selection.push_back(d.id);
127 - selection.push_back(id_motif);
128 } 159 }
129 - for (pair<string, string> p : association) {
130 - cout << p.first << ", " << p.second << endl;
131 - }
132 - cout << "size: " << association.size() << endl;
133 -
134 - for (string pdb : list_bm) {
135 - string path_result = path_begin + pdb + path_MFE_F;
136 - for (pair<string,string> pair : association) {
137 - if (pair.first.substr(0, pair.first.size()-2).compare(pdb.substr(0, pdb.size()-2)) != 0) {
138 - bool test = does_it_match(path_result, pair.second);
139 160
161 + for (data d : association) {
162 + for (data d2 : association) {
163 + string seq = d.seq_pdb;
164 + string seq2 = d2.cmp;
165 + bool test = false;
166 +
167 + if(d.pdb.substr(0, d.pdb.size()-2) != d2.pdb.substr(0, d2.pdb.size()-2)) {
168 + test = does_it_match(seq, seq2);
140 if (test) { 169 if (test) {
141 - //if (!(pair.second.compare("954"))) { cout << "p1: " << pair.first << "pdb: " << pdb << endl;} 170 + cout << "pdb: " << d.pdb << " vs " << d2.pdb << " " << d2.cmp << " " << d2.id << endl;
142 - auto position = find(selection.begin(), selection.end(), pair.second); 171 + auto position = find(selection.begin(), selection.end(), d.id);
143 if (position != selection.end()) { 172 if (position != selection.end()) {
144 int index = position - selection.begin(); 173 int index = position - selection.begin();
145 selection.erase(selection.begin() + index); 174 selection.erase(selection.begin() + index);
...@@ -161,20 +190,30 @@ int main() ...@@ -161,20 +190,30 @@ int main()
161 string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; 190 string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json";
162 string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn"; 191 string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn";
163 192
164 - /*vector<pair<string, string>> association = find_id(bibli, benchmark); 193 + /*vector<data> v = get_list_pdb_benchmark(benchmark);
165 - for (pair<string,string> p : association) { 194 + for (data d : v) {
166 - cout << "<" << p.first << ", " << p.second << ">" << endl; 195 + cout << d.pdb << ", " << d.seq_pdb << endl;
167 }*/ 196 }*/
168 197
198 + /*string name = "1U6P_B";
199 + data d = find_id_pattern(name, benchmark);
200 + cout << "name: " << d.pdb << ", seq: " << d.seq_pdb << endl;*/
201 +
202 + /*vector<data> association = find_id(bibli, benchmark);
203 + for (data d : association) {
204 + cout << "<" << d.pdb << ", " << d.seq_pdb << ">, " << "<" << d.id << ", " << d.cmp << ">" << endl;
205 + }*/
206 +
207 + /*string seq = "UGCGCUUGGCGUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGGCACCGAGUCGGUGCUU";
208 + string seq_motif = "UGCGCUUGGCGUUUUAGAGC&GCAAGUUAAAAUAAGGCUAGUCCGUUAUCAA&UGGCACCGAGUCG&U";
209 + bool test = does_it_match(seq, seq_motif);
210 + cout << test << endl;*/
211 +
169 vector<string> selection = select_not_motif(bibli, benchmark); 212 vector<string> selection = select_not_motif(bibli, benchmark);
170 for (string str : selection) { 213 for (string str : selection) {
171 cout << str << ", "; 214 cout << str << ", ";
172 } 215 }
173 cout << endl; 216 cout << endl;
174 217
175 - /*string result = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/test_1U6P_B.json_pmF_MEA";
176 - bool test = does_it_match(result, "150");
177 - cout << "test : " << test << endl;*/
178 -
179 return 0; 218 return 0;
180 } 219 }
...\ No newline at end of file ...\ No newline at end of file
......