Scripts corrigés pour le matching unique et la suppression des pdbs
Showing 3 changed files with 186 additions and 125 deletions
... | @@ -24,7 +24,7 @@ def run_test(cmd, log): | ... | @@ -24,7 +24,7 @@ def run_test(cmd, log): |
24 | log.flush() | 24 | log.flush() |
25 | rc = process.poll() | 25 | rc = process.poll() |
26 | 26 | ||
27 | -def create_command_E(name): | 27 | +def create_command_E(name, estimator): |
28 | #cmd = ("python3 /mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/biorseo.py -i " + | 28 | #cmd = ("python3 /mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/biorseo.py -i " + |
29 | cmd = ("python3 /local/local/BiorseoNath/biorseo.py -i " + | 29 | cmd = ("python3 /local/local/BiorseoNath/biorseo.py -i " + |
30 | "/local/local/BiorseoNath/data/fasta/" + | 30 | "/local/local/BiorseoNath/data/fasta/" + |
... | @@ -32,12 +32,12 @@ def create_command_E(name): | ... | @@ -32,12 +32,12 @@ def create_command_E(name): |
32 | "-O results/ " + | 32 | "-O results/ " + |
33 | "--contacts " + | 33 | "--contacts " + |
34 | "--patternmatch " + | 34 | "--patternmatch " + |
35 | - "--func E --MFE -v " + | 35 | + "--func E --" + estimator + " -v " + |
36 | "--biorseo-dir /local/local/BiorseoNath " + | 36 | "--biorseo-dir /local/local/BiorseoNath " + |
37 | "--modules-path /local/local/BiorseoNath/data/modules/ISAURE/Motifs_derniere_version ") | 37 | "--modules-path /local/local/BiorseoNath/data/modules/ISAURE/Motifs_derniere_version ") |
38 | return cmd | 38 | return cmd |
39 | 39 | ||
40 | -def create_command_F(name): | 40 | +def create_command_F(name, estimator): |
41 | #cmd = ("python3 /mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/biorseo.py -i " + | 41 | #cmd = ("python3 /mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/biorseo.py -i " + |
42 | cmd = ("python3 /local/local/BiorseoNath/biorseo.py -i " + | 42 | cmd = ("python3 /local/local/BiorseoNath/biorseo.py -i " + |
43 | "/local/local/BiorseoNath/data/fasta/" + | 43 | "/local/local/BiorseoNath/data/fasta/" + |
... | @@ -45,7 +45,7 @@ def create_command_F(name): | ... | @@ -45,7 +45,7 @@ def create_command_F(name): |
45 | "-O results/ " + | 45 | "-O results/ " + |
46 | "--contacts " + | 46 | "--contacts " + |
47 | "--patternmatch " + | 47 | "--patternmatch " + |
48 | - "--func F --MFE -v " + | 48 | + "--func F --" + estimator + " -v " + |
49 | "--biorseo-dir /local/local/BiorseoNath " + | 49 | "--biorseo-dir /local/local/BiorseoNath " + |
50 | "--modules-path /local/local/BiorseoNath/data/modules/ISAURE/Motifs_derniere_version ") | 50 | "--modules-path /local/local/BiorseoNath/data/modules/ISAURE/Motifs_derniere_version ") |
51 | return cmd | 51 | return cmd |
... | @@ -238,7 +238,7 @@ def set_axis_style(ax, labels): | ... | @@ -238,7 +238,7 @@ def set_axis_style(ax, labels): |
238 | ax.set_xlim(0.25, len(labels) + 0.75) | 238 | ax.set_xlim(0.25, len(labels) + 0.75) |
239 | ax.set_xlabel('Sample name') | 239 | ax.set_xlabel('Sample name') |
240 | 240 | ||
241 | -def visualization_best_mcc(list_struct2d, list_contacts, function, color, lines_color): | 241 | +def visualization_best_mcc(list_struct2d, list_contacts, estimator, function, color, lines_color): |
242 | 242 | ||
243 | np_struct2d = np.array(list_struct2d) | 243 | np_struct2d = np.array(list_struct2d) |
244 | np_contacts = np.array(list_contacts) | 244 | np_contacts = np.array(list_contacts) |
... | @@ -268,7 +268,7 @@ def visualization_best_mcc(list_struct2d, list_contacts, function, color, lines_ | ... | @@ -268,7 +268,7 @@ def visualization_best_mcc(list_struct2d, list_contacts, function, color, lines_ |
268 | 268 | ||
269 | for v in violins['bodies']: | 269 | for v in violins['bodies']: |
270 | v.set_facecolor(color) | 270 | v.set_facecolor(color) |
271 | - plt.savefig('visualisation_16_06_MFE_' + function + '.png', bbox_inches='tight') | 271 | + plt.savefig('visualisation_16_06_' + estimator + '_' + function + '.png', bbox_inches='tight') |
272 | 272 | ||
273 | def get_list_structs_contacts(path_benchmark, estimator, function): | 273 | def get_list_structs_contacts(path_benchmark, estimator, function): |
274 | myfile = open(path_benchmark, "r") | 274 | myfile = open(path_benchmark, "r") |
... | @@ -333,7 +333,7 @@ def visualization_all_mcc(path_benchmark, estimator, function): | ... | @@ -333,7 +333,7 @@ def visualization_all_mcc(path_benchmark, estimator, function): |
333 | 333 | ||
334 | plt.figure(figsize=(25,4),dpi=200) | 334 | plt.figure(figsize=(25,4),dpi=200) |
335 | plt.xticks(rotation=90) | 335 | plt.xticks(rotation=90) |
336 | - plt.boxplot(data) | 336 | + plt.boxplot(data, medianprops=dict(color='black')) |
337 | for i in range(absciss): | 337 | for i in range(absciss): |
338 | y =data[i] | 338 | y =data[i] |
339 | x = np.random.normal(1 + i, 0.04, size=len(y)) | 339 | x = np.random.normal(1 + i, 0.04, size=len(y)) |
... | @@ -356,7 +356,7 @@ def visualization_all_mcc(path_benchmark, estimator, function): | ... | @@ -356,7 +356,7 @@ def visualization_all_mcc(path_benchmark, estimator, function): |
356 | 356 | ||
357 | plt.figure(figsize=(25, 4), dpi=200) | 357 | plt.figure(figsize=(25, 4), dpi=200) |
358 | plt.xticks(rotation=90) | 358 | plt.xticks(rotation=90) |
359 | - plt.boxplot(data) | 359 | + plt.boxplot(data, medianprops=dict(color='black')) |
360 | for i in range(absciss): | 360 | for i in range(absciss): |
361 | y = data[i] | 361 | y = data[i] |
362 | x = np.random.normal(1 + i, 0.04, size=len(y)) | 362 | x = np.random.normal(1 + i, 0.04, size=len(y)) |
... | @@ -372,53 +372,90 @@ def visualization_all_mcc(path_benchmark, estimator, function): | ... | @@ -372,53 +372,90 @@ def visualization_all_mcc(path_benchmark, estimator, function): |
372 | #cmd1 = ("cppsrc/Scripts/countPattern") | 372 | #cmd1 = ("cppsrc/Scripts/countPattern") |
373 | #cmd2 = ("cppsrc/Scripts/deletePdb") | 373 | #cmd2 = ("cppsrc/Scripts/deletePdb") |
374 | 374 | ||
375 | -"""myfile = open("data/modules/ISAURE/Motifs_version_initiale/benchmark.txt", "r") | 375 | +myfile = open("data/modules/ISAURE/Motifs_version_initiale/benchmark.txt", "r") |
376 | name = myfile.readline() | 376 | name = myfile.readline() |
377 | contacts = myfile.readline() | 377 | contacts = myfile.readline() |
378 | seq = myfile.readline() | 378 | seq = myfile.readline() |
379 | structure2d = myfile.readline() | 379 | structure2d = myfile.readline() |
380 | 380 | ||
381 | -list_struct2d_E = [] | 381 | +list_struct2d_E_MFE = [] |
382 | -list_contacts_E = [] | 382 | +list_contacts_E_MFE = [] |
383 | -list_struct2d_F = [] | 383 | +list_struct2d_F_MFE = [] |
384 | -list_contacts_F = [] | 384 | +list_contacts_F_MFE = [] |
385 | -countE = 0 | 385 | + |
386 | -countF = 0 | 386 | +list_struct2d_E_MEA = [] |
387 | +list_contacts_E_MEA = [] | ||
388 | +list_struct2d_F_MEA = [] | ||
389 | +list_contacts_F_MEA = [] | ||
390 | + | ||
391 | +countE_MFE = 0 | ||
392 | +countF_MFE = 0 | ||
393 | + | ||
394 | +countE_MEA = 0 | ||
395 | +countF_MEA = 0 | ||
387 | while seq: | 396 | while seq: |
388 | name = name[6:].strip() | 397 | name = name[6:].strip() |
389 | print(name) | 398 | print(name) |
390 | - run_test(cmd2 + " " + name + ".fa", log) | 399 | + |
391 | - print(cmd2 + " " + name + ".fa") | 400 | + cmd2 = ("cppsrc/Scripts/deletePdb " + name) |
392 | 401 | ||
393 | - cmd3 = create_command_E(name) | 402 | + cmd3 = create_command_E(name, 'MFE') |
394 | os.system(cmd3) | 403 | os.system(cmd3) |
395 | 404 | ||
396 | file_path = "results/test_" + name + ".json_pmE_MFE" | 405 | file_path = "results/test_" + name + ".json_pmE_MFE" |
397 | if os.path.isfile(file_path): | 406 | if os.path.isfile(file_path): |
398 | - tabE = write_mcc_in_file_E(name, contacts, structure2d) | 407 | + tabE_MFE = write_mcc_in_file_E(name, contacts, structure2d) |
399 | - list_contacts_E.append(tabE[0]) | 408 | + list_contacts_E_MFE.append(tabE_MFE[0]) |
400 | - list_struct2d_E.append(tabE[1]) | 409 | + list_struct2d_E_MFE.append(tabE_MFE[1]) |
401 | - countE = countE + 1 | 410 | + countE_MFE = countE_MFE + 1 |
402 | 411 | ||
403 | - cmd3 = create_command_F(name) | 412 | + cmd3 = create_command_F(name, 'MFE') |
404 | os.system(cmd3) | 413 | os.system(cmd3) |
405 | 414 | ||
406 | file_path = "results/test_" + name + ".json_pmF_MFE" | 415 | file_path = "results/test_" + name + ".json_pmF_MFE" |
407 | if os.path.isfile(file_path): | 416 | if os.path.isfile(file_path): |
408 | - tabF = write_mcc_in_file_F(name, contacts, structure2d) | 417 | + tabF_MFE = write_mcc_in_file_F(name, contacts, structure2d) |
409 | - list_contacts_F.append(tabF[0]) | 418 | + list_contacts_F_MFE.append(tabF_MFE[0]) |
410 | - list_struct2d_F.append(tabF[1]) | 419 | + list_struct2d_F_MFE.append(tabF_MFE[1]) |
411 | - countF = countF + 1 | 420 | + countF_MFE = countF_MFE + 1 |
421 | + | ||
422 | + cmd3 = create_command_E(name, 'MEA') | ||
423 | + os.system(cmd3) | ||
424 | + | ||
425 | + file_path = "results/test_" + name + ".json_pmE_MEA" | ||
426 | + if os.path.isfile(file_path): | ||
427 | + tabE_MEA = write_mcc_in_file_E(name, contacts, structure2d) | ||
428 | + list_contacts_E_MEA.append(tabE_MEA[0]) | ||
429 | + list_struct2d_E_MEA.append(tabE_MEA[1]) | ||
430 | + countE_MEA = countE_MEA + 1 | ||
431 | + | ||
432 | + cmd3 = create_command_F(name, 'MEA') | ||
433 | + os.system(cmd3) | ||
434 | + | ||
435 | + file_path = "results/test_" + name + ".json_pmF_MEA" | ||
436 | + if os.path.isfile(file_path): | ||
437 | + tabF_MEA = write_mcc_in_file_F(name, contacts, structure2d) | ||
438 | + list_contacts_F_MEA.append(tabF_MEA[0]) | ||
439 | + list_struct2d_F_MEA.append(tabF_MEA[1]) | ||
440 | + countF_MEA = countF_MEA + 1 | ||
412 | 441 | ||
413 | name = myfile.readline() | 442 | name = myfile.readline() |
414 | contacts = myfile.readline() | 443 | contacts = myfile.readline() |
415 | seq = myfile.readline() | 444 | seq = myfile.readline() |
416 | structure2d = myfile.readline() | 445 | structure2d = myfile.readline() |
417 | 446 | ||
418 | -visualization_best_mcc(list_struct2d_E, list_contacts_E, 'E', 'red', '#900C3F') | ||
419 | -visualization_best_mcc(list_struct2d_F, list_contacts_F, 'F', 'blue', '#0900FF') | ||
420 | -print("countE: " + str(countE) + "\n") | ||
421 | -print("countF: " + str(countF) + "\n") | ||
422 | -myfile.close()""" | ||
423 | -path_benchmark = "data/modules/ISAURE/Motifs_version_initiale/benchmark.txt" | ||
424 | -visualization_all_mcc(path_benchmark,'MEA', 'F') | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
447 | +visualization_best_mcc(list_struct2d_E_MFE, list_contacts_E_MFE, 'MFE', 'E', 'red', '#900C3F') | ||
448 | +visualization_best_mcc(list_struct2d_F_MFE, list_contacts_F_MFE, 'MFE', 'F', 'blue', '#0900FF') | ||
449 | +visualization_best_mcc(list_struct2d_E_MEA, list_contacts_E_MEA, 'MEA', 'E', 'red', '#900C3F') | ||
450 | +visualization_best_mcc(list_struct2d_F_MEA, list_contacts_F_MEA, 'MEA', 'F', 'blue', '#0900FF') | ||
451 | + | ||
452 | +print("countE_MFE: " + str(countE_MFE) + "\n") | ||
453 | +print("countF_MFE: " + str(countF_MFE) + "\n") | ||
454 | +print("countE_MEA: " + str(countE_MEA) + "\n") | ||
455 | +print("countF_MEA: " + str(countF_MEA) + "\n") | ||
456 | +myfile.close() | ||
457 | +#path_benchmark = "data/modules/ISAURE/Motifs_version_initiale/benchmark.txt" | ||
458 | +#visualization_all_mcc(path_benchmark,'MEA', 'F') | ||
459 | +#visualization_all_mcc(path_benchmark,'MEA', 'E') | ||
460 | +#visualization_all_mcc(path_benchmark,'MFE', 'E') | ||
461 | +#visualization_all_mcc(path_benchmark,'MFE', 'F') | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -12,45 +12,31 @@ | ... | @@ -12,45 +12,31 @@ |
12 | using namespace std; | 12 | using namespace std; |
13 | using json = nlohmann::json; | 13 | using json = nlohmann::json; |
14 | 14 | ||
15 | -void delete_redundant_pdb(const string& jsonlibrary, const string& fasta, const string& jsonoutfile) { | 15 | +void delete_redundant_pdb(const string& jsonlibrary, const string& name, const string& jsonoutfile) { |
16 | std::ifstream lib(jsonlibrary); | 16 | std::ifstream lib(jsonlibrary); |
17 | 17 | ||
18 | std::ofstream outfile (jsonoutfile); | 18 | std::ofstream outfile (jsonoutfile); |
19 | json new_motif; | 19 | json new_motif; |
20 | json new_id; | 20 | json new_id; |
21 | json js = json::parse(lib); | 21 | json js = json::parse(lib); |
22 | - | ||
23 | - std::ifstream file(fasta); | ||
24 | - string pdb, seq; | ||
25 | - std::getline(file, pdb); | ||
26 | - std::getline(file, seq); | ||
27 | 22 | ||
28 | for (auto it = js.begin(); it != js.end(); ++it) { | 23 | for (auto it = js.begin(); it != js.end(); ++it) { |
29 | string id = it.key(); | 24 | string id = it.key(); |
30 | vector<string> list_pdbs; | 25 | vector<string> list_pdbs; |
31 | bool is_added = true; | 26 | bool is_added = true; |
32 | 27 | ||
33 | - //cout << "id: " << id << endl; | ||
34 | for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) { | 28 | for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) { |
35 | - string test = it2.key(); | 29 | + string field = it2.key(); |
36 | 30 | ||
37 | - if (!test.compare("pdb")) { | 31 | + if (!field.compare("pdb")) { |
38 | vector<string> tab = it2.value(); | 32 | vector<string> tab = it2.value(); |
39 | list_pdbs = tab; | 33 | list_pdbs = tab; |
40 | - /*set<set<string>>::iterator iit; | ||
41 | - set<string>::iterator iit2; | ||
42 | - for(iit = list_pfams.begin(); iit != list_pfams.end(); iit++) { | ||
43 | - for (iit2 = iit->begin(); iit2 != iit->end(); ++iit2) { | ||
44 | - cout << *iit2 << endl; | ||
45 | - } | ||
46 | - cout << endl << endl; | ||
47 | - }*/ | ||
48 | } else { | 34 | } else { |
49 | - new_id[test] = it2.value(); | 35 | + new_id[field] = it2.value(); |
50 | } | 36 | } |
51 | } | 37 | } |
52 | - | 38 | + |
53 | - if (count(list_pdbs.begin(), list_pdbs.end(), pdb.substr(6,pdb.size()))) { | 39 | + if (count(list_pdbs.begin(), list_pdbs.end(), name.substr(0, name.size()-2))) { |
54 | is_added = false; | 40 | is_added = false; |
55 | } | 41 | } |
56 | if (is_added) { | 42 | if (is_added) { |
... | @@ -66,10 +52,9 @@ void delete_redundant_pdb(const string& jsonlibrary, const string& fasta, const | ... | @@ -66,10 +52,9 @@ void delete_redundant_pdb(const string& jsonlibrary, const string& fasta, const |
66 | int main(int argc, char** argv) | 52 | int main(int argc, char** argv) |
67 | { | 53 | { |
68 | string jsonlibrary = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_final.json"; | 54 | string jsonlibrary = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/motifs_final.json"; |
69 | - string fasta = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/fasta/"; | ||
70 | string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; | 55 | string out = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; |
71 | - fasta = fasta + argv[1]; | 56 | + string name = argv[1]; |
72 | - delete_redundant_pdb(jsonlibrary, fasta, out); | 57 | + delete_redundant_pdb(jsonlibrary, name, out); |
73 | return 0; | 58 | return 0; |
74 | } | 59 | } |
75 | 60 | ... | ... |
... | @@ -15,10 +15,19 @@ using json = nlohmann::json; | ... | @@ -15,10 +15,19 @@ using json = nlohmann::json; |
15 | This script removes from the library all the patterns that match ONLY the sequence from which they come. | 15 | This script removes from the library all the patterns that match ONLY the sequence from which they come. |
16 | */ | 16 | */ |
17 | 17 | ||
18 | -vector<string> get_list_pdb_benchmark(const string& benchmark) { | 18 | +struct data { |
19 | + string pdb; | ||
20 | + string seq_pdb; | ||
21 | + string id; | ||
22 | + string cmp; | ||
23 | +}; | ||
24 | +typedef struct data data; | ||
25 | + | ||
26 | + | ||
27 | +vector<data> get_list_pdb_benchmark(const string& benchmark) { | ||
19 | 28 | ||
20 | fstream bm(benchmark); | 29 | fstream bm(benchmark); |
21 | - vector<string> list_pdb; | 30 | + vector<data> list_pdb_seq; |
22 | if (bm.is_open()) { | 31 | if (bm.is_open()) { |
23 | string name; | 32 | string name; |
24 | string sequence; | 33 | string sequence; |
... | @@ -26,17 +35,20 @@ vector<string> get_list_pdb_benchmark(const string& benchmark) { | ... | @@ -26,17 +35,20 @@ vector<string> get_list_pdb_benchmark(const string& benchmark) { |
26 | string contacts; | 35 | string contacts; |
27 | 36 | ||
28 | while (getline(bm, name)) { | 37 | while (getline(bm, name)) { |
38 | + data d; | ||
29 | int size = name.size(); | 39 | int size = name.size(); |
30 | name = name.substr(5,size-6); | 40 | name = name.substr(5,size-6); |
31 | - list_pdb.push_back(name); | ||
32 | - | ||
33 | getline(bm, sequence); | 41 | getline(bm, sequence); |
42 | + d.pdb = name; | ||
43 | + d.seq_pdb = sequence; | ||
44 | + list_pdb_seq.push_back(d); | ||
45 | + | ||
34 | getline(bm, structure); | 46 | getline(bm, structure); |
35 | getline(bm, contacts); | 47 | getline(bm, contacts); |
36 | } | 48 | } |
37 | bm.close(); | 49 | bm.close(); |
38 | } | 50 | } |
39 | - return list_pdb; | 51 | + return list_pdb_seq; |
40 | } | 52 | } |
41 | 53 | ||
42 | string trim(string str) { | 54 | string trim(string str) { |
... | @@ -45,101 +57,118 @@ string trim(string str) { | ... | @@ -45,101 +57,118 @@ string trim(string str) { |
45 | return str; | 57 | return str; |
46 | } | 58 | } |
47 | 59 | ||
48 | -string find_id_pattern(string& pdb_pattern, const string& benchmark) { | 60 | +data find_id_pattern(string& pdb_pattern, const string& benchmark) { |
49 | - vector<string> l = get_list_pdb_benchmark(benchmark); | 61 | + vector<data> l = get_list_pdb_benchmark(benchmark); |
50 | - for (string pdb_bm : l) { | 62 | + int size = l.size(); |
51 | - int size = pdb_bm.size(); | 63 | + |
52 | - string cmp = pdb_bm.substr(0, size-2); | 64 | + for (data d : l) { |
65 | + string cmp = d.pdb; | ||
66 | + cmp = cmp.substr(0, d.pdb.size()-2); | ||
53 | if (!cmp.compare(pdb_pattern)) { | 67 | if (!cmp.compare(pdb_pattern)) { |
54 | - return pdb_bm; | 68 | + return d; |
55 | } | 69 | } |
56 | } | 70 | } |
57 | - return string(); | 71 | + return data(); |
58 | } | 72 | } |
59 | 73 | ||
60 | -vector<pair<string, string>> find_id(const string& bibli, const string& benchmark) { | 74 | +vector<data> find_id(const string& bibli, const string& benchmark) { |
61 | ifstream lib(bibli); | 75 | ifstream lib(bibli); |
62 | json js = json::parse(lib); | 76 | json js = json::parse(lib); |
63 | 77 | ||
64 | - vector<pair<string, string>> association; | 78 | + // each entry: benchmark name and sequence (pdb, seq_pdb), plus motif id and motif sequence (id, cmp) |
79 | + vector<data> association; | ||
65 | 80 | ||
66 | for (auto it = js.begin(); it != js.end(); ++it) { | 81 | for (auto it = js.begin(); it != js.end(); ++it) { |
67 | string id = it.key(); | 82 | string id = it.key(); |
83 | + data d; | ||
84 | + | ||
68 | for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) { | 85 | for (auto it2 = js[id].begin(); it2 != js[id].end(); ++it2) { |
69 | string field = it2.key(); | 86 | string field = it2.key(); |
87 | + string seq; | ||
70 | if (!field.compare("pdb")) { | 88 | if (!field.compare("pdb")) { |
71 | int n = js[id][field].size(); | 89 | int n = js[id][field].size(); |
72 | for (int i = 0; i < n ; i++) { | 90 | for (int i = 0; i < n ; i++) { |
73 | ostringstream stream; | 91 | ostringstream stream; |
74 | stream << js[id][field][i]; | 92 | stream << js[id][field][i]; |
75 | string pdb = trim(stream.str()); | 93 | string pdb = trim(stream.str()); |
76 | - string pdb_complete = find_id_pattern(pdb, benchmark); | 94 | + |
77 | - if (!(pdb_complete.empty())) { | 95 | + d = find_id_pattern(pdb, benchmark); |
78 | - pair<string, string> p; | 96 | + } |
79 | - p.first = pdb_complete; | 97 | + } |
80 | - p.second = id; | 98 | + |
81 | - association.push_back(p); | 99 | + if (!field.compare("sequence")) { |
82 | - } | 100 | + seq = it2.value(); |
101 | + | ||
102 | + if (!(d.pdb.empty())) { | ||
103 | + d.id = id; | ||
104 | + d.cmp = seq; | ||
105 | + association.push_back(d); | ||
83 | } | 106 | } |
84 | } | 107 | } |
85 | } | 108 | } |
86 | } | 109 | } |
87 | lib.close(); | 110 | lib.close(); |
111 | + cout << association.size() << endl; | ||
88 | return association; | 112 | return association; |
89 | } | 113 | } |
90 | 114 | ||
91 | -bool does_it_match(const string& result, const string& id_motif) { | 115 | +bool does_it_match(const string& seq, const string& seq_motif) { |
92 | - ifstream f_res(result); | 116 | + size_t found = seq_motif.find("&"); |
93 | - if (f_res.is_open()) { | 117 | + size_t size = seq_motif.size(); |
94 | - string name; | 118 | + vector<string> list_cmp; |
95 | - string seq; | 119 | + if (found != std::string::npos) { |
96 | - string struc; | 120 | + int count = 1; |
97 | - string contacts; | 121 | + |
122 | + string cmp = seq_motif.substr(0, found); | ||
123 | + list_cmp.push_back(cmp); | ||
124 | + while(found != std::string::npos) { | ||
125 | + size_t begin = found; | ||
126 | + found = seq_motif.find("&", found + 1); | ||
127 | + cmp = seq_motif.substr(begin+1, found-begin-1); | ||
128 | + list_cmp.push_back(cmp); | ||
129 | + count++; | ||
130 | + } | ||
98 | 131 | ||
99 | - getline(f_res, name); | 132 | + found = seq.find(list_cmp[0]); |
100 | - getline(f_res, seq); | 133 | + int count2 = 1; |
101 | - while (getline(f_res, struc)) { | 134 | + while((found != std::string::npos) && (count2 < count)) { |
102 | - string motif_json = "JSON" + id_motif + " +"; | 135 | + size_t begin = found; |
103 | - if(struc.find(motif_json, 0) != string::npos) { | 136 | + found = seq.find(list_cmp[count2], found + 1); |
104 | - return true; | 137 | + count2++; |
105 | - } | 138 | + } |
106 | - motif_json = "JSON" + id_motif + "\n"; | 139 | + |
107 | - if(struc.find(motif_json, 0) != string::npos) { | 140 | + if(count == count2) { |
108 | - return true; | 141 | + return true; |
109 | - } | 142 | + } |
110 | - getline(f_res,contacts); | 143 | + |
144 | + } else { | ||
145 | + found = seq.find(seq_motif); | ||
146 | + if (found != std::string::npos) { | ||
147 | + return true; | ||
111 | } | 148 | } |
112 | - f_res.close(); | ||
113 | } | 149 | } |
114 | return false; | 150 | return false; |
115 | } | 151 | } |
116 | 152 | ||
117 | vector<string> select_not_motif(const string& bibli, const string& benchmark) { | 153 | vector<string> select_not_motif(const string& bibli, const string& benchmark) { |
118 | vector<string> selection; | 154 | vector<string> selection; |
119 | - vector<pair<string, string>> association = find_id(bibli, benchmark); | 155 | + vector<data> association = find_id(bibli, benchmark); |
120 | - vector<string> list_bm = get_list_pdb_benchmark(benchmark); | ||
121 | - | ||
122 | - string path_begin = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/test_"; | ||
123 | - string path_MFE_F = ".json_pmF_MEA"; | ||
124 | 156 | ||
125 | - for (pair<string, string> p : association) { | 157 | + for (data d : association) { |
126 | - string id_motif = p.second; | 158 | + selection.push_back(d.id); |
127 | - selection.push_back(id_motif); | ||
128 | } | 159 | } |
129 | - for (pair<string, string> p : association) { | ||
130 | - cout << p.first << ", " << p.second << endl; | ||
131 | - } | ||
132 | - cout << "size: " << association.size() << endl; | ||
133 | - | ||
134 | - for (string pdb : list_bm) { | ||
135 | - string path_result = path_begin + pdb + path_MFE_F; | ||
136 | - for (pair<string,string> pair : association) { | ||
137 | - if (pair.first.substr(0, pair.first.size()-2).compare(pdb.substr(0, pdb.size()-2)) != 0) { | ||
138 | - bool test = does_it_match(path_result, pair.second); | ||
139 | 160 | ||
161 | + for (data d : association) { | ||
162 | + for (data d2 : association) { | ||
163 | + string seq = d.seq_pdb; | ||
164 | + string seq2 = d2.cmp; | ||
165 | + bool test = false; | ||
166 | + | ||
167 | + if(d.pdb.substr(0, d.pdb.size()-2) != d2.pdb.substr(0, d2.pdb.size()-2)) { | ||
168 | + test = does_it_match(seq, seq2); | ||
140 | if (test) { | 169 | if (test) { |
141 | - //if (!(pair.second.compare("954"))) { cout << "p1: " << pair.first << "pdb: " << pdb << endl;} | 170 | + cout << "pdb: " << d.pdb << " vs " << d2.pdb << " " << d2.cmp << " " << d2.id << endl; |
142 | - auto position = find(selection.begin(), selection.end(), pair.second); | 171 | + auto position = find(selection.begin(), selection.end(), d.id); |
143 | if (position != selection.end()) { | 172 | if (position != selection.end()) { |
144 | int index = position - selection.begin(); | 173 | int index = position - selection.begin(); |
145 | selection.erase(selection.begin() + index); | 174 | selection.erase(selection.begin() + index); |
... | @@ -161,20 +190,30 @@ int main() | ... | @@ -161,20 +190,30 @@ int main() |
161 | string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; | 190 | string bibli = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_derniere_version/motifs_final.json"; |
162 | string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn"; | 191 | string benchmark = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/data/modules/ISAURE/Motifs_version_initiale/benchmark.dbn"; |
163 | 192 | ||
164 | - /*vector<pair<string, string>> association = find_id(bibli, benchmark); | 193 | + /*vector<data> v = get_list_pdb_benchmark(benchmark); |
165 | - for (pair<string,string> p : association) { | 194 | + for (data d : v) { |
166 | - cout << "<" << p.first << ", " << p.second << ">" << endl; | 195 | + cout << d.pdb << ", " << d.seq_pdb << endl; |
167 | }*/ | 196 | }*/ |
168 | 197 | ||
198 | + /*string name = "1U6P_B"; | ||
199 | + data d = find_id_pattern(name, benchmark); | ||
200 | + cout << "name: " << d.pdb << ", seq: " << d.seq_pdb << endl;*/ | ||
201 | + | ||
202 | + /*vector<data> association = find_id(bibli, benchmark); | ||
203 | + for (data d : association) { | ||
204 | + cout << "<" << d.pdb << ", " << d.seq_pdb << ">, " << "<" << d.id << ", " << d.cmp << ">" << endl; | ||
205 | + }*/ | ||
206 | + | ||
207 | + /*string seq = "UGCGCUUGGCGUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGGCACCGAGUCGGUGCUU"; | ||
208 | + string seq_motif = "UGCGCUUGGCGUUUUAGAGC&GCAAGUUAAAAUAAGGCUAGUCCGUUAUCAA&UGGCACCGAGUCG&U"; | ||
209 | + bool test = does_it_match(seq, seq_motif); | ||
210 | + cout << test << endl;*/ | ||
211 | + | ||
169 | vector<string> selection = select_not_motif(bibli, benchmark); | 212 | vector<string> selection = select_not_motif(bibli, benchmark); |
170 | for (string str : selection) { | 213 | for (string str : selection) { |
171 | cout << str << ", "; | 214 | cout << str << ", "; |
172 | } | 215 | } |
173 | cout << endl; | 216 | cout << endl; |
174 | 217 | ||
175 | - /*string result = "/mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/results/test_1U6P_B.json_pmF_MEA"; | ||
176 | - bool test = does_it_match(result, "150"); | ||
177 | - cout << "test : " << test << endl;*/ | ||
178 | - | ||
179 | return 0; | 218 | return 0; |
180 | } | 219 | } |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment