Ajout de fonctions pour faire un boxplot pour les MCC de chaque séquence du benchmark
Showing
1 changed file
with
140 additions
and
17 deletions
... | @@ -5,6 +5,8 @@ import os.path | ... | @@ -5,6 +5,8 @@ import os.path |
5 | from math import sqrt, ceil | 5 | from math import sqrt, ceil |
6 | import numpy as np | 6 | import numpy as np |
7 | import matplotlib.pyplot as plt | 7 | import matplotlib.pyplot as plt |
8 | +import seaborn as sns | ||
9 | +import pandas as pd | ||
8 | 10 | ||
9 | 11 | ||
10 | log_path = "test.log" | 12 | log_path = "test.log" |
... | @@ -22,7 +24,20 @@ def run_test(cmd, log): | ... | @@ -22,7 +24,20 @@ def run_test(cmd, log): |
22 | log.flush() | 24 | log.flush() |
23 | rc = process.poll() | 25 | rc = process.poll() |
24 | 26 | ||
25 | -def create_command(name): | 27 | +def create_command_E(name): |
28 | + #cmd = ("python3 /mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/biorseo.py -i " + | ||
29 | + cmd = ("python3 /local/local/BiorseoNath/biorseo.py -i " + | ||
30 | + "/local/local/BiorseoNath/data/fasta/" + | ||
31 | + name + ".fa " + | ||
32 | + "-O results/ " + | ||
33 | + "--contacts " + | ||
34 | + "--patternmatch " + | ||
35 | + "--func E --MFE -v " + | ||
36 | + "--biorseo-dir /local/local/BiorseoNath " + | ||
37 | + "--modules-path /local/local/BiorseoNath/data/modules/ISAURE/Motifs_derniere_version ") | ||
38 | + return cmd | ||
39 | + | ||
40 | +def create_command_F(name): | ||
26 | #cmd = ("python3 /mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/biorseo.py -i " + | 41 | #cmd = ("python3 /mnt/c/Users/natha/Documents/IBISC/biorseo2/biorseo/biorseo.py -i " + |
27 | cmd = ("python3 /local/local/BiorseoNath/biorseo.py -i " + | 42 | cmd = ("python3 /local/local/BiorseoNath/biorseo.py -i " + |
28 | "/local/local/BiorseoNath/data/fasta/" + | 43 | "/local/local/BiorseoNath/data/fasta/" + |
... | @@ -129,8 +144,8 @@ def specificity(tp, tn, fp, fn): | ... | @@ -129,8 +144,8 @@ def specificity(tp, tn, fp, fn): |
129 | # ================== Code from Louis Beckey Benchark.py ============================== | 144 | # ================== Code from Louis Beckey Benchark.py ============================== |
130 | 145 | ||
131 | def write_mcc_in_file_E(sequence_id, true_contacts, true_structure): | 146 | def write_mcc_in_file_E(sequence_id, true_contacts, true_structure): |
132 | - read_prd = open("results/test_" + sequence_id + ".json_pmE_MEA", "r") | 147 | + read_prd = open("results/test_" + sequence_id + ".json_pmE_MFE", "r") |
133 | - write = open("results/test_" + sequence_id + ".mcc_E_MEA", "w") | 148 | + write = open("results/test_" + sequence_id + ".mcc_E_MFE", "w") |
134 | 149 | ||
135 | max_mcc_str = -1; | 150 | max_mcc_str = -1; |
136 | max_mcc_ctc = -1; | 151 | max_mcc_ctc = -1; |
... | @@ -223,12 +238,16 @@ def set_axis_style(ax, labels): | ... | @@ -223,12 +238,16 @@ def set_axis_style(ax, labels): |
223 | ax.set_xlim(0.25, len(labels) + 0.75) | 238 | ax.set_xlim(0.25, len(labels) + 0.75) |
224 | ax.set_xlabel('Sample name') | 239 | ax.set_xlabel('Sample name') |
225 | 240 | ||
226 | -def visualization(list_struct2d, list_contacts, function, color, lines_color): | 241 | +def visualization_best_mcc(list_struct2d, list_contacts, function, color, lines_color): |
227 | 242 | ||
228 | np_struct2d = np.array(list_struct2d) | 243 | np_struct2d = np.array(list_struct2d) |
229 | np_contacts = np.array(list_contacts) | 244 | np_contacts = np.array(list_contacts) |
230 | 245 | ||
231 | data_to_plot = [np_struct2d, np_contacts] | 246 | data_to_plot = [np_struct2d, np_contacts] |
247 | + median_2d = np.median(np_struct2d) | ||
248 | + median_ctc = np.median(np_contacts) | ||
249 | + print("mediane 2D: " + str(median_2d) + "\n") | ||
250 | + print("mediane ctc: " + str(median_ctc) + "\n") | ||
232 | 251 | ||
233 | fig = plt.figure() | 252 | fig = plt.figure() |
234 | 253 | ||
... | @@ -249,14 +268,111 @@ def visualization(list_struct2d, list_contacts, function, color, lines_color): | ... | @@ -249,14 +268,111 @@ def visualization(list_struct2d, list_contacts, function, color, lines_color): |
249 | 268 | ||
250 | for v in violins['bodies']: | 269 | for v in violins['bodies']: |
251 | v.set_facecolor(color) | 270 | v.set_facecolor(color) |
252 | - plt.savefig('visualisation' + function + '.png', bbox_inches='tight') | 271 | + plt.savefig('visualisation_16_06_MFE_' + function + '.png', bbox_inches='tight') |
272 | + | ||
273 | +def get_list_structs_contacts(path_benchmark, estimator, function): | ||
274 | + myfile = open(path_benchmark, "r") | ||
275 | + list_name = [] | ||
276 | + | ||
277 | + complete_list_struct2d_F = [] | ||
278 | + complete_list_contacts_F = [] | ||
279 | + | ||
280 | + name = myfile.readline() | ||
281 | + contacts = myfile.readline() | ||
282 | + seq = myfile.readline() | ||
283 | + structure2d = myfile.readline() | ||
284 | + count = 0 | ||
285 | + while seq: | ||
286 | + name = name[6:].strip() | ||
287 | + count = count + 1 | ||
288 | + file_path = "results/test_" + name + ".json_pm" + function +"_" + estimator | ||
289 | + if os.path.isfile(file_path): | ||
290 | + file_result = open(file_path, "r") | ||
291 | + list_struct2d_F = [] | ||
292 | + list_contacts_F = [] | ||
293 | + list_name.append(name) | ||
294 | + title_prd = file_result.readline() | ||
295 | + structure_prd = file_result.readline() | ||
296 | + sequence = structure_prd | ||
297 | + while structure_prd: | ||
298 | + structure_prd = file_result.readline() | ||
299 | + if (len(structure_prd) != 0): | ||
300 | + mcc_tab = compare_two_structures(structure2d, structure_prd[:len(sequence)]) | ||
301 | + mcc_str = mattews_corr_coeff(mcc_tab[0], mcc_tab[1], mcc_tab[2], mcc_tab[3]) | ||
302 | + list_struct2d_F.append(mcc_str) | ||
303 | + | ||
304 | + contacts_prd = file_result.readline() | ||
305 | + if (len(contacts_prd) == len(contacts)): | ||
306 | + mcc_tab = compare_two_contacts(contacts, contacts_prd) | ||
307 | + mcc_ctc = mattews_corr_coeff(mcc_tab[0], mcc_tab[1], mcc_tab[2], mcc_tab[3]) | ||
308 | + list_contacts_F.append(mcc_ctc) | ||
309 | + complete_list_struct2d_F.append(list_struct2d_F) | ||
310 | + complete_list_contacts_F.append(list_contacts_F) | ||
311 | + name = myfile.readline() | ||
312 | + contacts = myfile.readline() | ||
313 | + seq = myfile.readline() | ||
314 | + structure2d = myfile.readline() | ||
315 | + return [list_name, complete_list_struct2d_F, complete_list_contacts_F] | ||
316 | + myfile.close() | ||
317 | + | ||
318 | +def visualization_all_mcc(path_benchmark, estimator, function, color, lines_color): | ||
319 | + | ||
320 | + list_name = get_list_structs_contacts(path_benchmark, estimator, function)[0] | ||
321 | + tab_struct2d = get_list_structs_contacts(path_benchmark, estimator, function)[1] | ||
322 | + tab_contacts = get_list_structs_contacts(path_benchmark, estimator, function)[2] | ||
323 | + | ||
324 | + np_struct2d = np.array(tab_struct2d) | ||
325 | + size = len(tab_struct2d) | ||
326 | + list_median_str = [] | ||
327 | + for i in range(size): | ||
328 | + list_median_str.append(np.median(np_struct2d[i])) | ||
329 | + | ||
330 | + data = [x for _, x in sorted(zip(list_median_str, tab_struct2d))] | ||
331 | + boxName = [x for _, x in sorted(zip(list_median_str, list_name))] | ||
332 | + absciss = len(data) | ||
333 | + | ||
334 | + plt.figure(figsize=(25,4),dpi=200) | ||
335 | + plt.xticks(rotation=90) | ||
336 | + plt.boxplot(data) | ||
337 | + for i in range(absciss): | ||
338 | + y =data[i] | ||
339 | + x = np.random.normal(1 + i, 0.04, size=len(y)) | ||
340 | + plt.scatter(x, y) | ||
341 | + plt.xticks(np.arange(1, absciss + 1), boxName) | ||
342 | + | ||
343 | + plt.xlabel('nom de la séquence') | ||
344 | + plt.ylabel('MCC') | ||
345 | + plt.savefig('visualisation_128arn_structure2d_' + estimator + "_" + function + '.png', bbox_inches='tight') | ||
346 | + | ||
347 | + np_contacts = np.array(tab_contacts) | ||
348 | + size = len(tab_contacts) | ||
349 | + list_median_ctc = [] | ||
350 | + for i in range(size): | ||
351 | + list_median_ctc.append(np.median(np_contacts[i])) | ||
352 | + | ||
353 | + data = [x for _, x in sorted(zip(list_median_ctc, tab_contacts))] | ||
354 | + boxName = [x for _, x in sorted(zip(list_median_ctc, list_name))] | ||
355 | + absciss = len(data) | ||
356 | + | ||
357 | + plt.figure(figsize=(25, 4), dpi=200) | ||
358 | + plt.xticks(rotation=90) | ||
359 | + plt.boxplot(data) | ||
360 | + for i in range(absciss): | ||
361 | + y = data[i] | ||
362 | + x = np.random.normal(1 + i, 0.04, size=len(y)) | ||
363 | + plt.scatter(x, y) | ||
364 | + plt.xticks(np.arange(1, absciss + 1), boxName) | ||
365 | + | ||
366 | + plt.xlabel('nom de la séquence') | ||
367 | + plt.ylabel('MCC') | ||
368 | + plt.savefig('visualisation_128arn_contacts_' + estimator + "_" + function + '.png', bbox_inches='tight') | ||
253 | 369 | ||
254 | #cmd = ("cppsrc/Scripts/create") | 370 | #cmd = ("cppsrc/Scripts/create") |
255 | #cmd0 = ("cppsrc/Scripts/addDelimiter") | 371 | #cmd0 = ("cppsrc/Scripts/addDelimiter") |
256 | #cmd1 = ("cppsrc/Scripts/countPattern") | 372 | #cmd1 = ("cppsrc/Scripts/countPattern") |
257 | #cmd2 = ("cppsrc/Scripts/deletePdb") | 373 | #cmd2 = ("cppsrc/Scripts/deletePdb") |
258 | 374 | ||
259 | -myfile = open("data/modules/ISAURE/Motifs_version_initiale/benchmark.txt", "r") | 375 | +"""myfile = open("data/modules/ISAURE/Motifs_version_initiale/benchmark.txt", "r") |
260 | name = myfile.readline() | 376 | name = myfile.readline() |
261 | contacts = myfile.readline() | 377 | contacts = myfile.readline() |
262 | seq = myfile.readline() | 378 | seq = myfile.readline() |
... | @@ -266,36 +382,43 @@ list_struct2d_E = [] | ... | @@ -266,36 +382,43 @@ list_struct2d_E = [] |
266 | list_contacts_E = [] | 382 | list_contacts_E = [] |
267 | list_struct2d_F = [] | 383 | list_struct2d_F = [] |
268 | list_contacts_F = [] | 384 | list_contacts_F = [] |
269 | -count = 0 | 385 | +countE = 0 |
386 | +countF = 0 | ||
270 | while seq: | 387 | while seq: |
271 | name = name[6:].strip() | 388 | name = name[6:].strip() |
272 | print(name) | 389 | print(name) |
273 | - """ | ||
274 | run_test(cmd2 + " " + name + ".fa", log) | 390 | run_test(cmd2 + " " + name + ".fa", log) |
275 | print(cmd2 + " " + name + ".fa") | 391 | print(cmd2 + " " + name + ".fa") |
276 | - """ | 392 | + |
277 | - cmd3 = create_command(name) | 393 | + cmd3 = create_command_E(name) |
278 | os.system(cmd3) | 394 | os.system(cmd3) |
279 | 395 | ||
280 | - """file_path = "results/test_" + name + ".json_pmE_MEA" | 396 | + file_path = "results/test_" + name + ".json_pmE_MFE" |
281 | if os.path.isfile(file_path): | 397 | if os.path.isfile(file_path): |
282 | tabE = write_mcc_in_file_E(name, contacts, structure2d) | 398 | tabE = write_mcc_in_file_E(name, contacts, structure2d) |
283 | list_contacts_E.append(tabE[0]) | 399 | list_contacts_E.append(tabE[0]) |
284 | - list_struct2d_E.append(tabE[1])""" | 400 | + list_struct2d_E.append(tabE[1]) |
401 | + countE = countE + 1 | ||
402 | + | ||
403 | + cmd3 = create_command_F(name) | ||
404 | + os.system(cmd3) | ||
285 | 405 | ||
286 | file_path = "results/test_" + name + ".json_pmF_MFE" | 406 | file_path = "results/test_" + name + ".json_pmF_MFE" |
287 | if os.path.isfile(file_path): | 407 | if os.path.isfile(file_path): |
288 | tabF = write_mcc_in_file_F(name, contacts, structure2d) | 408 | tabF = write_mcc_in_file_F(name, contacts, structure2d) |
289 | list_contacts_F.append(tabF[0]) | 409 | list_contacts_F.append(tabF[0]) |
290 | list_struct2d_F.append(tabF[1]) | 410 | list_struct2d_F.append(tabF[1]) |
291 | - count = count + 1 | 411 | + countF = countF + 1 |
292 | 412 | ||
293 | name = myfile.readline() | 413 | name = myfile.readline() |
294 | contacts = myfile.readline() | 414 | contacts = myfile.readline() |
295 | seq = myfile.readline() | 415 | seq = myfile.readline() |
296 | structure2d = myfile.readline() | 416 | structure2d = myfile.readline() |
297 | 417 | ||
298 | -"""visualization(list_struct2d_E, list_contacts_E, 'E', 'red', '#900C3F')""" | 418 | +visualization_best_mcc(list_struct2d_E, list_contacts_E, 'E', 'red', '#900C3F') |
299 | -visualization(list_struct2d_F, list_contacts_F, 'F', 'blue', '#0900FF') | 419 | +visualization_best_mcc(list_struct2d_F, list_contacts_F, 'F', 'blue', '#0900FF') |
300 | -print("count: " + str(count) + "\n") | 420 | +print("countE: " + str(countE) + "\n") |
301 | -myfile.close() | 421 | +print("countF: " + str(countF) + "\n") |
422 | +myfile.close()""" | ||
423 | +path_benchmark = "data/modules/ISAURE/Motifs_version_initiale/benchmark.txt" | ||
424 | +visualization_all_mcc(path_benchmark,'MEA', 'F', 'blue', '#0900FF') | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment