Nathalie BERNARD

Le script divise en deux le boxplot des benchmarks

...@@ -315,6 +315,24 @@ def get_list_structs_contacts(path_benchmark, estimator, function): ...@@ -315,6 +315,24 @@ def get_list_structs_contacts(path_benchmark, estimator, function):
315 return [list_name, complete_list_struct2d_F, complete_list_contacts_F] 315 return [list_name, complete_list_struct2d_F, complete_list_contacts_F]
316 myfile.close() 316 myfile.close()
317 317
def get_half(list):
    """Split a sequence into two consecutive halves.

    When the length is odd, the first half receives the extra element.

    Args:
        list: Indexable, sliceable sequence to split. The name shadows the
            builtin but is kept so existing callers are unaffected.

    Returns:
        A two-element list ``[first_half, second_half]``. Both halves are
        new lists; the input sequence is left untouched.
    """
    # Ceiling division in pure integer arithmetic: avoids the float
    # round-trip of ``len / 2 + 0.5`` followed by repeated ``int()`` casts.
    middle = (len(list) + 1) // 2

    # Unpacking the slices always produces plain lists, whatever the input
    # sequence type is (the builtin ``list()`` is shadowed by the parameter).
    first_half = [*list[:middle]]
    second_half = [*list[middle:]]

    return [first_half, second_half]
318 def visualization_all_mcc(path_benchmark, estimator, function): 336 def visualization_all_mcc(path_benchmark, estimator, function):
319 337
320 list_name = get_list_structs_contacts(path_benchmark, estimator, function)[0] 338 list_name = get_list_structs_contacts(path_benchmark, estimator, function)[0]
...@@ -329,21 +347,42 @@ def visualization_all_mcc(path_benchmark, estimator, function): ...@@ -329,21 +347,42 @@ def visualization_all_mcc(path_benchmark, estimator, function):
329 347
330 data = [x for _, x in sorted(zip(list_median_str, tab_struct2d))] 348 data = [x for _, x in sorted(zip(list_median_str, tab_struct2d))]
331 boxName = [x for _, x in sorted(zip(list_median_str, list_name))] 349 boxName = [x for _, x in sorted(zip(list_median_str, list_name))]
332 - absciss = len(data)
333 350
334 - plt.figure(figsize=(25,4),dpi=200) 351 + if (len(data) % 2 == 0):
352 + absciss = len(data) / 2
353 + else:
354 + absciss = len(data) / 2 + 0.5
355 +
356 + divide_tab_name = get_half(boxName)
357 + divide_tab_data = get_half(data)
358 +
359 + plt.figure(figsize=(15,4),dpi=200)
335 plt.xticks(rotation=90) 360 plt.xticks(rotation=90)
336 - plt.boxplot(data, medianprops=dict(color='black')) 361 + plt.boxplot(divide_tab_data[0], medianprops=dict(color='black'))
337 - for i in range(absciss): 362 + for i in range(int(absciss)):
338 y =data[i] 363 y =data[i]
339 x = np.random.normal(1 + i, 0.04, size=len(y)) 364 x = np.random.normal(1 + i, 0.04, size=len(y))
340 plt.scatter(x, y) 365 plt.scatter(x, y)
341 - plt.xticks(np.arange(1, absciss + 1), boxName) 366 + plt.xticks(np.arange(1, absciss + 1), divide_tab_name[0])
342 367
343 plt.xlabel('nom de la séquence') 368 plt.xlabel('nom de la séquence')
344 - plt.ylabel('MCC') 369 + plt.ylabel('MCC (appariements)')
345 plt.savefig('visualisation_128arn_structure2d_' + estimator + "_" + function + '.png', bbox_inches='tight') 370 plt.savefig('visualisation_128arn_structure2d_' + estimator + "_" + function + '.png', bbox_inches='tight')
346 371
372 + plt.figure(figsize=(15, 4), dpi=200)
373 + plt.xticks(rotation=90)
374 + plt.boxplot(divide_tab_data[1], medianprops=dict(color='black'))
375 + for i in range(len(data)):
376 + if i + int(absciss) < len(data):
377 + y = data[i + int(absciss)]
378 + x = np.random.normal(1 + i, 0.04, size=len(y))
379 + plt.scatter(x, y)
380 + plt.xticks(np.arange(1, absciss + 1), divide_tab_name[1])
381 +
382 + plt.xlabel('nom de la séquence')
383 + plt.ylabel('MCC')
384 + plt.savefig('visualisation_128arn_structure2d_' + estimator + "_" + function + '_2.png', bbox_inches='tight')
385 +
347 np_contacts = np.array(tab_contacts) 386 np_contacts = np.array(tab_contacts)
348 size = len(tab_contacts) 387 size = len(tab_contacts)
349 list_median_ctc = [] 388 list_median_ctc = []
...@@ -352,21 +391,42 @@ def visualization_all_mcc(path_benchmark, estimator, function): ...@@ -352,21 +391,42 @@ def visualization_all_mcc(path_benchmark, estimator, function):
352 391
353 data = [x for _, x in sorted(zip(list_median_ctc, tab_contacts))] 392 data = [x for _, x in sorted(zip(list_median_ctc, tab_contacts))]
354 boxName = [x for _, x in sorted(zip(list_median_ctc, list_name))] 393 boxName = [x for _, x in sorted(zip(list_median_ctc, list_name))]
355 - absciss = len(data)
356 394
357 - plt.figure(figsize=(25, 4), dpi=200) 395 + if (len(data) % 2 == 0) :
396 + absciss = len(data)/2
397 + else :
398 + absciss = len(data)/2 + 0.5
399 +
400 + divide_tab_name = get_half(boxName)
401 + divide_tab_data = get_half(data)
402 +
403 + plt.figure(figsize=(15, 4), dpi=200)
358 plt.xticks(rotation=90) 404 plt.xticks(rotation=90)
359 - plt.boxplot(data, medianprops=dict(color='black')) 405 + plt.boxplot(divide_tab_data[0], medianprops=dict(color='black'))
360 - for i in range(absciss): 406 + for i in range(int(absciss)):
361 y = data[i] 407 y = data[i]
362 x = np.random.normal(1 + i, 0.04, size=len(y)) 408 x = np.random.normal(1 + i, 0.04, size=len(y))
363 plt.scatter(x, y) 409 plt.scatter(x, y)
364 - plt.xticks(np.arange(1, absciss + 1), boxName) 410 + plt.xticks(np.arange(1, absciss + 1), divide_tab_name[0])
365 411
366 plt.xlabel('nom de la séquence') 412 plt.xlabel('nom de la séquence')
367 - plt.ylabel('MCC') 413 + plt.ylabel('MCC (contacts)')
368 plt.savefig('visualisation_128arn_contacts_' + estimator + "_" + function + '.png', bbox_inches='tight') 414 plt.savefig('visualisation_128arn_contacts_' + estimator + "_" + function + '.png', bbox_inches='tight')
369 415
416 + plt.figure(figsize=(15, 4), dpi=200)
417 + plt.xticks(rotation=90)
418 + plt.boxplot(divide_tab_data[1], medianprops=dict(color='black'))
419 + for i in range(len(data)):
420 + if i + int(absciss) < len(data) :
421 + y = data[i + int(absciss)]
422 + x = np.random.normal(1 + i, 0.04, size=len(y))
423 + plt.scatter(x, y)
424 + plt.xticks(np.arange(1, absciss + 1), divide_tab_name[1])
425 +
426 + plt.xlabel('nom de la séquence')
427 + plt.ylabel('MCC')
428 + plt.savefig('visualisation_128arn_contacts_' + estimator + "_" + function + '_2.png', bbox_inches='tight')
429 +
370 #cmd = ("cppsrc/Scripts/create") 430 #cmd = ("cppsrc/Scripts/create")
371 #cmd0 = ("cppsrc/Scripts/addDelimiter") 431 #cmd0 = ("cppsrc/Scripts/addDelimiter")
372 #cmd1 = ("cppsrc/Scripts/countPattern") 432 #cmd1 = ("cppsrc/Scripts/countPattern")
...@@ -393,6 +453,7 @@ countF_MFE = 0 ...@@ -393,6 +453,7 @@ countF_MFE = 0
393 453
394 countE_MEA = 0 454 countE_MEA = 0
395 countF_MEA = 0 455 countF_MEA = 0
456 +"""
396 while seq: 457 while seq:
397 name = name[6:].strip() 458 name = name[6:].strip()
398 print(name) 459 print(name)
...@@ -452,10 +513,10 @@ visualization_best_mcc(list_struct2d_F_MEA, list_contacts_F_MEA, 'MEA', 'F', 'bl ...@@ -452,10 +513,10 @@ visualization_best_mcc(list_struct2d_F_MEA, list_contacts_F_MEA, 'MEA', 'F', 'bl
452 print("countE_MFE: " + str(countE_MFE) + "\n") 513 print("countE_MFE: " + str(countE_MFE) + "\n")
453 print("countF_MFE: " + str(countF_MFE) + "\n") 514 print("countF_MFE: " + str(countF_MFE) + "\n")
454 print("countE_MEA: " + str(countE_MEA) + "\n") 515 print("countE_MEA: " + str(countE_MEA) + "\n")
455 -print("countF_MEA: " + str(countF_MEA) + "\n") 516 +print("countF_MEA: " + str(countF_MEA) + "\n")"""
456 myfile.close() 517 myfile.close()
457 -#path_benchmark = "data/modules/ISAURE/Motifs_version_initiale/benchmark.txt"
458 -#visualization_all_mcc(path_benchmark,'MEA', 'F')
459 -#visualization_all_mcc(path_benchmark,'MEA', 'E')
460 -#visualization_all_mcc(path_benchmark,'MFE', 'E')
461 -#visualization_all_mcc(path_benchmark,'MFE', 'F')
...\ No newline at end of file ...\ No newline at end of file
518 +path_benchmark = "data/modules/ISAURE/Motifs_version_initiale/benchmark.txt"
519 +visualization_all_mcc(path_benchmark,'MEA', 'F')
520 +visualization_all_mcc(path_benchmark,'MEA', 'E')
521 +visualization_all_mcc(path_benchmark,'MFE', 'E')
522 +visualization_all_mcc(path_benchmark,'MFE', 'F')
...\ No newline at end of file ...\ No newline at end of file
......