Louis BECQUEY

rien

...@@ -1167,6 +1167,10 @@ print("==> %s ARN were predicted with all methods successful." % is_all(len(x_no ...@@ -1167,6 +1167,10 @@ print("==> %s ARN were predicted with all methods successful." % is_all(len(x_no
1167 test = stats.friedmanchisquare(*x_noPK_fully) 1167 test = stats.friedmanchisquare(*x_noPK_fully)
1168 print("Friedman test without PK: H0 = 'The position parameter of all distributions is equal', p-value = ", test.pvalue) 1168 print("Friedman test without PK: H0 = 'The position parameter of all distributions is equal', p-value = ", test.pvalue)
1169 # ==> No they are not, but none does better, no need to test one further. 1169 # ==> No they are not, but none does better, no need to test one further.
1170 +test = stats.wilcoxon(x_noPK_fully[1], x_noPK_fully[2])
1171 +print("Wilcoxon signed rank test without PK: H0 = 'The position parameter of RNA-MoIP and RawA are equal', p-value = ", test.pvalue)
1172 +test = stats.wilcoxon(x_noPK_fully[1], x_noPK_fully[3])
1173 +print("Wilcoxon signed rank test without PK: H0 = 'The position parameter of RNA-MoIP and RawB are equal', p-value = ", test.pvalue)
1170 1174
1171 # ================= Statistics (with pseudoknots) ======================== 1175 # ================= Statistics (with pseudoknots) ========================
1172 1176
...@@ -1260,154 +1264,157 @@ test = stats.wilcoxon(x_PK_fully[0], x_PK_fully[11]) ...@@ -1260,154 +1264,157 @@ test = stats.wilcoxon(x_PK_fully[0], x_PK_fully[11])
1260 print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop and Jar3dD are equal', p-value = ", test.pvalue) 1264 print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop and Jar3dD are equal', p-value = ", test.pvalue)
1261 1265
1262 1266
1263 -# ================== Print results for application cases ===================== 1267 +# # ================== Print results for application cases =====================
1264 - 1268 +
1265 -labels = ["Biokop","Biokop","RawA","RawB","BayesPairingA","BayesPairingB","BayesPairingC","BayesPairingD","JAR3DA","JAR3DB","JAR3DC","JAR3DD","BGSUBayesPairingA","BGSUBayesPairingB","BGSUBayesPairingC","BGSUBayesPairingD"] 1269 +# labels = ["Biokop","Biokop","RawA","RawB","BayesPairingA","BayesPairingB","BayesPairingC","BayesPairingD","JAR3DA","JAR3DB","JAR3DC","JAR3DD","BGSUBayesPairingA","BGSUBayesPairingB","BGSUBayesPairingC","BGSUBayesPairingD"]
1266 -print("RNAsubopt",":",x_noPK[0]) 1270 +# print("RNAsubopt",":",x_noPK[0])
1267 -print("RNA-MOIP",":",x_noPK[1]) 1271 +# print("RNA-MOIP",":",x_noPK[1])
1268 -for data, name in zip(x_PK, labels): 1272 +# for data, name in zip(x_PK, labels):
1269 - print(name,":",data) 1273 +# print(name,":",data)
1270 -labels = ["RNAsubopt","Biokop\t", "RNA MoIP\t","RawA\t","RawB\t","BayesPairingA","BayesPairingB","BayesPairingC","BayesPairingD","JAR3DA\t","JAR3DB\t","JAR3DC\t","JAR3DD\t","BGSUBPairingA","BGSUBPairingB","BGSUBPairingC","BGSUBPairingD"] 1274 +# labels = ["RNAsubopt","Biokop\t", "RNA MoIP\t","RawA\t","RawB\t","BayesPairingA","BayesPairingB","BayesPairingC","BayesPairingD","JAR3DA\t","JAR3DB\t","JAR3DC\t","JAR3DD\t","BGSUBPairingA","BGSUBPairingB","BGSUBPairingC","BGSUBPairingD"]
1271 -for r in RNAcontainer: 1275 +# # for r in RNAcontainer:
1272 - print("\n",r.header_,"\nTrue structure:\t", r.true2d) 1276 +# print("\n",r.header_,"\nTrue structure:\t", r.true2d)
1273 - for m, name in zip([r.rnasubopt, r.biokop, r.rnamoip, 1277 +# for m, name in zip([r.rnasubopt, r.biokop, r.rnamoip,
1274 - r.biorseoRawA, 1278 +# r.biorseoRawA,
1275 - r.biorseoRawB, 1279 +# r.biorseoRawB,
1276 - r.biorseoBayesPairA, 1280 +# r.biorseoBayesPairA,
1277 - r.biorseoBayesPairB, 1281 +# r.biorseoBayesPairB,
1278 - r.biorseoBayesPairC, 1282 +# r.biorseoBayesPairC,
1279 - r.biorseoBayesPairD, 1283 +# r.biorseoBayesPairD,
1280 - r.biorseoBGSUJAR3DA, 1284 +# r.biorseoBGSUJAR3DA,
1281 - r.biorseoBGSUJAR3DB, 1285 +# r.biorseoBGSUJAR3DB,
1282 - r.biorseoBGSUJAR3DC, 1286 +# r.biorseoBGSUJAR3DC,
1283 - r.biorseoBGSUJAR3DD, 1287 +# r.biorseoBGSUJAR3DD,
1284 - r.biorseoBGSUBayesPairA, 1288 +# r.biorseoBGSUBayesPairA,
1285 - r.biorseoBGSUBayesPairB, 1289 +# r.biorseoBGSUBayesPairB,
1286 - r.biorseoBGSUBayesPairC, 1290 +# r.biorseoBGSUBayesPairC,
1287 - r.biorseoBGSUBayesPairD ], labels): 1291 +# r.biorseoBGSUBayesPairD ], labels):
1288 - print(name+":\t",m.best_pred) 1292 +# print(name+":\t",m.best_pred)
1289 - 1293 +
1290 -# # ================= PLOTS OF RESULTS ======================================= 1294 +# ================= PLOTS OF RESULTS =======================================
1291 - 1295 +
1292 -# merge = [ x_PK_fully[0], # Biokop 1296 +merge = [ x_PK_fully[0], # Biokop
1293 -# x_noPK_fully[0], # RNA subopt 1297 + x_noPK_fully[0], # RNA subopt
1294 -# x_noPK_fully[1], # RNA MoIP 1298 + x_noPK_fully[1], # RNA MoIP
1295 -# x_noPK_fully[2], x_PK_fully[2], #biorseoRawA 1299 + x_noPK_fully[2], x_PK_fully[2], #biorseoRawA
1296 -# x_noPK_fully[3], x_PK_fully[3], #biorseoRawB 1300 + x_noPK_fully[3], x_PK_fully[3], #biorseoRawB
1297 -# x_noPK_fully[4], x_PK_fully[4], #biorseoBayesPairA 1301 + x_noPK_fully[4], x_PK_fully[4], #biorseoBayesPairA
1298 -# x_noPK_fully[5], x_PK_fully[5], #biorseoBayesPairB 1302 + x_noPK_fully[5], x_PK_fully[5], #biorseoBayesPairB
1299 -# x_noPK_fully[6], x_PK_fully[6], #biorseoBayesPairC 1303 + x_noPK_fully[6], x_PK_fully[6], #biorseoBayesPairC
1300 -# x_noPK_fully[7], x_PK_fully[7], #biorseoBayesPairD 1304 + x_noPK_fully[7], x_PK_fully[7], #biorseoBayesPairD
1301 -# x_noPK_fully[8], x_PK_fully[8], #biorseoBGSUJAR3DA 1305 + x_noPK_fully[8], x_PK_fully[8], #biorseoBGSUJAR3DA
1302 -# x_noPK_fully[9], x_PK_fully[9], #biorseoBGSUJAR3DB 1306 + x_noPK_fully[9], x_PK_fully[9], #biorseoBGSUJAR3DB
1303 -# x_noPK_fully[10], x_PK_fully[10], #biorseoBGSUJAR3DC 1307 + x_noPK_fully[10], x_PK_fully[10], #biorseoBGSUJAR3DC
1304 -# x_noPK_fully[11], x_PK_fully[11], #biorseoBGSUJAR3DD 1308 + x_noPK_fully[11], x_PK_fully[11], #biorseoBGSUJAR3DD
1305 -# x_noPK_fully[12], x_PK_fully[12], #biorseoBGSUBayesPairA 1309 + x_noPK_fully[12], x_PK_fully[12], #biorseoBGSUBayesPairA
1306 -# x_noPK_fully[13], x_PK_fully[13], #biorseoBGSUBayesPairB 1310 + x_noPK_fully[13], x_PK_fully[13], #biorseoBGSUBayesPairB
1307 -# x_noPK_fully[14], x_PK_fully[14], #biorseoBGSUBayesPairC 1311 + x_noPK_fully[14], x_PK_fully[14], #biorseoBGSUBayesPairC
1308 -# x_noPK_fully[15], x_PK_fully[15], #biorseoBGSUBayesPairD 1312 + x_noPK_fully[15], x_PK_fully[15], #biorseoBGSUBayesPairD
1309 -# ] 1313 +]
1310 -
1311 -# colors = [ 'green', 'blue', 'goldenrod',
1312 -# 'darkturquoise', 'darkturquoise',
1313 -# 'red', 'red',
1314 -# 'firebrick', 'firebrick',
1315 -# 'limegreen', 'limegreen',
1316 -# 'olive', 'olive',
1317 -# 'forestgreen', 'forestgreen',
1318 -# 'lime', 'lime',
1319 -# 'darkcyan', 'darkcyan',
1320 -# 'royalblue', 'royalblue',
1321 -# 'navy', 'navy',
1322 -# 'limegreen', 'limegreen',
1323 -# 'olive', 'olive',
1324 -# 'forestgreen', 'forestgreen',
1325 -# 'lime', 'lime'
1326 -# ]
1327 -# labels = [ "Biokop", "RNAsubopt",
1328 -# "RNA MoIP",
1329 -# "$f_{1A}$",
1330 -# "$f_{1B}$",
1331 -# "$f_{1A}$",
1332 -# "$f_{1B}$",
1333 -# "$f_{1C}$",
1334 -# "$f_{1D}$",
1335 -# "$f_{1A}$",
1336 -# "$f_{1B}$",
1337 -# "$f_{1C}$",
1338 -# "$f_{1D}$",
1339 -# "$f_{1A}$",
1340 -# "$f_{1B}$",
1341 -# "$f_{1C}$",
1342 -# "$f_{1D}$"
1343 -# ]
1344 -
1345 -# ax = plt.subplot(211)
1346 -# ax.tick_params(labelsize=12)
1347 -# for y in [ i/10 for i in range(11) ]:
1348 -# plt.axhline(y=y, color="grey", linestyle="--", linewidth=1)
1349 -# colors = [ 'blue','goldenrod',
1350 -# 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime',
1351 -# 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime'
1352 -# ]
1353 -# bplot = plt.boxplot(x_noPK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97])
1354 -# for patch, color in zip(bplot['boxes'], colors):
1355 -# patch.set_facecolor(color)
1356 -# # plt.axhline(y=0, color="black", linewidth=1)
1357 -# # plt.axhline(y=1, color="black", linewidth=1)
1358 -# plt.xticks([1.0+i for i in range(16)], labels[1:])
1359 -# plt.ylim((0, 1.01))
1360 -# plt.ylabel("MCC", fontsize=12)
1361 -# plt.subplots_adjust(left=0.05, right=0.95)
1362 -# # plt.title("Performance without pseudoknots (%d RNAs included)" % len(x_noPK_fully[0]))
1363 -
1364 -
1365 -# ax = plt.subplot(212)
1366 -# ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False, labelsize=12)
1367 -# ax.xaxis.set_label_position('top')
1368 -# for y in [ i/10 for i in range(11) ]:
1369 -# plt.axhline(y=y, color="grey", linestyle="--", linewidth=1)
1370 -# colors = [ 'green','green',
1371 -# 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime',
1372 -# 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime'
1373 -# ]
1374 -# labels = [ "Biokop"]
1375 -# bplot = plt.boxplot(x_PK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97])
1376 -# for patch, color in zip(bplot['boxes'], colors):
1377 -# patch.set_facecolor(color)
1378 -# # plt.axhline(y=0, color="black", linewidth=1)
1379 -# # plt.axhline(y=1, color="black", linewidth=1)
1380 -# plt.xticks([1.0+i for i in range(16)], labels)
1381 -# plt.ylim((0, 1.01))
1382 -# plt.ylabel("MCC", fontsize=12)
1383 -# plt.subplots_adjust(left=0.05, right=0.95)
1384 -# # plt.text(6.2,-0.3,"Performance with pseudoknots (%d RNAs included)" % len(x_PK_fully[0]), fontsize=12)
1385 1314
1315 +colors = [ 'green', 'blue', 'goldenrod',
1316 + 'darkturquoise', 'darkturquoise',
1317 + 'red', 'red',
1318 + 'firebrick', 'firebrick',
1319 + 'limegreen', 'limegreen',
1320 + 'olive', 'olive',
1321 + 'forestgreen', 'forestgreen',
1322 + 'lime', 'lime',
1323 + 'darkcyan', 'darkcyan',
1324 + 'royalblue', 'royalblue',
1325 + 'navy', 'navy',
1326 + 'limegreen', 'limegreen',
1327 + 'olive', 'olive',
1328 + 'forestgreen', 'forestgreen',
1329 + 'lime', 'lime'
1330 +]
1331 +labels = [ "Biokop", "RNAsubopt",
1332 + "RNA MoIP",
1333 + "$f_{1A}$",
1334 + "$f_{1B}$",
1335 + "$f_{1A}$",
1336 + "$f_{1B}$",
1337 + "$f_{1C}$",
1338 + "$f_{1D}$",
1339 + "$f_{1A}$",
1340 + "$f_{1B}$",
1341 + "$f_{1C}$",
1342 + "$f_{1D}$",
1343 + "$f_{1A}$",
1344 + "$f_{1B}$",
1345 + "$f_{1C}$",
1346 + "$f_{1D}$"
1347 +]
1386 1348
1387 -# plt.show() 1349 +ax = plt.subplot(211)
1350 +ax.tick_params(labelsize=12)
1351 +for y in [ i/10 for i in range(11) ]:
1352 + plt.axhline(y=y, color="grey", linestyle="--", linewidth=1)
1353 +colors = [ 'blue','goldenrod',
1354 + 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime',
1355 + 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime'
1356 + ]
1357 +bplot = plt.boxplot(x_noPK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97])
1358 +for patch, color in zip(bplot['boxes'], colors):
1359 + patch.set_facecolor(color)
1360 +# plt.axhline(y=0, color="black", linewidth=1)
1361 +# plt.axhline(y=1, color="black", linewidth=1)
1362 +plt.xticks([1.0+i for i in range(16)], labels[1:])
1363 +plt.ylim((0, 1.01))
1364 +plt.ylabel("MCC", fontsize=12)
1365 +plt.subplots_adjust(left=0.05, right=0.95)
1366 +# plt.title("Performance without pseudoknots (%d RNAs included)" % len(x_noPK_fully[0]))
1367 +
1368 +
1369 +ax = plt.subplot(212)
1370 +ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False, labelsize=12)
1371 +ax.xaxis.set_label_position('top')
1372 +for y in [ i/10 for i in range(11) ]:
1373 + plt.axhline(y=y, color="grey", linestyle="--", linewidth=1)
1374 +colors = [ 'green','green',
1375 + 'red', 'firebrick','limegreen','olive', 'forestgreen', 'lime',
1376 + 'darkturquoise', 'darkcyan', 'royalblue', 'navy', 'limegreen','olive', 'forestgreen', 'lime'
1377 + ]
1378 +labels = [ "Biokop"]
1379 +bplot = plt.boxplot(x_PK_fully, vert=True, patch_artist=True, notch=False, whis=[3,97])
1380 +for patch, color in zip(bplot['boxes'], colors):
1381 + patch.set_facecolor(color)
1382 +# plt.axhline(y=0, color="black", linewidth=1)
1383 +# plt.axhline(y=1, color="black", linewidth=1)
1384 +plt.xticks([1.0+i for i in range(16)], labels)
1385 +plt.ylim((0, 1.01))
1386 +plt.ylabel("MCC", fontsize=12)
1387 +plt.subplots_adjust(left=0.05, right=0.95)
1388 +# plt.text(6.2,-0.3,"Performance with pseudoknots (%d RNAs included)" % len(x_PK_fully[0]), fontsize=12)
1389 +
1390 +
1391 +plt.show()
1388 1392
1389 1393
1390 # # ================== MCC performance ==================================== 1394 # # ================== MCC performance ====================================
1391 # # plt.subplot(141) 1395 # # plt.subplot(141)
1396 +
1397 +# RNAs_fully_predicted.sort(key=lambda x: x.rnasubopt.max_mcc)
1398 +
1392 # x = [ 1399 # x = [
1393 # [ rna.rnasubopt.max_mcc for rna in RNAs_fully_predicted], 1400 # [ rna.rnasubopt.max_mcc for rna in RNAs_fully_predicted],
1394 -# # [ rna.rnamoip.max_mcc for rna in RNAs_fully_predicted], 1401 +# [ rna.rnamoip.max_mcc for rna in RNAs_fully_predicted],
1395 -# [ rna.biorseoRawA.max_mcc for rna in RNAs_fully_predicted], 1402 +# #[ rna.biorseoRawA.max_mcc for rna in RNAs_fully_predicted],
1396 # # [ rna.biorseoRawB.max_mcc for rna in RNAs_fully_predicted], 1403 # # [ rna.biorseoRawB.max_mcc for rna in RNAs_fully_predicted],
1397 -# [ rna.biokop.max_mcc for rna in RNAs_fully_predicted], 1404 +# #[ rna.biokop.max_mcc for rna in RNAs_fully_predicted],
1398 -# [ rna.biorseoBGSUJAR3DA.max_mcc for rna in RNAs_fully_predicted] 1405 +# #[ rna.biorseoBGSUJAR3DA.max_mcc for rna in RNAs_fully_predicted]
1399 # ] 1406 # ]
1400 -# colors = ['xkcd:blue','xkcd:red', 'green', 'cyan'] 1407 +# colors = ['xkcd:blue','goldenrod']#, 'green', 'cyan']
1401 -# labels = ["Best RNAsubopt prediction", "Best RawA prediction", "Best Biokop prediction", "Best JAR3DA prediction"] 1408 +# labels = ["Best RNAsubopt MCC", "Best RNA-MoIP MCC"]#, "Best Biokop prediction", "Best JAR3DA prediction"]
1402 # for y, col, lab in zip(x, colors, labels): 1409 # for y, col, lab in zip(x, colors, labels):
1403 # x_data = [ i for i in range(len(y)) if y[i]] 1410 # x_data = [ i for i in range(len(y)) if y[i]]
1404 # y_data = [ i for i in y if i] 1411 # y_data = [ i for i in y if i]
1405 # plt.scatter(x_data, y_data, color=col, label=lab, marker='o', s=2.5) 1412 # plt.scatter(x_data, y_data, color=col, label=lab, marker='o', s=2.5)
1406 # plt.axhline(y=0, color='black', linewidth=1) 1413 # plt.axhline(y=0, color='black', linewidth=1)
1407 # plt.axvline(x=0, color='black', linewidth=1) 1414 # plt.axvline(x=0, color='black', linewidth=1)
1408 -# plt.xlabel("RNA Strand verified tRNA structures (10 < |nt| < 100)") 1415 +# # plt.xlabel("RNA Strand verified tRNA structures (10 < |nt| < 100)")
1409 # plt.ylabel("Matthews Correlation Coefficient") 1416 # plt.ylabel("Matthews Correlation Coefficient")
1410 -# plt.title("Performance of the prediction method") 1417 +# # plt.title("Performance of the prediction method")
1411 # plt.legend(loc="lower right") 1418 # plt.legend(loc="lower right")
1412 # plt.show() 1419 # plt.show()
1413 1420
......