Showing
11 changed files
with
40 additions
and
49 deletions
data/sec_structs/Readme.md
0 → 100644
1 | +What are these RNA data files? | ||
2 | +=============================== | ||
3 | + | ||
4 | +## Raw (big) databases | ||
5 | +* RNA-Strand 2.0 (secondary_structures_database.dbn) : this file is a dataset supposed to be identical to RNA-Strand 2.0 (actually, the file has been present on IBISC machines for years now and nobody remembers how it was built). The former RNA-Strand website is not online anymore (http://rnasoft.ca/strand). | ||
6 | +* bpRNA-1m_90 : this huge database gathers the data from other databases (CRW, PDB, Rfam, RNP, SPR, SRP, ...) and supersedes RNA-Strand (minus the structures that are only in NDB, sadly). Sequences have been prefiltered to have no more than 90% identity. Source : http://bprna.cgrb.oregonstate.edu/ | ||
7 | +* Pseudobase(++) : A database of biologically validated pseudoknots, from the time when discovering a pseudoknot was something unusual. Pseudobase remains famous for its pseudoknot classification scheme. I scraped it myself to build the file. Source : https://www.ekevanbatenburg.nl/PKBASE/PKB.HTML | ||
8 | + | ||
9 | + | ||
10 | +## Filtered databases | ||
11 | +* verified_secondary_structures.dbn : The subset of RNA-Strand that was experimentally validated (basically, the ones for which a 3D structure was available, so the ones from NDB and PDB). | ||
12 | +* The _short.dbn files : Same as their parent files, but filtered using the filter.py script. | ||
13 | +* pseudoknots.dbn : Audrey Legendre's scrape of Pseudobase, which, for an unknown reason, does not contain all the available data, but has nice descriptions of what the RNAs are. | ||
14 | + | ||
15 | + | ||
16 | +## Small test databases | ||
17 | +* RNA-MoIP dataset : The cherry-picked cases presented in Reinharz et al. 2012 to show RNA-MoIP's performance. | ||
18 | +* applications.dbn : My cherry-picked cases presented in Becquey et al. 2020 to show Biorseo's performance. | ||
19 | +* example.dbn : an example database with only one RNA, for testing purposes | ||
20 | +* nothing.dbn : an example database with no RNAs, for testing purposes | ||
21 | + | ||
22 | + | ||
23 | +Enjoy benchmarking RNA structure prediction tools. | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
figures/number_of_solutions.png
0 → 100644
11.9 KB
37.2 KB
38.2 KB
36.1 KB
37 KB
File moved
... | @@ -158,7 +158,6 @@ def is_canonical_nts(seq): | ... | @@ -158,7 +158,6 @@ def is_canonical_nts(seq): |
158 | return False | 158 | return False |
159 | return True | 159 | return True |
160 | 160 | ||
161 | - | ||
162 | def is_canonical_bps(struct): | 161 | def is_canonical_bps(struct): |
163 | if "()" in struct: | 162 | if "()" in struct: |
164 | return False | 163 | return False |
... | @@ -207,7 +206,6 @@ def load_from_dbn(file, header_style=3): | ... | @@ -207,7 +206,6 @@ def load_from_dbn(file, header_style=3): |
207 | db.close() | 206 | db.close() |
208 | return container, pkcounter | 207 | return container, pkcounter |
209 | 208 | ||
210 | - | ||
211 | def parse_biokop(folder, basename, ext=".biok"): | 209 | def parse_biokop(folder, basename, ext=".biok"): |
212 | solutions = [] | 210 | solutions = [] |
213 | err = 0 | 211 | err = 0 |
... | @@ -248,7 +246,6 @@ def parse_biokop(folder, basename, ext=".biok"): | ... | @@ -248,7 +246,6 @@ def parse_biokop(folder, basename, ext=".biok"): |
248 | err = 1 | 246 | err = 1 |
249 | return None, err | 247 | return None, err |
250 | 248 | ||
251 | - | ||
252 | def parse_biorseo(folder, basename, ext): | 249 | def parse_biorseo(folder, basename, ext): |
253 | solutions = [] | 250 | solutions = [] |
254 | err = 0 | 251 | err = 0 |
... | @@ -272,21 +269,14 @@ def parse_biorseo(folder, basename, ext): | ... | @@ -272,21 +269,14 @@ def parse_biorseo(folder, basename, ext): |
272 | err = 1 | 269 | err = 1 |
273 | return None, err | 270 | return None, err |
274 | 271 | ||
275 | - | ||
276 | def prettify_biorseo(code): | 272 | def prettify_biorseo(code): |
277 | name = "" | 273 | name = "" |
278 | - if "bgsu" in code: | 274 | + if "json" in code: |
279 | - name += "RNA 3D Motif Atlas + " | 275 | + name += "JSON motifs + " |
280 | elif "rin" in code: | 276 | elif "rin" in code: |
281 | name += "CaRNAval + " | 277 | name += "CaRNAval + " |
282 | else: | 278 | else: |
283 | name += "Rna3Dmotifs + " | 279 | name += "Rna3Dmotifs + " |
284 | - if "raw" in code: | ||
285 | - name += "Direct P.M." | ||
286 | - if "byp" in code: | ||
287 | - name += "BPairing" | ||
288 | - if "jar3d" in code: | ||
289 | - name += "Jar3d" | ||
290 | # name += " + $f_{1" + code[-1] + "}$" | 280 | # name += " + $f_{1" + code[-1] + "}$" |
291 | return name | 281 | return name |
292 | 282 | ||
... | @@ -342,14 +332,9 @@ def process_extension(ax, pos, ext, nsolutions=False, xlabel="Best solution perf | ... | @@ -342,14 +332,9 @@ def process_extension(ax, pos, ext, nsolutions=False, xlabel="Best solution perf |
342 | if __name__ == "__main__": | 332 | if __name__ == "__main__": |
343 | try: | 333 | try: |
344 | opts, args = getopt.getopt( sys.argv[1:], "", | 334 | opts, args = getopt.getopt( sys.argv[1:], "", |
345 | - [ "biorseo_desc_byp_A", "biorseo_desc_byp_B", | 335 | + [ "biorseo_desc_A", "biorseo_desc_B", |
346 | - "biorseo_desc_byp_C", "biorseo_desc_byp_D", | 336 | + "biorseo_rin_A", "biorseo_rin_B", |
347 | - "biorseo_bgsu_byp_A", "biorseo_bgsu_byp_B", | 337 | + "biorseo_json_A", "biorseo_json_B", |
348 | - "biorseo_bgsu_byp_C", "biorseo_bgsu_byp_D", | ||
349 | - "biorseo_desc_raw_A", "biorseo_desc_raw_B", | ||
350 | - "biorseo_bgsu_jar3d_A", "biorseo_bgsu_jar3d_B", | ||
351 | - "biorseo_bgsu_jar3d_C", "biorseo_bgsu_jar3d_D", | ||
352 | - "biorseo_rin_raw_A", "biorseo_rin_raw_B", | ||
353 | "biokop", "folder=", "database=", "output=" | 338 | "biokop", "folder=", "database=", "output=" |
354 | ]) | 339 | ]) |
355 | except getopt.GetoptError as err: | 340 | except getopt.GetoptError as err: |
... | @@ -384,36 +369,19 @@ if __name__ == "__main__": | ... | @@ -384,36 +369,19 @@ if __name__ == "__main__": |
384 | 369 | ||
385 | if extension == "all": | 370 | if extension == "all": |
386 | parse = parse_biorseo | 371 | parse = parse_biorseo |
387 | - fig, ax = plt.subplots(4,5,figsize=(12,10), sharex=True, sharey=True) | 372 | + fig, ax = plt.subplots(2,3,figsize=(8,10), sharex=True, sharey=True) |
388 | ax = ax.flatten() | 373 | ax = ax.flatten() |
389 | - process_extension(ax, 0, ".biorseo_desc_raw_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA") | 374 | + process_extension(ax, 0, ".biorseo_desc_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA") |
390 | - process_extension(ax, 1, ".biorseo_rin_raw_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA") | 375 | + process_extension(ax, 1, ".biorseo_rin_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA") |
391 | - process_extension(ax, 2, ".biorseo_desc_byp_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA") | 376 | + process_extension(ax, 2, ".biorseo_json_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA") |
392 | - process_extension(ax, 3, ".biorseo_bgsu_byp_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA") | 377 | + ax[0].set_title(prettify_biorseo("biorseo_desc_A"), fontsize=10) |
393 | - process_extension(ax, 4, ".biorseo_bgsu_jar3d_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA") | 378 | + ax[1].set_title(prettify_biorseo("biorseo_rin_A"), fontsize=10) |
394 | - ax[0].set_title(prettify_biorseo("biorseo_desc_raw_A"), fontsize=10) | 379 | + ax[2].set_title(prettify_biorseo("biorseo_json_A"), fontsize=10) |
395 | - ax[1].set_title(prettify_biorseo("biorseo_rin_raw_A"), fontsize=10) | 380 | + |
396 | - ax[2].set_title(prettify_biorseo("biorseo_desc_byp_A"), fontsize=10) | 381 | + process_extension(ax, 3, ".biorseo_desc_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA") |
397 | - ax[3].set_title(prettify_biorseo("biorseo_bgsu_byp_A"), fontsize=10) | 382 | + process_extension(ax, 4, ".biorseo_rin_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA") |
398 | - ax[4].set_title(prettify_biorseo("biorseo_bgsu_jar3d_A"), fontsize=10) | 383 | + process_extension(ax, 5, ".biorseo_json_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA") |
399 | - | 384 | + |
400 | - process_extension(ax, 5, ".biorseo_desc_raw_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA") | ||
401 | - process_extension(ax, 6, ".biorseo_rin_raw_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA") | ||
402 | - process_extension(ax, 7, ".biorseo_desc_byp_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA") | ||
403 | - process_extension(ax, 8, ".biorseo_bgsu_byp_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA") | ||
404 | - process_extension(ax, 9, ".biorseo_bgsu_jar3d_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA") | ||
405 | - | ||
406 | - process_extension(ax, 12, ".biorseo_desc_byp_C", ylabel="Normalized $f_{1C}$", xlabel="Normalized MEA") | ||
407 | - process_extension(ax, 13, ".biorseo_bgsu_byp_C", ylabel="Normalized $f_{1C}$", xlabel="Normalized MEA") | ||
408 | - process_extension(ax, 14, ".biorseo_bgsu_jar3d_C", ylabel="Normalized $f_{1C}$", xlabel="Normalized MEA") | ||
409 | - ax[10].axis("off") | ||
410 | - ax[11].axis("off") | ||
411 | - | ||
412 | - process_extension(ax, 17, ".biorseo_desc_byp_D", ylabel="Normalized $f_{1D}$", xlabel="Normalized MEA") | ||
413 | - process_extension(ax, 18, ".biorseo_bgsu_byp_D", ylabel="Normalized $f_{1D}$", xlabel="Normalized MEA") | ||
414 | - process_extension(ax, 19, ".biorseo_bgsu_jar3d_D", ylabel="Normalized $f_{1D}$", xlabel="Normalized MEA") | ||
415 | - ax[15].axis("off") | ||
416 | - ax[16].axis("off") | ||
417 | for a in ax: | 385 | for a in ax: |
418 | a.label_outer() | 386 | a.label_outer() |
419 | plt.subplots_adjust(bottom=0.05, top=0.95, left=0.07, right=0.98, hspace=0.1, wspace = 0.05) | 387 | plt.subplots_adjust(bottom=0.05, top=0.95, left=0.07, right=0.98, hspace=0.1, wspace = 0.05) | ... | ... |
scripts/pareto_visualizer_json.py
deleted
100644 → 0
This diff is collapsed. Click to expand it.
-
Please register or login to post a comment