cleaned scripts/ and figures/ folders

Louis BECQUEY
Commit 6ca2e36b3b4529c1bec1bf7b59cdaaf7e1b0ab9d 6ca2e36b 1 parent eda1ab32
Showing 11 changed files with 40 additions and 570 deletions
data/sec_structs/Readme.md
figures/best_MCCs.png
figures/detailed_stats.png
figures/number_of_solutions.png
figures/pareto_visualizer_ext_A_pk.png
figures/pareto_visualizer_ext_A_ssd.png
figures/pareto_visualizer_ext_B_pk.png
figures/pareto_visualizer_ext_B_ssd.png
benchmark.py → scripts/benchmark.py
scripts/pareto_visualizer.py
scripts/pareto_visualizer_json.py
--- a/data/sec_structs/Readme.md 0 → 100644
View file @6ca2e36
+++ b/data/sec_structs/Readme.md 0 → 100644
View file @6ca2e36
+ What are these RNA data files?
+ ===============================
+ 
+ ## Raw (big) databases
+ * RNA-Strand 2.0 (secondary_structures_database.dbn) : this file is a dataset that is supposed to be identical to RNA-Strand 2.0 (actually, the file has been present on IBISC machines for years now and nobody remembers how it was built). The former RNA-Strand website is no longer online (http://rnasoft.ca/strand).
+ * bpRNA-1m_90 : this huge database gathers the data from other databases (CRW, PDB, Rfam, RNP, SPR, SRP, ...) and supersedes RNA-Strand (minus the structures that are only in NDB, sadly). Sequences have been prefiltered to have no more than 90% identity. Source : http://bprna.cgrb.oregonstate.edu/
+ * Pseudobase(++) : A database of biologically validated pseudoknots, dating from the time when discovering a pseudoknot was something unusual. Pseudobase remains famous for its pseudoknot classification scheme. I scraped it myself to build the file. Source : https://www.ekevanbatenburg.nl/PKBASE/PKB.HTML 
+ 
+ 
+ ## Filtered databases
+ * verified_secondary_structures.dbn : The subset of RNA-Strand that was experimentally validated (basically, the ones for which a 3D structure was available, so the ones from NDB and PDB).
+ * The _short.dbn files : Each one is the same as its parent database, but filtered using the filter.py script.
+ * pseudoknots.dbn : Audrey Legendre's scrape of Pseudobase, which, for an unknown reason, does not contain all the available data, but does include nice descriptions of what the RNAs are.
+ 
+ 
+ ## Small test databases
+ * RNA-MoIP dataset : The cherry-picked cases presented in Reinharz et al. 2012 to show RNA-MoIP's performance.
+ * applications.dbn : My cherry-picked cases presented in Becquey et al. 2020 to show Biorseo's performance.
+ * example.dbn : an example database with only one RNA, for testing purposes
+ * nothing.dbn : an example database with no RNAs, for testing purposes
+ 
+ 
+ Enjoy benchmarking RNA structure prediction tools.
\ No newline at end of file
--- a/figures/best_MCCs.png
View file @6ca2e36
+++ b/figures/best_MCCs.png
View file @6ca2e36
--- a/figures/detailed_stats.png
View file @6ca2e36
+++ b/figures/detailed_stats.png
View file @6ca2e36
--- a/figures/number_of_solutions.png 0 → 100644
View file @6ca2e36
+++ b/figures/number_of_solutions.png 0 → 100644
View file @6ca2e36
--- a/figures/pareto_visualizer_ext_A_pk.png deleted 100644 → 0
View file @eda1ab3
+++ b/figures/pareto_visualizer_ext_A_pk.png deleted 100644 → 0
View file @eda1ab3
--- a/figures/pareto_visualizer_ext_A_ssd.png deleted 100644 → 0
View file @eda1ab3
+++ b/figures/pareto_visualizer_ext_A_ssd.png deleted 100644 → 0
View file @eda1ab3
--- a/figures/pareto_visualizer_ext_B_pk.png deleted 100644 → 0
View file @eda1ab3
+++ b/figures/pareto_visualizer_ext_B_pk.png deleted 100644 → 0
View file @eda1ab3
--- a/figures/pareto_visualizer_ext_B_ssd.png deleted 100644 → 0
View file @eda1ab3
+++ b/figures/pareto_visualizer_ext_B_ssd.png deleted 100644 → 0
View file @eda1ab3
--- a/benchmark.py → scripts/benchmark.py
View file @6ca2e36
+++ b/benchmark.py → scripts/benchmark.py
View file @6ca2e36
--- a/scripts/pareto_visualizer.py
View file @6ca2e36
+++ b/scripts/pareto_visualizer.py
View file @6ca2e36
@@ -158,7 +158,6 @@ def is_canonical_nts(seq):
             return False
     return True
 
- 
 def is_canonical_bps(struct):
     if "()" in struct:
         return False
@@ -207,7 +206,6 @@ def load_from_dbn(file, header_style=3):
     db.close()
     return container, pkcounter
 
- 
 def parse_biokop(folder, basename, ext=".biok"):
     solutions = []
     err = 0
@@ -248,7 +246,6 @@ def parse_biokop(folder, basename, ext=".biok"):
             err = 1
     return None, err
 
- 
 def parse_biorseo(folder, basename, ext):
     solutions = []
     err = 0
@@ -272,21 +269,14 @@ def parse_biorseo(folder, basename, ext):
             err = 1
     return None, err
 
- 
 def prettify_biorseo(code):
     name = ""
-     if "bgsu" in code:
-         name += "RNA 3D Motif Atlas + "
+     if "json" in code:
+         name += "JSON motifs + "
     elif "rin" in code:
         name += "CaRNAval + "
     else:
         name += "Rna3Dmotifs + "
-     if "raw" in code:
-         name += "Direct P.M."
-     if "byp" in code:
-         name += "BPairing"
-     if "jar3d" in code:
-         name += "Jar3d"
     # name += " + $f_{1" + code[-1] + "}$"
     return name
 
@@ -342,14 +332,9 @@ def process_extension(ax, pos, ext, nsolutions=False, xlabel="Best solution perf
 if __name__ == "__main__":
     try:
         opts, args = getopt.getopt( sys.argv[1:], "", 
-                                 [  "biorseo_desc_byp_A", "biorseo_desc_byp_B",
-                                     "biorseo_desc_byp_C", "biorseo_desc_byp_D",
-                                     "biorseo_bgsu_byp_A", "biorseo_bgsu_byp_B",
-                                     "biorseo_bgsu_byp_C", "biorseo_bgsu_byp_D",
-                                     "biorseo_desc_raw_A", "biorseo_desc_raw_B",
-                                     "biorseo_bgsu_jar3d_A", "biorseo_bgsu_jar3d_B",
-                                     "biorseo_bgsu_jar3d_C", "biorseo_bgsu_jar3d_D",
-                                     "biorseo_rin_raw_A", "biorseo_rin_raw_B",
+                                 [  "biorseo_desc_A", "biorseo_desc_B",
+                                     "biorseo_rin_A", "biorseo_rin_B",
+                                     "biorseo_json_A", "biorseo_json_B",
                                     "biokop", "folder=", "database=", "output="
                                 ])
     except getopt.GetoptError as err:
@@ -384,36 +369,19 @@ if __name__ == "__main__":
 
     if extension == "all":
         parse = parse_biorseo
-         fig, ax = plt.subplots(4,5,figsize=(12,10), sharex=True, sharey=True)
+         fig, ax = plt.subplots(2,3,figsize=(8,10), sharex=True, sharey=True)
         ax = ax.flatten()
-         process_extension(ax, 0, ".biorseo_desc_raw_A",     ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
-         process_extension(ax, 1, ".biorseo_rin_raw_A",      ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
-         process_extension(ax, 2, ".biorseo_desc_byp_A",     ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
-         process_extension(ax, 3, ".biorseo_bgsu_byp_A",     ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
-         process_extension(ax, 4, ".biorseo_bgsu_jar3d_A",   ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
-         ax[0].set_title(prettify_biorseo("biorseo_desc_raw_A"), fontsize=10)
-         ax[1].set_title(prettify_biorseo("biorseo_rin_raw_A"), fontsize=10)
-         ax[2].set_title(prettify_biorseo("biorseo_desc_byp_A"), fontsize=10)
-         ax[3].set_title(prettify_biorseo("biorseo_bgsu_byp_A"), fontsize=10)
-         ax[4].set_title(prettify_biorseo("biorseo_bgsu_jar3d_A"), fontsize=10)
- 
-         process_extension(ax, 5, ".biorseo_desc_raw_B",     ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
-         process_extension(ax, 6, ".biorseo_rin_raw_B",      ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
-         process_extension(ax, 7, ".biorseo_desc_byp_B",     ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
-         process_extension(ax, 8, ".biorseo_bgsu_byp_B",     ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
-         process_extension(ax, 9, ".biorseo_bgsu_jar3d_B",   ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
- 
-         process_extension(ax, 12, ".biorseo_desc_byp_C",   ylabel="Normalized $f_{1C}$", xlabel="Normalized MEA")
-         process_extension(ax, 13, ".biorseo_bgsu_byp_C",   ylabel="Normalized $f_{1C}$", xlabel="Normalized MEA")
-         process_extension(ax, 14, ".biorseo_bgsu_jar3d_C", ylabel="Normalized $f_{1C}$", xlabel="Normalized MEA")
-         ax[10].axis("off")
-         ax[11].axis("off")
- 
-         process_extension(ax, 17, ".biorseo_desc_byp_D",   ylabel="Normalized $f_{1D}$", xlabel="Normalized MEA")
-         process_extension(ax, 18, ".biorseo_bgsu_byp_D",   ylabel="Normalized $f_{1D}$", xlabel="Normalized MEA")
-         process_extension(ax, 19, ".biorseo_bgsu_jar3d_D", ylabel="Normalized $f_{1D}$", xlabel="Normalized MEA")
-         ax[15].axis("off")
-         ax[16].axis("off")
+         process_extension(ax, 0, ".biorseo_desc_A",     ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
+         process_extension(ax, 1, ".biorseo_rin_A",      ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
+         process_extension(ax, 2, ".biorseo_json_A",      ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
+         ax[0].set_title(prettify_biorseo("biorseo_desc_A"), fontsize=10)
+         ax[1].set_title(prettify_biorseo("biorseo_rin_A"), fontsize=10)
+         ax[2].set_title(prettify_biorseo("biorseo_json_A"), fontsize=10)
+ 
+         process_extension(ax, 3, ".biorseo_desc_B",     ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
+         process_extension(ax, 4, ".biorseo_rin_B",      ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
+         process_extension(ax, 5, ".biorseo_json_B",     ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
+ 
         for a in ax:
             a.label_outer()
         plt.subplots_adjust(bottom=0.05, top=0.95, left=0.07, right=0.98, hspace=0.1, wspace = 0.05)
--- a/scripts/pareto_visualizer_json.py deleted 100644 → 0
View file @eda1ab3
+++ b/scripts/pareto_visualizer_json.py deleted 100644 → 0
View file @eda1ab3
- #!/usr/bin/python3
- # Created by Louis Becquey, louis.becquey@univ-evry.fr, Oct 2019
- # This script processes files containing RNA structures obtained from bi-objective
- # optimization programs, and a dot-bracket database of reference structures, to plot
- # where are the best solutions in the Pareto set.
- #
- # The result files should follow this kind of format:
- # for Biokop: (option --biokop)
- # Structure        Free energy score       Expected accuracy score
- # (((...(((...)))))) <tab> obj1_value <tab> obj2_value
- # (((............))) <tab> obj1_value <tab> obj2_value
- # ((((((...)))...))) <tab> obj1_value <tab> obj2_value
- # ...
- #
- # for BiORSEO: (options --biorseo_**stuff**)
- # >Header of the sequence
- # GGCACAGAGUUAUGUGCC
- # (((...(((...)))))) + Motif1 + Motif2 <tab> obj1_value <tab> obj2_value
- # (((............))) <tab> obj1_value <tab> obj2_value
- # ((((((...)))...))) + Motif1 <tab> obj1_value <tab> obj2_value
- #
- # typical Biokop usage:
- # python3 pareto_visualizer.py --biokop --folder path/to/your/results/folder --database path/to/the/database_file.dbn
- # typical Biorseo usage:
- # python3 pareto_visualizer_json.py --folder path/to/your/results/folder (pmE et pmF) --database path/to/the/database_file.dbn (nom, sequence, structure)
- #
- 
- from math import sqrt
- import numpy as np
- import matplotlib.pyplot as plt
- from matplotlib import cm 
- import scipy.stats as st
- import sys
- import os
- import subprocess
- import getopt
- 
- class SecStruct:
-     def __init__(self, name, dot_bracket, contacts, obj1_value, obj2_value):
-         self.name = name
-         self.dbn = dot_bracket
-         self.ctc = contacts
-         self.objectives = [ obj1_value, obj2_value ]
-         self.basepair_list = self.get_basepairs()
-         self.length = len(dot_bracket)
- 
-     def get_basepairs(self):
-         parenthesis = []
-         brackets = []
-         braces = []
-         rafters = []
-         basepairs = []
-         As = []
-         Bs = []
-         for i, c in enumerate(self.dbn):
-             if c == '(':
-                 parenthesis.append(i)
-             if c == '[':
-                 brackets.append(i)
-             if c == '{':
-                 braces.append(i)
-             if c == '<':
-                 rafters.append(i)
-             if c == 'A':
-                 As.append(i)
-             if c == 'B':
-                 Bs.append(i)
-             if c == '.':
-                 continue
-             if c == ')':
-                 basepairs.append((i, parenthesis.pop()))
-             if c == ']':
-                 basepairs.append((i, brackets.pop()))
-             if c == '}':
-                 basepairs.append((i, braces.pop()))
-             if c == '>':
-                 basepairs.append((i, rafters.pop()))
-             if c == 'a':
-                 basepairs.append((i, As.pop()))
-             if c == 'b':
-                 basepairs.append((i, Bs.pop()))
-         return basepairs
- 
-     def get_MCC_with(self, reference_structure):
-         # Get true and false positives and negatives
-         tp = 0
-         fp = 0
-         tn = 0
-         fn = 0
-         for bp in reference_structure.basepair_list:
-             if bp in self.basepair_list:
-                 tp += 1
-             else:
-                 fn += 1
-         for bp in self.basepair_list:
-             if bp not in reference_structure.basepair_list:
-                 fp += 1
-         tn = reference_structure.length * (reference_structure.length - 1) * 0.5 - fp - fn - tp
- 
-         # Compute MCC
-         if (tp+fp == 0):
-             print("We have an issue : no positives detected ! (linear structure)")
-         return (tp*tn-fp*fn) / sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))
- 
-     def get_MCC_ctc_with(self, reference_structure):
-         # Get true and false positives and negatives
-         tp = 0
-         fp = 0
-         tn = 0
-         fn = 0
-         prediction = self.ctc
-         true_ctc = reference_structure.ctc
-         for i in range(len(true_ctc)):
-             if true_ctc[i] == '*' and prediction[i] == '*':
-                 tp += 1
-             elif true_ctc[i] == '.' and prediction[i] == '.':
-                 tn += 1
-             elif true_ctc[i] == '.' and prediction[i] == '*':
-                 fp += 1
-             elif true_ctc[i] == '*' and prediction[i] == '.':
-                 fn += 1
-         # print(str(tp) + " " + str(tn) + " " + str(fp) + " " + str(fn) + "\n")
- 
-         result = (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
-         # Compute MCC
-         if ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) == 0):
-             print("warning: division by zero!")
-             return None
-         elif (tp + fp == 0):
-             print("We have an issue : no positives detected ! (linear structure)")
-         return (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
- 
- class Pareto:
-     def __init__(self, list_of_structs, reference):
-         self.predictions = list_of_structs
-         self.true_structure = reference
-         self.n_pred = len(list_of_structs)
-         self.max_obj1 = max([s.objectives[0] for s in self.predictions ])
-         self.max_obj2 = max([s.objectives[1] for s in self.predictions ])
-         self.index_of_best = self.find_best_solution()
-         self.index_of_best_ctc = self.find_best_solution_ctc()
-         
-     def find_best_solution(self):
-         # returns the index of the solution of the Pareto set which is the closest
-         # to the real 2D structure (the one with the max MCC)
-         max_i = -1
-         max_mcc = -1
-         for i,s in enumerate(self.predictions):
-             mcc = s.get_MCC_with(self.true_structure)
-             if mcc > max_mcc:
-                 max_mcc = mcc
-                 max_i = i
-         print("\n" + "max mcc str: " + str(max_mcc))
-         return max_i
- 
-     def find_best_solution_ctc(self):
-         # returns the index of the solution of the Pareto set which is the closest
-         # to the real contacts area (the one with the max MCC)
-         max_i = -1
-         max_mcc = -1
-         for i,s in enumerate(self.predictions):
-             mcc = s.get_MCC_ctc_with(self.true_structure)
-             if mcc is None:
-                 continue
-             elif mcc > max_mcc:
-                 max_mcc = mcc
-                 max_i = i
-         return max_i
- 
-     def get_normalized_coords(self):
-         # retrieves the objective values of the best solution and normalizes them
-         coords = self.predictions[self.index_of_best].objectives
-         if self.max_obj1: # avoid divide by zero if all solutions are 0
-             x = coords[0]/self.max_obj1
-         else:
-             x = 0.5
-         if self.max_obj2: # avoid divide by zero if all solutions are 0
-             y = coords[1]/self.max_obj2
-         else:
-             y = 0.5
-         return ( x,y )
- 
-     def get_normalized_coords_ctc(self):
-         CRED = '\033[91m'
-         CEND = '\033[0m'
-         CGREEN = '\33[32m'
-         CBLUE = '\33[34m'
-         # retrieves the objective values of the best solution and normalizes them
-         coords = self.predictions[self.index_of_best_ctc].objectives
-         if self.max_obj1: # avoid divide by zero if all solutions are 0
-             x = coords[0]/self.max_obj1
-         else:
-             x = 0.5
-         """if(x < 0.5):
-             print("\n" + CRED + self.predictions[self.index_of_best_ctc].name + CEND)
-             print(CRED + self.predictions[self.index_of_best_ctc].ctc + CEND)
-             print("count: " + str(self.predictions[self.index_of_best_ctc].ctc.count("*")))
-             print(CRED + self.true_structure.ctc + CEND)
-             print("count: " + str(self.true_structure.ctc.count("*")) + "\n")
- 
-         elif(x >= 0.5 and type(self.predictions[self.index_of_best_ctc].ctc)) is str:
-             print("\n" + CGREEN + self.predictions[self.index_of_best_ctc].name + CEND)
-             print(CGREEN + self.predictions[self.index_of_best_ctc].ctc + CEND)
-             print("count: " + str(self.predictions[self.index_of_best_ctc].ctc.count("*")))
-             print(CGREEN + self.true_structure.ctc + CEND)
-             print("count: " + str(self.true_structure.ctc.count("*")) + "\n")"""
- 
-         if self.max_obj2: # avoid divide by zero if all solutions are 0
-             y = coords[1]/self.max_obj2
-         else:
-             y = 0.5
-         return ( x,y )
- 
- class RNA:
-     def __init__(self, filename, header, seq, struct, contacts):
-         self.seq_ = seq
-         self.header_ = header
-         self.struct_ = struct
-         self.contacts_ = contacts
-         self.basename_ = filename
- 
- 
- ignored_nt_dict = {}
- def is_canonical_nts(seq):
-     for c in seq[:-1]:
-         if c not in "ACGU":
-             if c in ignored_nt_dict.keys():
-                 ignored_nt_dict[c] += 1
-             else:
-                 ignored_nt_dict[c] = 1
-             return False
-     return True
- 
- def is_canonical_bps(struct):
-     if "()" in struct:
-         return False
-     if "(.)" in struct:
-         return False
-     if "(..)" in struct:
-         return False
-     if "[]" in struct:
-         return False
-     if "[.]" in struct:
-         return False
-     if "[..]" in struct:
-         return False
-     return True
- 
- def load_from_dbn(file, header_style=1):
-     container = []
-     counter = 0
-     db = open(file, "r")
-     c = 0
-     header = ""
-     seq = ""
-     struct = ""
-     while True:
-         l = db.readline()
-         if l == "":
-             break
-         c += 1
-         c = c % 4
-         if c == 1:
-             header = l[:-1]
-         if c == 2:
-             seq = l[:-1].upper()
-         if c == 3:
-             struct = l[:-1]
-             n = len(seq)
-         if c == 0:
-             contacts = l[:-1]
-             if is_canonical_nts(seq) and is_canonical_bps(struct):
-                 if header_style == 1: container.append(RNA(header.replace('/', '_').split('(')[-1][:-1], header, seq, struct, contacts))
-                 if header_style == 2: container.append(RNA(header.replace('/', '_').split('[')[-1][:-41], header, seq, struct, contacts))
-                 if '[' in struct: counter += 1
-     db.close()
-     return container, counter
- 
- def parse_biokop(folder, basename, ext=".biok"):
-     solutions = []
-     if os.path.isfile(os.path.join(folder, basename + ext)):
-         rna = open(os.path.join(folder, basename + ext), "r")
-         lines = rna.readlines()
-         rna.close()
-         different_2ds = []
-         for s in lines[1:]:
-             if s == '\n':
-                 continue
-             splitted = s.split('\t')
-             db2d = splitted[0]
-             if db2d not in different_2ds:
-                 different_2ds.append(db2d)
-             # here is a negative sign because Biokop actually minimizes -MEA instead
-             # of maximizing MEA : we switch back to MEA
-             solutions.append(SecStruct(basename, db2d, -float(splitted[1]), -float(splitted[2][:-1])))
- 
-         # check the range of MEA in this pareto set
-         min_mea = solutions[0].objectives[1]
-         max_mea = min_mea
-         for s in solutions:
-             mea = s.objectives[1]
-             if mea < min_mea:
-                 min_mea = mea
-             if mea > max_mea:
-                 max_mea = mea
- 
-         # normalize so the minimum MEA of the set is 0
-         for i in range(len(solutions)):
-             solutions[i].objectives[1] -= min_mea
- 
-         if len(different_2ds) > 1:
-             return solutions
-         else:
-             print("[%s] \033[36mWARNING: ignoring this RNA, only one 2D solution is found.\033[0m" % (basename))
-     else:
-         print("[%s] \033[36mWARNING: file not found !\033[0m" % (basename))
- 
- def parse_biorseo(folder, basename, ext):
-     solutions = []
-     print(basename + ext)
-     if os.path.isfile(os.path.join(folder, basename + ext)):
-         rna = open(os.path.join(folder, basename + ext), "r")
-         lines = rna.readlines()
-         rna.close()
-         different_2ds = []
-         contacts = []
-         str2d = []
-         count = 0;
-         for s in lines[2:]:
-             count = count + 1
-             if s == '\n':
-                 continue
-             splitted = s.split('\t')
-             if(count % 2 == 1):
-                 obj1 = float(splitted[1])
-                 obj2 = float(splitted[2][:-1])
-             db2d = splitted[0].split(' ')[0]
-             if db2d not in different_2ds:
-                 if(s.find('(') != -1):
-                     different_2ds.append(db2d)
-             if(s.find('*') != -1):
-                 contacts = db2d
-                 solutions.append(SecStruct(basename, str2d, contacts, obj1, obj2))
-             elif(s.find('(') != -1):
-                 str2d = db2d
-         if len(different_2ds) > 1:
-             return solutions
-         else:
-             print("[%s] \033[36mWARNING: ignoring this RNA, only one 2D or contacts solution is found.\033[0m" % (basename))
-     else:
-         print("[%s] \033[36mWARNING: file not found !\033[0m" % (basename))
-     return None
- 
- def prettify_biorseo(code):
-     name = ""
-     if "bgsu" in code:
-         name += "RNA 3D Motif Atlas + "
-     elif "json" in code:
-         name += "Motifs d'Isaure + Direct P.M"
-     else:
-         name += "Rna3Dmotifs + "
-     if "raw" in code:
-         name += "Direct P.M."
-     if "byp" in code:
-         name += "BPairing"
-     if "jar3d" in code:
-         name += "Jar3d"
-     # name += " + $f_{1" + code[-1] + "}$"
-     return name
- 
- # Parse options
- try:
-     opts, args = getopt.getopt( sys.argv[1:], "", 
-                              [  "json_pmE",
-                                 "json_pmF",
-                                 "folder=",
-                                 "database=",
-                                 "output="
-                              ])
- except getopt.GetoptError as err:
-     print(err)
-     sys.exit(2)
- 
- results_folder = "."
- extension = "all"
- outputf = ""
- for opt, arg in opts:
-     if opt == "--biokop":
-         extension = ".biok"
-         parse = parse_biokop
-     elif opt == "--folder":
-         results_folder = arg
-     elif opt == "--database":
-         database = arg
-     elif opt == "--output":
-         outputf = arg
-     else:
-         extension = '.' + opt[2:]
-         parse = parse_biorseo
- 
- RNAcontainer, _ = load_from_dbn(database)
- 
- if results_folder[-1] != '/':
-     results_folder = results_folder + '/'
- if outputf == "":
-     outputf = results_folder
- if outputf[-1] != '/':
-     outputf = outputf + '/'
- 
- def process_extension(ax, pos, ext, nsolutions=False, xlabel="Best solution performs\nwell on obj1", ylabel="Best solution performs\n well on obj2"):
-     points = []
-     sizes = []
-     for rna in RNAcontainer:
-         # Extracting the predictions from the results file
-         solutions = parse(results_folder, rna.basename_, ext)
-         reference = SecStruct(rna.basename_, rna.struct_, rna.contacts_, float("inf"), float("inf"))
-         if solutions is None:
-             continue
-         pset = Pareto(solutions, reference)
-         points.append(pset.get_normalized_coords())
-         sizes.append(pset.n_pred)
-         print("[%s] Loaded %d solutions in a Pareto set, max(obj1)=%f, max(obj2)=%f" % (rna.basename_, pset.n_pred, pset.max_obj1, pset.max_obj2))
-     print("Loaded %d points on %d." % (len(points), len(RNAcontainer)))
- 
-     x = np.array([ p[0] for p in points ])
-     y = np.array([ p[1] for p in points ])
-     xmin, xmax = 0, 1
-     ymin, ymax = 0, 1
-     xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
-     positions = np.vstack([xx.ravel(), yy.ravel()])
-     values = np.vstack([x, y])
-     kernel = st.gaussian_kde(values)
-     f = np.reshape(kernel(positions).T, xx.shape)
-     ax[pos].axhline(y=0, alpha=0.2, color='black')
-     ax[pos].axhline(y=1, alpha=0.2, color='black')
-     ax[pos].axvline(x=0, alpha=0.2, color='black')
-     ax[pos].axvline(x=1, alpha=0.2, color='black')
-     ax[pos].contourf(xx, yy, f, cmap=cm.Blues, alpha=0.5)
-     ax[pos].scatter(x, y, s=25, alpha=0.1)
-     ax[pos].set_xlim((-0.1,1.1))
-     ax[pos].set_ylim((-0.1,1.1))
-     ax[pos].set_title(prettify_biorseo(ext[1:]), fontsize=10)
-     ax[pos].annotate("("+str(len(points))+'/'+str(len(RNAcontainer))+" RNAs)", (0.08, 0.15))
-     ax[pos].set_xlabel(xlabel)
-     ax[pos].set_ylabel(ylabel)
- 
-     if nsolutions:
-         ax[pos+1].hist(sizes, bins=range(0, max(sizes)+1, 2), histtype='bar')
-         ax[pos+1].set_xlim((0,max(sizes)+2))
-         ax[pos+1].set_xticks(range(0, max(sizes), 10))
-         ax[pos+1].set_xticklabels(range(0, max(sizes), 10), rotation=90)
-         ax[pos+1].set_xlabel("# solutions")
-         ax[pos+1].set_ylabel("# RNAs")
- 
- def process_extension_ctc(ax, pos, ext, nsolutions=False, xlabel="Best solution performs\nwell on obj1", ylabel="Best solution performs\n well on obj2"):
-     points = []
-     sizes = []
-     for rna in RNAcontainer:
-         # Extracting the predictions from the results file
-         solutions = parse(results_folder, rna.basename_, ext)
-         reference = SecStruct(rna.basename_, rna.struct_, rna.contacts_, float("inf"), float("inf"))
-         if solutions is None:
-             continue
-         pset = Pareto(solutions, reference)
-         points.append(pset.get_normalized_coords_ctc())
-         sizes.append(pset.n_pred)
-         print("[%s] Loaded %d solutions in a Pareto set, max(obj1)=%f, max(obj2)=%f" % (rna.basename_, pset.n_pred, pset.max_obj1, pset.max_obj2))
-     print("Loaded %d points on %d." % (len(points), len(RNAcontainer)))
- 
-     x = np.array([ p[0] for p in points ])
-     y = np.array([ p[1] for p in points ])
-     xmin, xmax = 0, 1
-     ymin, ymax = 0, 1
-     xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
-     positions = np.vstack([xx.ravel(), yy.ravel()])
-     values = np.vstack([x, y])
-     kernel = st.gaussian_kde(values)
-     f = np.reshape(kernel(positions).T, xx.shape)
-     ax[pos].axhline(y=0, alpha=0.2, color='black')
-     ax[pos].axhline(y=1, alpha=0.2, color='black')
-     ax[pos].axvline(x=0, alpha=0.2, color='black')
-     ax[pos].axvline(x=1, alpha=0.2, color='black')
-     ax[pos].contourf(xx, yy, f, cmap=cm.Blues, alpha=0.5)
-     ax[pos].scatter(x, y, s=25, alpha=0.1)
-     ax[pos].set_xlim((-0.1,1.1))
-     ax[pos].set_ylim((-0.1,1.1))
-     ax[pos].set_title(prettify_biorseo(ext[1:]), fontsize=10)
-     ax[pos].annotate("("+str(len(points))+'/'+str(len(RNAcontainer))+" RNAs)", (0.08,0.15))
-     ax[pos].set_xlabel(xlabel)
-     ax[pos].set_ylabel(ylabel)
- 
-     if nsolutions:
-         ax[pos+1].hist(sizes, bins=range(0, max(sizes)+1, 2), histtype='bar')
-         ax[pos+1].set_xlim((0,max(sizes)+2))
-         ax[pos+1].set_xticks(range(0, max(sizes), 10))
-         ax[pos+1].set_xticklabels(range(0, max(sizes), 10), rotation=90)
-         ax[pos+1].set_xlabel("# solutions")
-         ax[pos+1].set_ylabel("# RNAs")
- 
- 
- if extension == "all":
-     parse = parse_biorseo
-     fig, ax = plt.subplots(1, 2, figsize=(10, 5), sharey=True)
-     ax = ax.flatten()
-     process_extension(ax, 0, ".json_pmF_MEA", xlabel="Normalized $f_{1E}$", ylabel="Normalized MEA")
-     print("--------------------------------------------------------------------------------------------")
-     process_extension_ctc(ax, 1, ".json_pmF_MEA", xlabel="Normalized $f_{1E}$", ylabel="Normalized MEA")
-     print("--------------------------------------------------------------------------------------------")
- 
-     for a in ax:
-         a.label_outer()
-     plt.subplots_adjust(bottom=0.2, top=0.9, left=0.07, right=0.98, hspace=0.05, wspace=0.05)
-     plt.savefig("pareto_visualizer_json_MEA_functionF.png")
- else:
-     fig, ax = plt.subplots(2,1, figsize=(6,5))
-     plt.subplots_adjust(bottom=0.12, top=0.9, left=0.15, right=0.9, hspace=0.4)
-     if extension == ".biok":
-         process_extension(ax, 0, extension, nsolutions=True, xlabel="Normalized MFE", ylabel="Normalized MFE")
-     else:
-         process_extension(ax, 0, extension, nsolutions=False)
-     plt.savefig("pareto_visualizer_ext.png")
\ No newline at end of file