cleaned scripts/ and figures/ folders

Louis BECQUEY
Commit 6ca2e36b3b4529c1bec1bf7b59cdaaf7e1b0ab9d 6ca2e36b 1 parent eda1ab32
Showing 11 changed files with 40 additions and 570 deletions
data/sec_structs/Readme.md
figures/best_MCCs.png
figures/detailed_stats.png
figures/number_of_solutions.png
figures/pareto_visualizer_ext_A_pk.png
figures/pareto_visualizer_ext_A_ssd.png
figures/pareto_visualizer_ext_B_pk.png
figures/pareto_visualizer_ext_B_ssd.png
benchmark.py → scripts/benchmark.py
scripts/pareto_visualizer.py
scripts/pareto_visualizer_json.py
--- a/data/sec_structs/Readme.md 0 → 100644
View file @6ca2e36
+++ b/data/sec_structs/Readme.md 0 → 100644
View file @6ca2e36
+What are these RNA data files?
+===============================
+
+## Raw (big) databases
+* RNA-Strand 2.0 (secondary_structures_database.dbn) : this file is a dataset supposed to be identical to RNA-Strand 2.0 (actually, the file has been present on IBISC machines for years and nobody remembers how it was built). The former RNA-Strand website (http://rnasoft.ca/strand) is no longer online.
+* bpRNA-1m_90 : this huge database gathers the data from other databases (CRW, PDB, Rfam, RNP, SPR, SRP, ...) and supersedes RNA-Strand (minus the structures that are only in NDB, sadly). Sequences have been prefiltered to have no more than 90% identity. Source : http://bprna.cgrb.oregonstate.edu/
+* Pseudobase(++) : A database of biologically validated pseudoknots, from the time when discovering a pseudoknot was something unusual. Pseudobase remains famous for its pseudoknot classification scheme. I scraped it myself to build the file. Source : https://www.ekevanbatenburg.nl/PKBASE/PKB.HTML
+
+
+## Filtered databases
+* verified_secondary_structures.dbn : The subset of RNA-Strand that was experimentally validated (basically, the ones for which a 3D structure was available, so the ones from NDB and PDB).
+* The _short.dbn ones : Same as their parent files, but filtered using the filter.py script.
+* pseudoknots.dbn : Audrey Legendre's scrape of Pseudobase, which, for an unknown reason, does not contain all the available data, but has nice descriptions of what the RNAs are.
+
+
+## Small test databases
+* RNA-MoIP dataset : The cherry-picked cases presented in Reinhartz et al. 2012 to show RNA-MoIP's performance.
+* applications.dbn : My cherry-picked cases presented in Becquey et al. 2020 to show Biorseo's performance.
+* example.dbn : an example database with only one RNA, for testing purposes
+* nothing.dbn : an example database with no RNAs, for testing purposes
+
+
+Enjoy benchmarking RNA structure prediction tools.
\ No newline at end of file
--- a/figures/best_MCCs.png
View file @6ca2e36
+++ b/figures/best_MCCs.png
View file @6ca2e36
--- a/figures/detailed_stats.png
View file @6ca2e36
+++ b/figures/detailed_stats.png
View file @6ca2e36
--- a/figures/number_of_solutions.png 0 → 100644
View file @6ca2e36
+++ b/figures/number_of_solutions.png 0 → 100644
View file @6ca2e36
--- a/figures/pareto_visualizer_ext_A_pk.png deleted 100644 → 0
View file @eda1ab3
+++ b/figures/pareto_visualizer_ext_A_pk.png deleted 100644 → 0
View file @eda1ab3
--- a/figures/pareto_visualizer_ext_A_ssd.png deleted 100644 → 0
View file @eda1ab3
+++ b/figures/pareto_visualizer_ext_A_ssd.png deleted 100644 → 0
View file @eda1ab3
--- a/figures/pareto_visualizer_ext_B_pk.png deleted 100644 → 0
View file @eda1ab3
+++ b/figures/pareto_visualizer_ext_B_pk.png deleted 100644 → 0
View file @eda1ab3
--- a/figures/pareto_visualizer_ext_B_ssd.png deleted 100644 → 0
View file @eda1ab3
+++ b/figures/pareto_visualizer_ext_B_ssd.png deleted 100644 → 0
View file @eda1ab3
--- a/benchmark.py → scripts/benchmark.py
View file @6ca2e36
+++ b/benchmark.py → scripts/benchmark.py
View file @6ca2e36
--- a/scripts/pareto_visualizer.py
View file @6ca2e36
+++ b/scripts/pareto_visualizer.py
View file @6ca2e36
@@ -158,7 +158,6 @@ def is_canonical_nts(seq):
             return False
     return True
-
 def is_canonical_bps(struct):
     if "()" in struct:
         return False
@@ -207,7 +206,6 @@ def load_from_dbn(file, header_style=3):
     db.close()
     return container, pkcounter
-
 def parse_biokop(folder, basename, ext=".biok"):
     solutions = []
     err = 0
@@ -248,7 +246,6 @@ def parse_biokop(folder, basename, ext=".biok"):
             err = 1
     return None, err
-
 def parse_biorseo(folder, basename, ext):
     solutions = []
     err = 0
@@ -272,21 +269,14 @@ def parse_biorseo(folder, basename, ext):
             err = 1
     return None, err
-
 def prettify_biorseo(code):
     name = ""
-    if "bgsu" in code:
+    if "json" in code:
-        name += "RNA 3D Motif Atlas + "
+        name += "JSON motifs + "
     elif "rin" in code:
         name += "CaRNAval + "
     else:
         name += "Rna3Dmotifs + "
-    if "raw" in code:
-        name += "Direct P.M."
-    if "byp" in code:
-        name += "BPairing"
-    if "jar3d" in code:
-        name += "Jar3d"
     # name += " + $f_{1" + code[-1] + "}$"
     return name
@@ -342,14 +332,9 @@ def process_extension(ax, pos, ext, nsolutions=False, xlabel="Best solution perf
 if __name__ == "__main__":
     try:
         opts, args = getopt.getopt( sys.argv[1:], "", 
-                                [  "biorseo_desc_byp_A", "biorseo_desc_byp_B",
+                                [  "biorseo_desc_A", "biorseo_desc_B",
-                                    "biorseo_desc_byp_C", "biorseo_desc_byp_D",
+                                    "biorseo_rin_A", "biorseo_rin_B",
-                                    "biorseo_bgsu_byp_A", "biorseo_bgsu_byp_B",
+                                    "biorseo_json_A", "biorseo_json_B",
-                                    "biorseo_bgsu_byp_C", "biorseo_bgsu_byp_D",
-                                    "biorseo_desc_raw_A", "biorseo_desc_raw_B",
-                                    "biorseo_bgsu_jar3d_A", "biorseo_bgsu_jar3d_B",
-                                    "biorseo_bgsu_jar3d_C", "biorseo_bgsu_jar3d_D",
-                                    "biorseo_rin_raw_A", "biorseo_rin_raw_B",
                                     "biokop", "folder=", "database=", "output="
                                 ])
     except getopt.GetoptError as err:
@@ -384,36 +369,19 @@ if __name__ == "__main__":
     if extension == "all":
         parse = parse_biorseo
-        fig, ax = plt.subplots(4,5,figsize=(12,10), sharex=True, sharey=True)
+        fig, ax = plt.subplots(2,3,figsize=(8,10), sharex=True, sharey=True)
         ax = ax.flatten()
-        process_extension(ax, 0, ".biorseo_desc_raw_A",     ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
+        process_extension(ax, 0, ".biorseo_desc_A",     ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
-        process_extension(ax, 1, ".biorseo_rin_raw_A",      ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
+        process_extension(ax, 1, ".biorseo_rin_A",      ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
-        process_extension(ax, 2, ".biorseo_desc_byp_A",     ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
+        process_extension(ax, 2, ".biorseo_json_A",      ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
-        process_extension(ax, 3, ".biorseo_bgsu_byp_A",     ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
+        ax[0].set_title(prettify_biorseo("biorseo_desc_A"), fontsize=10)
-        process_extension(ax, 4, ".biorseo_bgsu_jar3d_A",   ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
+        ax[1].set_title(prettify_biorseo("biorseo_rin_A"), fontsize=10)
-        ax[0].set_title(prettify_biorseo("biorseo_desc_raw_A"), fontsize=10)
+        ax[2].set_title(prettify_biorseo("biorseo_json_A"), fontsize=10)
-        ax[1].set_title(prettify_biorseo("biorseo_rin_raw_A"), fontsize=10)
+
-        ax[2].set_title(prettify_biorseo("biorseo_desc_byp_A"), fontsize=10)
+        process_extension(ax, 3, ".biorseo_desc_B",     ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
-        ax[3].set_title(prettify_biorseo("biorseo_bgsu_byp_A"), fontsize=10)
+        process_extension(ax, 4, ".biorseo_rin_B",      ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
-        ax[4].set_title(prettify_biorseo("biorseo_bgsu_jar3d_A"), fontsize=10)
+        process_extension(ax, 5, ".biorseo_json_B",     ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
-
+
-        process_extension(ax, 5, ".biorseo_desc_raw_B",     ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
-        process_extension(ax, 6, ".biorseo_rin_raw_B",      ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
-        process_extension(ax, 7, ".biorseo_desc_byp_B",     ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
-        process_extension(ax, 8, ".biorseo_bgsu_byp_B",     ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
-        process_extension(ax, 9, ".biorseo_bgsu_jar3d_B",   ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
-
-        process_extension(ax, 12, ".biorseo_desc_byp_C",   ylabel="Normalized $f_{1C}$", xlabel="Normalized MEA")
-        process_extension(ax, 13, ".biorseo_bgsu_byp_C",   ylabel="Normalized $f_{1C}$", xlabel="Normalized MEA")
-        process_extension(ax, 14, ".biorseo_bgsu_jar3d_C", ylabel="Normalized $f_{1C}$", xlabel="Normalized MEA")
-        ax[10].axis("off")
-        ax[11].axis("off")
-
-        process_extension(ax, 17, ".biorseo_desc_byp_D",   ylabel="Normalized $f_{1D}$", xlabel="Normalized MEA")
-        process_extension(ax, 18, ".biorseo_bgsu_byp_D",   ylabel="Normalized $f_{1D}$", xlabel="Normalized MEA")
-        process_extension(ax, 19, ".biorseo_bgsu_jar3d_D", ylabel="Normalized $f_{1D}$", xlabel="Normalized MEA")
-        ax[15].axis("off")
-        ax[16].axis("off")
         for a in ax:
             a.label_outer()
         plt.subplots_adjust(bottom=0.05, top=0.95, left=0.07, right=0.98, hspace=0.1, wspace = 0.05)
--- a/scripts/pareto_visualizer_json.py deleted 100644 → 0
View file @eda1ab3
+++ b/scripts/pareto_visualizer_json.py deleted 100644 → 0
View file @eda1ab3
-#!/usr/bin/python3
-# Created by Louis Becquey, louis.becquey@univ-evry.fr, Oct 2019
-# This script processes files containing RNA structures obtained from bi-objective
-# optimization programs, and a dot-bracket database of reference structures, to plot
-# where the best solutions lie in the Pareto set.
-#
-# The result files should follow this kind of format:
-# for Biokop: (option --biokop)
-# Structure        Free energy score       Expected accuracy score
-# (((...(((...)))))) <tab> obj1_value <tab> obj2_value
-# (((............))) <tab> obj1_value <tab> obj2_value
-# ((((((...)))...))) <tab> obj1_value <tab> obj2_value
-# ...
-#
-# for BiORSEO: (options --biorseo_**stuff**)
-# >Header of the sequence
-# GGCACAGAGUUAUGUGCC
-# (((...(((...)))))) + Motif1 + Motif2 <tab> obj1_value <tab> obj2_value
-# (((............))) <tab> obj1_value <tab> obj2_value
-# ((((((...)))...))) + Motif1 <tab> obj1_value <tab> obj2_value
-#
-# typical Biokop usage:
-# python3 pareto_visualizer.py --biokop --folder path/to/your/results/folder --database path/to/the/database_file.dbn
-# typical Biorseo usage:
-# python3 pareto_visualizer_json.py --folder path/to/your/results/folder (pmE and pmF) --database path/to/the/database_file.dbn (name, sequence, structure)
-#
-
-from math import sqrt
-import numpy as np
-import matplotlib.pyplot as plt
-from matplotlib import cm 
-import scipy.stats as st
-import sys
-import os
-import subprocess
-import getopt
-
-class SecStruct:
-    # One secondary structure: a dot-bracket string, a contacts string and the
-    # values of the two objectives attached to it.
-    def __init__(self, name, dot_bracket, contacts, obj1_value, obj2_value):
-        self.name = name
-        self.dbn = dot_bracket
-        self.ctc = contacts
-        self.objectives = [ obj1_value, obj2_value ]
-        # get_basepairs() reads self.dbn, so it has to run after self.dbn is set.
-        self.basepair_list = self.get_basepairs()
-        self.length = len(dot_bracket)
-
-    def get_basepairs(self):
-        # Returns a list of (closing_index, opening_index) tuples read from self.dbn.
-        # Six bracket families are matched independently, each with its own stack
-        # of pending opening positions: (), [], {}, <>, A/a and B/b.
-        # NOTE(review): a closing symbol without a pending opener makes pop()
-        # raise IndexError; openers left unmatched are silently ignored.
-        parenthesis = []
-        brackets = []
-        braces = []
-        rafters = []
-        basepairs = []
-        As = []
-        Bs = []
-        for i, c in enumerate(self.dbn):
-            if c == '(':
-                parenthesis.append(i)
-            if c == '[':
-                brackets.append(i)
-            if c == '{':
-                braces.append(i)
-            if c == '<':
-                rafters.append(i)
-            if c == 'A':
-                As.append(i)
-            if c == 'B':
-                Bs.append(i)
-            if c == '.':
-                continue
-            if c == ')':
-                basepairs.append((i, parenthesis.pop()))
-            if c == ']':
-                basepairs.append((i, brackets.pop()))
-            if c == '}':
-                basepairs.append((i, braces.pop()))
-            if c == '>':
-                basepairs.append((i, rafters.pop()))
-            if c == 'a':
-                basepairs.append((i, As.pop()))
-            if c == 'b':
-                basepairs.append((i, Bs.pop()))
-        return basepairs
-
-    def get_MCC_with(self, reference_structure):
-        # Returns the Matthews correlation coefficient between this structure's
-        # base pairs and those of reference_structure (another SecStruct).
-        # Get true and false positives and negatives
-        tp = 0
-        fp = 0
-        tn = 0
-        fn = 0
-        # Reference pairs found here are true positives, the missing ones false negatives.
-        for bp in reference_structure.basepair_list:
-            if bp in self.basepair_list:
-                tp += 1
-            else:
-                fn += 1
-        # Pairs predicted here but absent from the reference are false positives.
-        for bp in self.basepair_list:
-            if bp not in reference_structure.basepair_list:
-                fp += 1
-        # Every other unordered pair of positions (L*(L-1)/2 in total) is a true negative.
-        tn = reference_structure.length * (reference_structure.length - 1) * 0.5 - fp - fn - tp
-
-        # Compute MCC
-        # NOTE(review): the message below is only a warning; execution continues
-        # and, with tp+fp == 0, the denominator is sqrt(0), so the division
-        # raises ZeroDivisionError. The same happens if any other factor is 0.
-        if (tp+fp == 0):
-            print("We have an issue : no positives detected ! (linear structure)")
-        return (tp*tn-fp*fn) / sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))
-
-    def get_MCC_ctc_with(self, reference_structure):
-        # Returns the Matthews correlation coefficient between this structure's
-        # contacts string and the reference's, compared position by position
-        # ('*' counted as positive, '.' as negative). Returns None when the
-        # denominator is zero, provided the guard below is reached.
-        # Get true and false positives and negatives
-        tp = 0
-        fp = 0
-        tn = 0
-        fn = 0
-        prediction = self.ctc
-        true_ctc = reference_structure.ctc
-        # NOTE(review): the loop is driven by the reference length; a shorter
-        # prediction string would raise IndexError. Positions holding any other
-        # character than '*' or '.' are not counted at all.
-        for i in range(len(true_ctc)):
-            if true_ctc[i] == '*' and prediction[i] == '*':
-                tp += 1
-            elif true_ctc[i] == '.' and prediction[i] == '.':
-                tn += 1
-            elif true_ctc[i] == '.' and prediction[i] == '*':
-                fp += 1
-            elif true_ctc[i] == '*' and prediction[i] == '.':
-                fn += 1
-        # print(str(tp) + " " + str(tn) + " " + str(fp) + " " + str(fn) + "\n")
-
-        # NOTE(review): this division runs BEFORE the zero check below, so a zero
-        # denominator raises ZeroDivisionError here and the guard is never
-        # reached; 'result' itself is never used afterwards.
-        result = (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
-        # Compute MCC
-        if ((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn) == 0):
-            print("warning: division by zero!")
-            return None
-        # NOTE(review): unreachable; tp + fp == 0 already makes the product above zero.
-        elif (tp + fp == 0):
-            print("We have an issue : no positives detected ! (linear structure)")
-        return (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
-
-class Pareto:
-    # A set of predicted structures for one RNA together with its reference
-    # structure. On construction it records the maximum of each objective and
-    # the index of the prediction with the best MCC (base pairs and contacts).
-    def __init__(self, list_of_structs, reference):
-        self.predictions = list_of_structs
-        self.true_structure = reference
-        self.n_pred = len(list_of_structs)
-        # NOTE(review): max() raises ValueError if list_of_structs is empty.
-        self.max_obj1 = max([s.objectives[0] for s in self.predictions ])
-        self.max_obj2 = max([s.objectives[1] for s in self.predictions ])
-        self.index_of_best = self.find_best_solution()
-        self.index_of_best_ctc = self.find_best_solution_ctc()
-        
-    def find_best_solution(self):
-        # returns the index of the solution of the Pareto set which is the closest
-        # to the real 2D structure (the one with the max MCC)
-        # Ties keep the first solution found (strict '>'); the best MCC is printed.
-        # NOTE(review): returns -1 if no solution has an MCC above -1.
-        max_i = -1
-        max_mcc = -1
-        for i,s in enumerate(self.predictions):
-            mcc = s.get_MCC_with(self.true_structure)
-            if mcc > max_mcc:
-                max_mcc = mcc
-                max_i = i
-        print("\n" + "max mcc str: " + str(max_mcc))
-        return max_i
-
-    def find_best_solution_ctc(self):
-        # returns the index of the solution of the Pareto set which is the closest
-        # to the real contacts area (the one with the max MCC)
-        # Solutions whose contacts MCC is None are skipped.
-        # NOTE(review): returns -1 if every solution is skipped or none exceeds -1.
-        max_i = -1
-        max_mcc = -1
-        for i,s in enumerate(self.predictions):
-            mcc = s.get_MCC_ctc_with(self.true_structure)
-            if mcc is None:
-                continue
-            elif mcc > max_mcc:
-                max_mcc = mcc
-                max_i = i
-        return max_i
-
-    def get_normalized_coords(self):
-        # retrieves the objective values of the best solution and normalizes them
-        # Returns an (x, y) tuple: each objective divided by its maximum over the
-        # set, or 0.5 when that maximum is 0.
-        # NOTE(review): an index of -1 silently selects the last prediction.
-        coords = self.predictions[self.index_of_best].objectives
-        if self.max_obj1: # avoid divide by zero if all solutions are 0
-            x = coords[0]/self.max_obj1
-        else:
-            x = 0.5
-        if self.max_obj2: # avoid divide by zero if all solutions are 0
-            y = coords[1]/self.max_obj2
-        else:
-            y = 0.5
-        return ( x,y )
-
-    def get_normalized_coords_ctc(self):
-        # Same normalization as get_normalized_coords(), but for the solution with
-        # the best contacts MCC (self.index_of_best_ctc).
-        # The terminal colour codes below are only referenced inside the
-        # triple-quoted string further down, which is disabled debug output.
-        CRED = '\033[91m'
-        CEND = '\033[0m'
-        CGREEN = '\33[32m'
-        CBLUE = '\33[34m'
-        # retrieves the objective values of the best solution and normalizes them
-        coords = self.predictions[self.index_of_best_ctc].objectives
-        if self.max_obj1: # avoid divide by zero if all solutions are 0
-            x = coords[0]/self.max_obj1
-        else:
-            x = 0.5
-        """if(x < 0.5):
-            print("\n" + CRED + self.predictions[self.index_of_best_ctc].name + CEND)
-            print(CRED + self.predictions[self.index_of_best_ctc].ctc + CEND)
-            print("count: " + str(self.predictions[self.index_of_best_ctc].ctc.count("*")))
-            print(CRED + self.true_structure.ctc + CEND)
-            print("count: " + str(self.true_structure.ctc.count("*")) + "\n")
-
-        elif(x >= 0.5 and type(self.predictions[self.index_of_best_ctc].ctc)) is str:
-            print("\n" + CGREEN + self.predictions[self.index_of_best_ctc].name + CEND)
-            print(CGREEN + self.predictions[self.index_of_best_ctc].ctc + CEND)
-            print("count: " + str(self.predictions[self.index_of_best_ctc].ctc.count("*")))
-            print(CGREEN + self.true_structure.ctc + CEND)
-            print("count: " + str(self.true_structure.ctc.count("*")) + "\n")"""
-
-        if self.max_obj2: # avoid divide by zero if all solutions are 0
-            y = coords[1]/self.max_obj2
-        else:
-            y = 0.5
-        return ( x,y )
-
-class RNA:
-    # Plain record for one database entry: header line, sequence, dot-bracket
-    # structure, contacts string, and the base name used to locate result files.
-    def __init__(self, filename, header, seq, struct, contacts):
-        self.seq_ = seq
-        self.header_ = header
-        self.struct_ = struct
-        self.contacts_ = contacts
-        self.basename_ = filename
-
-
-ignored_nt_dict = {}  # occurrence count of each rejected (non-ACGU) character
-def is_canonical_nts(seq):
-    # Returns True when every examined character of seq is one of A, C, G, U.
-    # On the first other character, its count in ignored_nt_dict is incremented
-    # and False is returned, so at most one character is counted per call.
-    # NOTE(review): seq[:-1] leaves the last character unchecked; confirm
-    # whether that is intended, since the caller already strips the line ending.
-    for c in seq[:-1]:
-        if c not in "ACGU":
-            if c in ignored_nt_dict.keys():
-                ignored_nt_dict[c] += 1
-            else:
-                ignored_nt_dict[c] = 1
-            return False
-    return True
-
-def is_canonical_bps(struct):
-    # Returns False when the dot-bracket string contains a pair of round or
-    # square brackets enclosing fewer than three dots ("()", "(.)", "(..)",
-    # "[]", "[.]", "[..]"), and True otherwise.
-    if "()" in struct:
-        return False
-    if "(.)" in struct:
-        return False
-    if "(..)" in struct:
-        return False
-    if "[]" in struct:
-        return False
-    if "[.]" in struct:
-        return False
-    if "[..]" in struct:
-        return False
-    return True
-
-def load_from_dbn(file, header_style=1):
-    # Reads a database file made of 4-line records (header, sequence,
-    # structure, contacts) and returns (container, counter): the list of RNA
-    # objects kept, and how many kept records have a '[' in their structure.
-    # A record is kept only if is_canonical_nts(seq) and is_canonical_bps(struct) hold.
-    # header_style selects how the RNA base name is cut out of the header;
-    # any value other than 1 or 2 appends nothing.
-    container = []
-    counter = 0
-    db = open(file, "r")
-    c = 0  # line position inside the current record: 1, 2, 3, then 0
-    header = ""
-    seq = ""
-    struct = ""
-    while True:
-        l = db.readline()
-        if l == "":
-            break
-        c += 1
-        c = c % 4
-        # l[:-1] drops the last character of the line (presumably the newline;
-        # a final line without one would lose a real character).
-        if c == 1:
-            header = l[:-1]
-        if c == 2:
-            seq = l[:-1].upper()
-        if c == 3:
-            struct = l[:-1]
-            n = len(seq)  # NOTE(review): n is never used
-        if c == 0:
-            contacts = l[:-1]
-            if is_canonical_nts(seq) and is_canonical_bps(struct):
-                # Style 1: text after the last '(' of the header, minus its final character.
-                if header_style == 1: container.append(RNA(header.replace('/', '_').split('(')[-1][:-1], header, seq, struct, contacts))
-                # Style 2: text after the last '[' of the header, minus its final 41 characters.
-                if header_style == 2: container.append(RNA(header.replace('/', '_').split('[')[-1][:-41], header, seq, struct, contacts))
-                if '[' in struct: counter += 1
-    db.close()
-    return container, counter
-
-def parse_biokop(folder, basename, ext=".biok"):
-    # Parses the tab-separated result file folder/basename+ext (first line
-    # skipped) and returns a list of SecStruct, or None (implicitly) when the
-    # file is missing or holds a single distinct structure.
-    solutions = []
-    if os.path.isfile(os.path.join(folder, basename + ext)):
-        rna = open(os.path.join(folder, basename + ext), "r")
-        lines = rna.readlines()
-        rna.close()
-        different_2ds = []
-        for s in lines[1:]:
-            if s == '\n':
-                continue
-            splitted = s.split('\t')
-            db2d = splitted[0]
-            if db2d not in different_2ds:
-                different_2ds.append(db2d)
-            # here is a negative sign because Biokop actually minimizes -MEA instead
-            # of maximizing MEA : we switch back to MEA
-            # NOTE(review): SecStruct.__init__ in this file takes five arguments
-            # (name, dot_bracket, contacts, obj1_value, obj2_value); only four are
-            # passed here, so this call raises TypeError as written.
-            solutions.append(SecStruct(basename, db2d, -float(splitted[1]), -float(splitted[2][:-1])))
-
-        # check the range of MEA in this pareto set
-        # NOTE(review): solutions[0] raises IndexError if no data line was read.
-        min_mea = solutions[0].objectives[1]
-        max_mea = min_mea
-        for s in solutions:
-            mea = s.objectives[1]
-            if mea < min_mea:
-                min_mea = mea
-            if mea > max_mea:
-                max_mea = mea
-
-        # normalize so the minimum MEA of the set is 0
-        for i in range(len(solutions)):
-            solutions[i].objectives[1] -= min_mea
-
-        if len(different_2ds) > 1:
-            return solutions
-        else:
-            print("[%s] \033[36mWARNING: ignoring this RNA, only one 2D solution is found.\033[0m" % (basename))
-    else:
-        print("[%s] \033[36mWARNING: file not found !\033[0m" % (basename))
-
-def parse_biorseo(folder, basename, ext):
-    # Parses the result file folder/basename+ext (first two lines skipped)
-    # and returns a list of SecStruct, or None when the file is missing or fewer
-    # than two distinct structures were seen.
-    # Presumably the file alternates a structure line (with '(' and the two
-    # tab-separated objective values) and a contacts line (with '*') - TODO confirm.
-    solutions = []
-    print(basename + ext)
-    if os.path.isfile(os.path.join(folder, basename + ext)):
-        rna = open(os.path.join(folder, basename + ext), "r")
-        lines = rna.readlines()
-        rna.close()
-        different_2ds = []
-        contacts = []
-        str2d = []
-        count = 0;
-        for s in lines[2:]:
-            # count is incremented before the blank-line test, so blank lines
-            # also shift the odd/even parity used below.
-            count = count + 1
-            if s == '\n':
-                continue
-            splitted = s.split('\t')
-            # Objective values are read from odd-numbered lines only; [:-1]
-            # drops the last character of the third column (presumably the newline).
-            if(count % 2 == 1):
-                obj1 = float(splitted[1])
-                obj2 = float(splitted[2][:-1])
-            # Keep only the first space-separated token of the first column.
-            db2d = splitted[0].split(' ')[0]
-            if db2d not in different_2ds:
-                if(s.find('(') != -1):
-                    different_2ds.append(db2d)
-            # A line containing '*' is a contacts line: it completes a solution
-            # using the most recently stored structure and objective values.
-            if(s.find('*') != -1):
-                contacts = db2d
-                solutions.append(SecStruct(basename, str2d, contacts, obj1, obj2))
-            elif(s.find('(') != -1):
-                str2d = db2d
-        if len(different_2ds) > 1:
-            return solutions
-        else:
-            print("[%s] \033[36mWARNING: ignoring this RNA, only one 2D or contacts solution is found.\033[0m" % (basename))
-    else:
-        print("[%s] \033[36mWARNING: file not found !\033[0m" % (basename))
-    return None
-
-def prettify_biorseo(code):
-    # Builds a human-readable plot title from substrings found in the
-    # extension code: a motif-source part first, then a placement-method part.
-    name = ""
-    if "bgsu" in code:
-        name += "RNA 3D Motif Atlas + "
-    elif "json" in code:
-        # NOTE(review): unlike the other two labels, this one carries no trailing
-        # " + ", so any suffix added below would be glued directly to it.
-        name += "Motifs d'Isaure + Direct P.M"
-    else:
-        name += "Rna3Dmotifs + "
-    # These three tests are independent: more than one suffix can be appended.
-    if "raw" in code:
-        name += "Direct P.M."
-    if "byp" in code:
-        name += "BPairing"
-    if "jar3d" in code:
-        name += "Jar3d"
-    # name += " + $f_{1" + code[-1] + "}$"
-    return name
-
-# Parse options
-# Only long options are accepted; "folder", "database" and "output" take a value.
-try:
-    opts, args = getopt.getopt( sys.argv[1:], "", 
-                             [  "json_pmE",
-                                "json_pmF",
-                                "folder=",
-                                "database=",
-                                "output="
-                             ])
-except getopt.GetoptError as err:
-    print(err)
-    sys.exit(2)
-
-results_folder = "."
-extension = "all"
-outputf = ""
-for opt, arg in opts:
-    # NOTE(review): "biokop" is not in the option list passed to getopt above,
-    # so "--biokop" is rejected with GetoptError before this branch can run.
-    if opt == "--biokop":
-        extension = ".biok"
-        parse = parse_biokop
-    elif opt == "--folder":
-        results_folder = arg
-    elif opt == "--database":
-        database = arg
-    elif opt == "--output":
-        outputf = arg
-    else:
-        # Any other accepted option becomes the file extension, e.g. "--json_pmE" -> ".json_pmE".
-        extension = '.' + opt[2:]
-        parse = parse_biorseo
-
-# NOTE(review): 'database' is only bound when --database is given; without it
-# this line raises NameError.
-RNAcontainer, _ = load_from_dbn(database)
-
-# Make both folder paths end with '/'; the output folder defaults to the results folder.
-if results_folder[-1] != '/':
-    results_folder = results_folder + '/'
-if outputf == "":
-    outputf = results_folder
-if outputf[-1] != '/':
-    outputf = outputf + '/'
-
-def process_extension(ax, pos, ext, nsolutions=False, xlabel="Best solution performs\nwell on obj1", ylabel="Best solution performs\n well on obj2"):
-    # For every RNA of the module-level RNAcontainer, parses its result file
-    # with the module-level 'parse' function, finds the solution with the best
-    # base-pair MCC, and plots on ax[pos] the normalized objective values of
-    # those best solutions (scatter plus Gaussian kernel density contours).
-    # With nsolutions=True, ax[pos+1] gets a histogram of Pareto set sizes.
-    points = []
-    sizes = []
-    for rna in RNAcontainer:
-        # Extracting the predictions from the results file
-        solutions = parse(results_folder, rna.basename_, ext)
-        # The reference carries infinite objective values; only its structure
-        # and contacts are compared against the predictions.
-        reference = SecStruct(rna.basename_, rna.struct_, rna.contacts_, float("inf"), float("inf"))
-        if solutions is None:
-            continue
-        pset = Pareto(solutions, reference)
-        points.append(pset.get_normalized_coords())
-        sizes.append(pset.n_pred)
-        print("[%s] Loaded %d solutions in a Pareto set, max(obj1)=%f, max(obj2)=%f" % (rna.basename_, pset.n_pred, pset.max_obj1, pset.max_obj2))
-    print("Loaded %d points on %d." % (len(points), len(RNAcontainer)))
-
-    x = np.array([ p[0] for p in points ])
-    y = np.array([ p[1] for p in points ])
-    xmin, xmax = 0, 1
-    ymin, ymax = 0, 1
-    # 100 x 100 evaluation grid over the unit square.
-    xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
-    positions = np.vstack([xx.ravel(), yy.ravel()])
-    values = np.vstack([x, y])
-    # NOTE(review): presumably fails when no or too few points were loaded - confirm.
-    kernel = st.gaussian_kde(values)
-    f = np.reshape(kernel(positions).T, xx.shape)
-    # Faint guide lines marking the borders of the unit square.
-    ax[pos].axhline(y=0, alpha=0.2, color='black')
-    ax[pos].axhline(y=1, alpha=0.2, color='black')
-    ax[pos].axvline(x=0, alpha=0.2, color='black')
-    ax[pos].axvline(x=1, alpha=0.2, color='black')
-    ax[pos].contourf(xx, yy, f, cmap=cm.Blues, alpha=0.5)
-    ax[pos].scatter(x, y, s=25, alpha=0.1)
-    ax[pos].set_xlim((-0.1,1.1))
-    ax[pos].set_ylim((-0.1,1.1))
-    ax[pos].set_title(prettify_biorseo(ext[1:]), fontsize=10)
-    ax[pos].annotate("("+str(len(points))+'/'+str(len(RNAcontainer))+" RNAs)", (0.08, 0.15))
-    ax[pos].set_xlabel(xlabel)
-    ax[pos].set_ylabel(ylabel)
-
-    if nsolutions:
-        # Histogram of the number of solutions per RNA, in bins of width 2.
-        ax[pos+1].hist(sizes, bins=range(0, max(sizes)+1, 2), histtype='bar')
-        ax[pos+1].set_xlim((0,max(sizes)+2))
-        ax[pos+1].set_xticks(range(0, max(sizes), 10))
-        ax[pos+1].set_xticklabels(range(0, max(sizes), 10), rotation=90)
-        ax[pos+1].set_xlabel("# solutions")
-        ax[pos+1].set_ylabel("# RNAs")
-
-def process_extension_ctc(ax, pos, ext, nsolutions=False, xlabel="Best solution performs\nwell on obj1", ylabel="Best solution performs\n well on obj2"):
-    # For every RNA of the module-level RNAcontainer, parses its result file
-    # with the module-level 'parse' function, finds the solution with the best
-    # contacts MCC (Pareto.get_normalized_coords_ctc), and plots on ax[pos] the
-    # normalized objective values of those solutions (scatter plus Gaussian
-    # kernel density contours).
-    # With nsolutions=True, ax[pos+1] gets a histogram of Pareto set sizes.
-    points = []
-    sizes = []
-    for rna in RNAcontainer:
-        # Extracting the predictions from the results file
-        solutions = parse(results_folder, rna.basename_, ext)
-        # The reference carries infinite objective values; only its structure
-        # and contacts are compared against the predictions.
-        reference = SecStruct(rna.basename_, rna.struct_, rna.contacts_, float("inf"), float("inf"))
-        if solutions is None:
-            continue
-        pset = Pareto(solutions, reference)
-        points.append(pset.get_normalized_coords_ctc())
-        sizes.append(pset.n_pred)
-        print("[%s] Loaded %d solutions in a Pareto set, max(obj1)=%f, max(obj2)=%f" % (rna.basename_, pset.n_pred, pset.max_obj1, pset.max_obj2))
-    print("Loaded %d points on %d." % (len(points), len(RNAcontainer)))
-
-    x = np.array([ p[0] for p in points ])
-    y = np.array([ p[1] for p in points ])
-    xmin, xmax = 0, 1
-    ymin, ymax = 0, 1
-    # 100 x 100 evaluation grid over the unit square.
-    xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
-    positions = np.vstack([xx.ravel(), yy.ravel()])
-    values = np.vstack([x, y])
-    # NOTE(review): presumably fails when no or too few points were loaded - confirm.
-    kernel = st.gaussian_kde(values)
-    f = np.reshape(kernel(positions).T, xx.shape)
-    # Faint guide lines marking the borders of the unit square.
-    ax[pos].axhline(y=0, alpha=0.2, color='black')
-    ax[pos].axhline(y=1, alpha=0.2, color='black')
-    ax[pos].axvline(x=0, alpha=0.2, color='black')
-    ax[pos].axvline(x=1, alpha=0.2, color='black')
-    ax[pos].contourf(xx, yy, f, cmap=cm.Blues, alpha=0.5)
-    ax[pos].scatter(x, y, s=25, alpha=0.1)
-    ax[pos].set_xlim((-0.1,1.1))
-    ax[pos].set_ylim((-0.1,1.1))
-    ax[pos].set_title(prettify_biorseo(ext[1:]), fontsize=10)
-    ax[pos].annotate("("+str(len(points))+'/'+str(len(RNAcontainer))+" RNAs)", (0.08,0.15))
-    ax[pos].set_xlabel(xlabel)
-    ax[pos].set_ylabel(ylabel)
-
-    if nsolutions:
-        # Histogram of the number of solutions per RNA, in bins of width 2.
-        ax[pos+1].hist(sizes, bins=range(0, max(sizes)+1, 2), histtype='bar')
-        ax[pos+1].set_xlim((0,max(sizes)+2))
-        ax[pos+1].set_xticks(range(0, max(sizes), 10))
-        ax[pos+1].set_xticklabels(range(0, max(sizes), 10), rotation=90)
-        ax[pos+1].set_xlabel("# solutions")
-        ax[pos+1].set_ylabel("# RNAs")
-
-
-# Default mode (no extension option given): two side-by-side panels built from
-# the same ".json_pmF_MEA" result files, the left one selecting the best
-# solution by base-pair MCC and the right one by contacts MCC.
-if extension == "all":
-    parse = parse_biorseo
-    fig, ax = plt.subplots(1, 2, figsize=(10, 5), sharey=True)
-    ax = ax.flatten()
-    process_extension(ax, 0, ".json_pmF_MEA", xlabel="Normalized $f_{1E}$", ylabel="Normalized MEA")
-    print("--------------------------------------------------------------------------------------------")
-    process_extension_ctc(ax, 1, ".json_pmF_MEA", xlabel="Normalized $f_{1E}$", ylabel="Normalized MEA")
-    print("--------------------------------------------------------------------------------------------")
-
-    for a in ax:
-        a.label_outer()
-    plt.subplots_adjust(bottom=0.2, top=0.9, left=0.07, right=0.98, hspace=0.05, wspace=0.05)
-    # NOTE(review): the figure is written to a fixed relative file name; the
-    # 'outputf' folder computed from --output is not used here.
-    plt.savefig("pareto_visualizer_json_MEA_functionF.png")
-else:
-    # Single-extension mode: two stacked panels, the second one being filled
-    # only when process_extension() is called with nsolutions=True.
-    fig, ax = plt.subplots(2,1, figsize=(6,5))
-    plt.subplots_adjust(bottom=0.12, top=0.9, left=0.15, right=0.9, hspace=0.4)
-    if extension == ".biok":
-        # NOTE(review): both axis labels read "Normalized MFE" - confirm whether
-        # one of them was meant to be MEA.
-        process_extension(ax, 0, extension, nsolutions=True, xlabel="Normalized MFE", ylabel="Normalized MFE")
-    else:
-        process_extension(ax, 0, extension, nsolutions=False)
-    plt.savefig("pareto_visualizer_ext.png")
\ No newline at end of file