cleaned scripts/ and figures/ folders

What are these RNA data files?
## Raw (big) databases
* RNA-Strand 2.0 (secondary_structures_database.dbn) : this file is a dataset supposed to be identical to RNA-Strand 2.0 (actually, the file has been present on IBISC machines for years now and nobody remembers how it was built). The former RNA-Strand website is no longer online (http://rnasoft.ca/strand).
* bpRNA-1m_90 : this huge database gathers the data from other databases (CRW, PDB, Rfam, RNP, SPR, SRP, ...) and supersedes RNA-Strand (minus the structures that are only in NDB, sadly). Sequences have been prefiltered to have no more than 90% identity. Source : http://bprna.cgrb.oregonstate.edu/
* Pseudobase(++) : A database of biologically validated pseudoknots, from the time discovering a pseudoknot was something unusual. Pseudobase stays famous for its pseudoknot classification scheme. I scraped it myself to build the file. Source : https://www.ekevanbatenburg.nl/PKBASE/PKB.HTML
## Filtered databases
* verified_secondary_structures.dbn : The subset of RNA-Strand that was experimentally validated (basically, the ones for which a 3D structure was available, so the ones from NDB and PDB).
* The _short.dbn ones : Same as their parent file, but filtered using the filter.py script.
* pseudoknots.dbn : Audrey Legendre's scrape of Pseudobase, which, for an unknown reason, does not contain all the available data, but has nice descriptions of what the RNAs are.
## Small test databases
* RNA-MoIP dataset : The cherry-picked cases presented in Reinhartz et al. 2012 to show RNA-MoIP's performance.
* applications.dbn : My cherry-picked cases presented in Becquey et al. 2020 to show Biorseo's performance.
* example.dbn : an example database with only one RNA, for testing purposes
* nothing.dbn : an example database with no RNAs, for testing purposes
Enjoy benchmarking RNA structure prediction tools.
\ No newline at end of file

99.5 KB | W: | H:

156 KB | W: | H:

  • 2-up
  • Swipe
  • Onion skin

32.4 KB | W: | H:

30.4 KB | W: | H:

  • 2-up
  • Swipe
  • Onion skin
......@@ -158,7 +158,6 @@ def is_canonical_nts(seq):
return False
return True
def is_canonical_bps(struct):
if "()" in struct:
return False
......@@ -207,7 +206,6 @@ def load_from_dbn(file, header_style=3):
return container, pkcounter
def parse_biokop(folder, basename, ext=".biok"):
solutions = []
err = 0
......@@ -248,7 +246,6 @@ def parse_biokop(folder, basename, ext=".biok"):
err = 1
return None, err
def parse_biorseo(folder, basename, ext):
solutions = []
err = 0
......@@ -272,21 +269,14 @@ def parse_biorseo(folder, basename, ext):
err = 1
return None, err
def prettify_biorseo(code):
name = ""
if "bgsu" in code:
name += "RNA 3D Motif Atlas + "
if "json" in code:
name += "JSON motifs + "
elif "rin" in code:
name += "CaRNAval + "
name += "Rna3Dmotifs + "
if "raw" in code:
name += "Direct P.M."
if "byp" in code:
name += "BPairing"
if "jar3d" in code:
name += "Jar3d"
# name += " + $f_{1" + code[-1] + "}$"
return name
......@@ -342,14 +332,9 @@ def process_extension(ax, pos, ext, nsolutions=False, xlabel="Best solution perf
if __name__ == "__main__":
opts, args = getopt.getopt( sys.argv[1:], "",
[ "biorseo_desc_byp_A", "biorseo_desc_byp_B",
"biorseo_desc_byp_C", "biorseo_desc_byp_D",
"biorseo_bgsu_byp_A", "biorseo_bgsu_byp_B",
"biorseo_bgsu_byp_C", "biorseo_bgsu_byp_D",
"biorseo_desc_raw_A", "biorseo_desc_raw_B",
"biorseo_bgsu_jar3d_A", "biorseo_bgsu_jar3d_B",
"biorseo_bgsu_jar3d_C", "biorseo_bgsu_jar3d_D",
"biorseo_rin_raw_A", "biorseo_rin_raw_B",
[ "biorseo_desc_A", "biorseo_desc_B",
"biorseo_rin_A", "biorseo_rin_B",
"biorseo_json_A", "biorseo_json_B",
"biokop", "folder=", "database=", "output="
except getopt.GetoptError as err:
......@@ -384,36 +369,19 @@ if __name__ == "__main__":
if extension == "all":
parse = parse_biorseo
fig, ax = plt.subplots(4,5,figsize=(12,10), sharex=True, sharey=True)
fig, ax = plt.subplots(2,3,figsize=(8,10), sharex=True, sharey=True)
ax = ax.flatten()
process_extension(ax, 0, ".biorseo_desc_raw_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
process_extension(ax, 1, ".biorseo_rin_raw_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
process_extension(ax, 2, ".biorseo_desc_byp_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
process_extension(ax, 3, ".biorseo_bgsu_byp_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
process_extension(ax, 4, ".biorseo_bgsu_jar3d_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
ax[0].set_title(prettify_biorseo("biorseo_desc_raw_A"), fontsize=10)
ax[1].set_title(prettify_biorseo("biorseo_rin_raw_A"), fontsize=10)
ax[2].set_title(prettify_biorseo("biorseo_desc_byp_A"), fontsize=10)
ax[3].set_title(prettify_biorseo("biorseo_bgsu_byp_A"), fontsize=10)
ax[4].set_title(prettify_biorseo("biorseo_bgsu_jar3d_A"), fontsize=10)
process_extension(ax, 5, ".biorseo_desc_raw_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
process_extension(ax, 6, ".biorseo_rin_raw_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
process_extension(ax, 7, ".biorseo_desc_byp_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
process_extension(ax, 8, ".biorseo_bgsu_byp_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
process_extension(ax, 9, ".biorseo_bgsu_jar3d_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
process_extension(ax, 12, ".biorseo_desc_byp_C", ylabel="Normalized $f_{1C}$", xlabel="Normalized MEA")
process_extension(ax, 13, ".biorseo_bgsu_byp_C", ylabel="Normalized $f_{1C}$", xlabel="Normalized MEA")
process_extension(ax, 14, ".biorseo_bgsu_jar3d_C", ylabel="Normalized $f_{1C}$", xlabel="Normalized MEA")
process_extension(ax, 17, ".biorseo_desc_byp_D", ylabel="Normalized $f_{1D}$", xlabel="Normalized MEA")
process_extension(ax, 18, ".biorseo_bgsu_byp_D", ylabel="Normalized $f_{1D}$", xlabel="Normalized MEA")
process_extension(ax, 19, ".biorseo_bgsu_jar3d_D", ylabel="Normalized $f_{1D}$", xlabel="Normalized MEA")
process_extension(ax, 0, ".biorseo_desc_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
process_extension(ax, 1, ".biorseo_rin_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
process_extension(ax, 2, ".biorseo_json_A", ylabel="Normalized $f_{1A}$", xlabel="Normalized MEA")
ax[0].set_title(prettify_biorseo("biorseo_desc_A"), fontsize=10)
ax[1].set_title(prettify_biorseo("biorseo_rin_A"), fontsize=10)
ax[2].set_title(prettify_biorseo("biorseo_json_A"), fontsize=10)
process_extension(ax, 3, ".biorseo_desc_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
process_extension(ax, 4, ".biorseo_rin_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
process_extension(ax, 5, ".biorseo_json_B", ylabel="Normalized $f_{1B}$", xlabel="Normalized MEA")
for a in ax:
plt.subplots_adjust(bottom=0.05, top=0.95, left=0.07, right=0.98, hspace=0.1, wspace = 0.05)
# Created by Louis Becquey, louis.becquey@univ-evry.fr, Oct 2019
# This script processes files containing RNA structures obtained from bi-objective
# optimization programs, and a dot-bracket database of reference structures, to plot
# where the best solutions are in the Pareto set.
# The result files should follow this kind of format:
# for Biokop: (option --biokop)
# Structure Free energy score Expected accuracy score
# (((...(((...)))))) <tab> obj1_value <tab> obj2_value
# (((............))) <tab> obj1_value <tab> obj2_value
# ((((((...)))...))) <tab> obj1_value <tab> obj2_value
# ...
# for BiORSEO: (options --biorseo_**stuff**)
# >Header of the sequence
# (((...(((...)))))) + Motif1 + Motif2 <tab> obj1_value <tab> obj2_value
# (((............))) <tab> obj1_value <tab> obj2_value
# ((((((...)))...))) + Motif1 <tab> obj1_value <tab> obj2_value
# typical Biokop usage:
# python3 pareto_visualizer.py --biokop --folder path/to/your/results/folder --database path/to/the/database_file.dbn
# typical Biorseo usage:
# python3 pareto_visualizer_json.py --folder path/to/your/results/folder (pmE and pmF) --database path/to/the/database_file.dbn (name, sequence, structure)
from math import sqrt
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import scipy.stats as st
import sys
import os
import subprocess
import getopt
class SecStruct:
    """A secondary structure with its contacts string and two objective values.

    Attributes:
        name: identifier given by the caller for this structure.
        dbn: the structure in dot-bracket notation.
        ctc: the contacts string; compared position by position with another
            contacts string using the characters '*' and '.'.
        objectives: two-element list ``[obj1_value, obj2_value]``.
        basepair_list: list of ``(closing_index, opening_index)`` tuples.
        length: number of characters of ``dbn``.
    """

    # Closing symbol -> matching opening symbol, one entry per bracket family.
    _OPENING_OF = {')': '(', ']': '[', '}': '{', '>': '<', 'a': 'A', 'b': 'B'}

    def __init__(self, name, dot_bracket, contacts, obj1_value, obj2_value):
        self.name = name
        self.dbn = dot_bracket
        self.ctc = contacts
        self.objectives = [obj1_value, obj2_value]
        self.basepair_list = self.get_basepairs()
        self.length = len(dot_bracket)

    def get_basepairs(self):
        """Return the base pairs encoded in ``self.dbn``.

        Six bracket families are recognised: ``()``, ``[]``, ``{}``, ``<>``,
        ``Aa`` and ``Bb``. Every other character (including '.') is ignored.

        Returns:
            A list of ``(closing_index, opening_index)`` tuples, in the order
            in which the closing symbols appear in the string.

        Raises:
            IndexError: if a closing symbol has no pending opening symbol of
                the same family.
        """
        # One stack of pending opening positions per bracket family.
        stacks = {opening: [] for opening in self._OPENING_OF.values()}
        basepairs = []
        for i, c in enumerate(self.dbn):
            if c in stacks:
                stacks[c].append(i)
            elif c in self._OPENING_OF:
                basepairs.append((i, stacks[self._OPENING_OF[c]].pop()))
        return basepairs

    def get_MCC_with(self, reference_structure):
        """Return the Matthews correlation coefficient of the base pairs.

        ``self`` is the prediction and ``reference_structure`` the truth. The
        number of true negatives is derived from the number of possible
        position pairs, ``L * (L - 1) / 2``, with L the reference length.

        Returns:
            The MCC as a float. When the MCC denominator is zero (for instance
            when no base pair is predicted) the coefficient is undefined and
            0.0 is returned instead of raising ZeroDivisionError.
        """
        predicted = set(self.basepair_list)
        expected = set(reference_structure.basepair_list)
        tp = len(predicted & expected)
        fn = len(expected - predicted)
        fp = len(predicted - expected)
        n = reference_structure.length
        tn = n * (n - 1) * 0.5 - fp - fn - tp

        if tp + fp == 0:
            print("We have an issue : no positives detected ! (linear structure)")
        denominator = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
        if denominator == 0:
            # Undefined MCC: report "no correlation" rather than crashing.
            return 0.0
        return (tp * tn - fp * fn) / sqrt(denominator)

    def get_MCC_ctc_with(self, reference_structure):
        """Return the Matthews correlation coefficient of the contacts strings.

        Positions are compared one by one over the length of the reference
        contacts string; '*' counts as positive and '.' as negative. Positions
        holding any other character are not counted.

        Returns:
            The MCC as a float, or None (after printing a warning) when the
            MCC denominator is zero.

        Raises:
            IndexError: if this structure's contacts string is shorter than
                the reference one.
        """
        tp = 0
        fp = 0
        tn = 0
        fn = 0
        prediction = self.ctc
        for i, truth in enumerate(reference_structure.ctc):
            guess = prediction[i]
            if truth == '*' and guess == '*':
                tp += 1
            elif truth == '.' and guess == '.':
                tn += 1
            elif truth == '.' and guess == '*':
                fp += 1
            elif truth == '*' and guess == '.':
                fn += 1

        # The guard must come before any division: the quotient used to be
        # evaluated first, which raised before the check could run.
        denominator = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
        if denominator == 0:
            print("warning: division by zero!")
            return None
        return (tp * tn - fp * fn) / sqrt(denominator)
class Pareto:
    """A set of predicted structures compared against one reference structure.

    Attributes:
        predictions: the list of predicted structures.
        true_structure: the reference structure.
        n_pred: number of predictions.
        max_obj1, max_obj2: largest value of each objective over the set.
        index_of_best: index of the prediction with the highest base-pair MCC.
        index_of_best_ctc: index of the prediction with the highest contacts MCC.

    Raises:
        ValueError: from ``max()`` when ``list_of_structs`` is empty.
    """

    def __init__(self, list_of_structs, reference):
        self.predictions = list_of_structs
        self.true_structure = reference
        self.n_pred = len(list_of_structs)
        self.max_obj1 = max(s.objectives[0] for s in self.predictions)
        self.max_obj2 = max(s.objectives[1] for s in self.predictions)
        self.index_of_best = self.find_best_solution()
        self.index_of_best_ctc = self.find_best_solution_ctc()

    def find_best_solution(self):
        """Return the index of the prediction closest to the real 2D structure.

        "Closest" means the highest value of ``get_MCC_with``. Ties keep the
        first prediction found. Returns -1 if no prediction scores above -1.
        """
        max_i = -1
        max_mcc = -1
        for i, s in enumerate(self.predictions):
            mcc = s.get_MCC_with(self.true_structure)
            if mcc > max_mcc:
                max_mcc = mcc
                max_i = i
        print("\n" + "max mcc str: " + str(max_mcc))
        return max_i

    def find_best_solution_ctc(self):
        """Return the index of the prediction closest to the real contacts.

        "Closest" means the highest value of ``get_MCC_ctc_with``. Predictions
        for which that method returns None (undefined MCC) are skipped.
        Returns -1 if no prediction scores above -1.
        """
        max_i = -1
        max_mcc = -1
        for i, s in enumerate(self.predictions):
            mcc = s.get_MCC_ctc_with(self.true_structure)
            if mcc is None:
                continue
            if mcc > max_mcc:
                max_mcc = mcc
                max_i = i
        return max_i

    def _normalized_objectives(self, index):
        """Return the objectives of prediction ``index`` divided by the set maxima."""
        coords = self.predictions[index].objectives
        # A maximum of 0 would divide by zero: fall back to the mid-point 0.5.
        x = coords[0] / self.max_obj1 if self.max_obj1 else 0.5
        y = coords[1] / self.max_obj2 if self.max_obj2 else 0.5
        return (x, y)

    def get_normalized_coords(self):
        """Return the normalized (obj1, obj2) of the best base-pair solution."""
        return self._normalized_objectives(self.index_of_best)

    def get_normalized_coords_ctc(self):
        """Return the normalized (obj1, obj2) of the best contacts solution."""
        return self._normalized_objectives(self.index_of_best_ctc)
class RNA:
    """Plain record holding the fields of one database entry.

    Attributes:
        basename_: the ``filename`` argument.
        header_: the header line of the entry.
        seq_: the sequence.
        struct_: the structure string.
        contacts_: the contacts string.
    """

    def __init__(self, filename, header, seq, struct, contacts):
        self.basename_ = filename
        self.header_ = header
        self.seq_, self.struct_, self.contacts_ = seq, struct, contacts
# Tally of non-ACGU characters that caused a sequence to be rejected.
ignored_nt_dict = {}


def is_canonical_nts(seq):
    """Tell whether ``seq`` contains only the nucleotides A, C, G and U.

    Trailing newline characters are ignored, so the function accepts both raw
    file lines and already-stripped sequences. The check stops at the first
    offending character, which is counted once in ``ignored_nt_dict``.

    Args:
        seq: the sequence, expected in upper case.

    Returns:
        True if every nucleotide is canonical, False otherwise.
    """
    # Strip only the newline: slicing off the last character unconditionally
    # would leave the final nucleotide of a stripped sequence unchecked.
    for c in seq.rstrip("\n"):
        if c not in "ACGU":
            ignored_nt_dict[c] = ignored_nt_dict.get(c, 0) + 1
            return False
    return True
def is_canonical_bps(struct):
    """Tell whether ``struct`` is free of too-short hairpin loops.

    A structure is rejected when it contains a ``()`` or ``[]`` pair enclosing
    zero, one or two dots.

    Returns:
        False if any such pattern occurs in ``struct``, True otherwise.
    """
    forbidden_hairpins = ("()", "(.)", "(..)", "[]", "[.]", "[..]")
    return not any(pattern in struct for pattern in forbidden_hairpins)
def load_from_dbn(file, header_style=1):
    """Load the RNAs of a database file made of four-line records.

    The lines of the file are read in groups of four: header, sequence,
    structure, contacts. A record is kept only if its sequence passes
    ``is_canonical_nts`` and its structure passes ``is_canonical_bps``.
    An incomplete trailing record is ignored.

    Args:
        file: path of the database file.
        header_style: how the RNA basename is cut out of the header, after
            replacing '/' by '_':
            1 -> text after the last '(' minus its final character;
            2 -> text after the last '[' minus its final 41 characters
                 (NOTE(review): the meaning of 41 is not visible here).
            With any other value no RNA is stored.

    Returns:
        A tuple ``(container, counter)``: the list of RNA objects kept and the
        number of accepted records whose structure contains '['.
    """
    container = []
    counter = 0
    header = ""
    seq = ""
    struct = ""
    # The context manager guarantees the file is closed, even on error.
    with open(file, "r") as db:
        for line_number, raw_line in enumerate(db, start=1):
            # Drop only the line terminator, so a final line without a
            # trailing newline does not lose its last character.
            text = raw_line.rstrip("\n")
            position = line_number % 4
            if position == 1:
                header = text
            elif position == 2:
                seq = text.upper()
            elif position == 3:
                struct = text
            else:
                contacts = text
                if not (is_canonical_nts(seq) and is_canonical_bps(struct)):
                    continue
                clean_header = header.replace('/', '_')
                if header_style == 1:
                    container.append(RNA(clean_header.split('(')[-1][:-1], header, seq, struct, contacts))
                if header_style == 2:
                    container.append(RNA(clean_header.split('[')[-1][:-41], header, seq, struct, contacts))
                if '[' in struct:
                    counter += 1
    return container, counter
# Reads the file <folder>/<basename><ext> and builds one SecStruct per line
# after the first one, splitting each line on tabs: column 0 is the
# dot-bracket structure, columns 1 and 2 are the two objective values.
# The second objective of every solution is then shifted so that the smallest
# one of the set becomes 0.
# NOTE(review): this extract has lost its indentation and some branch bodies
# are not visible (e.g. after `if s == '\n':` and `if db2d not in
# different_2ds:`); the nesting must be confirmed against the real file.
def parse_biokop(folder, basename, ext=".biok"):
solutions = []
if os.path.isfile(os.path.join(folder, basename + ext)):
rna = open(os.path.join(folder, basename + ext), "r")
lines = rna.readlines()
different_2ds = []
for s in lines[1:]:
if s == '\n':
splitted = s.split('\t')
db2d = splitted[0]
if db2d not in different_2ds:
# here is a negative sign because Biokop actually minimizes -MEA instead
# of maximizing MEA : we switch back to MEA
# NOTE(review): SecStruct.__init__ in this file takes five arguments
# (name, dot_bracket, contacts, obj1_value, obj2_value) but only four are
# passed here, with no contacts string -- confirm and fix.
solutions.append(SecStruct(basename, db2d, -float(splitted[1]), -float(splitted[2][:-1])))
# check the range of MEA in this pareto set
min_mea = solutions[0].objectives[1]
max_mea = min_mea
for s in solutions:
mea = s.objectives[1]
if mea < min_mea:
min_mea = mea
if mea > max_mea:
max_mea = mea
# normalize so the minimum MEA of the set is 0
for i in range(len(solutions)):
solutions[i].objectives[1] -= min_mea
# The solutions are returned only when more than one distinct 2D structure
# was recorded in different_2ds.
if len(different_2ds) > 1:
return solutions
print("[%s] \033[36mWARNING: ignoring this RNA, only one 2D solution is found.\033[0m" % (basename))
print("[%s] \033[36mWARNING: file not found !\033[0m" % (basename))
# Reads the file <folder>/<basename><ext>, skipping its first two lines, and
# builds SecStruct objects from the remaining tab-separated lines. Lines
# containing '(' provide a dot-bracket structure (kept in str2d) and lines
# containing '*' provide a contacts string; a SecStruct is created from the
# remembered structure, the contacts string and the two objective values.
# Returns the list of solutions when more than one distinct entry was
# recorded in different_2ds, otherwise prints a warning and returns None.
# NOTE(review): this extract has lost its indentation and some lines are not
# visible (the body of `if s == '\n':`, the statement that fills
# different_2ds, the `else` branches); the exact pairing of structure and
# contacts lines must be confirmed against the real file.
def parse_biorseo(folder, basename, ext):
solutions = []
print(basename + ext)
if os.path.isfile(os.path.join(folder, basename + ext)):
rna = open(os.path.join(folder, basename + ext), "r")
lines = rna.readlines()
different_2ds = []
contacts = []
str2d = []
# count is the 1-based position of the current line among lines[2:].
count = 0;
for s in lines[2:]:
count = count + 1
if s == '\n':
splitted = s.split('\t')
if(count % 2 == 1):
# Columns 1 and 2 hold the objective values; [:-1] drops the last
# character of the final column (presumably the newline).
obj1 = float(splitted[1])
obj2 = float(splitted[2][:-1])
# Column 0 may be followed by " + Motif..." annotations: keep only the
# text before the first space.
db2d = splitted[0].split(' ')[0]
if db2d not in different_2ds:
if(s.find('(') != -1):
if(s.find('*') != -1):
contacts = db2d
solutions.append(SecStruct(basename, str2d, contacts, obj1, obj2))
elif(s.find('(') != -1):
str2d = db2d
if len(different_2ds) > 1:
return solutions
print("[%s] \033[36mWARNING: ignoring this RNA, only one 2D or contacts solution is found.\033[0m" % (basename))
print("[%s] \033[36mWARNING: file not found !\033[0m" % (basename))
return None
def prettify_biorseo(code):
    """Build a human-readable panel title from a result-file extension code.

    The title is a motif-source label followed by an insertion-method label,
    both chosen from substrings of ``code``.

    Args:
        code: the extension without its leading dot.

    Returns:
        The title string (may have a trailing " + " if no method matches).
    """
    name = ""
    # Motif source: exactly one of the three labels is used.
    if "bgsu" in code:
        name += "RNA 3D Motif Atlas + "
    elif "json" in code:
        name += "Motifs d'Isaure + Direct P.M"
    else:
        name += "Rna3Dmotifs + "
    # Insertion method.
    if "raw" in code:
        name += "Direct P.M."
    if "byp" in code:
        name += "BPairing"
    if "jar3d" in code:
        name += "Jar3d"
    # name += " + $f_{1" + code[-1] + "}$"
    return name
# Parse options
# Command-line handling: long options only (the short-option string is empty).
# NOTE(review): in this extract the `try:` line, the end of the options list
# and the body of the `except` clause are not visible.
opts, args = getopt.getopt( sys.argv[1:], "",
[ "json_pmE",
except getopt.GetoptError as err:
# Defaults used when the corresponding option is not given.
# NOTE(review): no default is visible for `database`; if --database is not
# passed, load_from_dbn(database) below would raise NameError -- confirm.
results_folder = "."
extension = "all"
outputf = ""
for opt, arg in opts:
if opt == "--biokop":
extension = ".biok"
parse = parse_biokop
elif opt == "--folder":
results_folder = arg
elif opt == "--database":
database = arg
elif opt == "--output":
outputf = arg
# Presumably the fallback branch for any other option (its `else:` is not
# visible): the option name without the leading "--" becomes the extension.
extension = '.' + opt[2:]
parse = parse_biorseo
# Only the list of RNAs is used; the second returned value is discarded.
RNAcontainer, _ = load_from_dbn(database)
# Make sure both folders end with a '/'; the output folder defaults to the
# results folder.
if results_folder[-1] != '/':
results_folder = results_folder + '/'
if outputf == "":
outputf = results_folder
if outputf[-1] != '/':
outputf = outputf + '/'
# Draws one panel, ax[pos], for the result files having extension `ext`.
# For every RNA of the module-level RNAcontainer, the results file is parsed
# with the module-level `parse` function and compared to the reference
# structure through a Pareto object. The collected points are then shown as a
# Gaussian kernel density contour plus a scatter plot. If `nsolutions` is
# true, a histogram of `sizes` is also drawn on ax[pos+1].
# NOTE(review): this extract has lost its indentation and some lines are not
# visible: the body of `if solutions is None:`, the statements filling
# `points` and `sizes`, and any use of `xlabel` / `ylabel`.
def process_extension(ax, pos, ext, nsolutions=False, xlabel="Best solution performs\nwell on obj1", ylabel="Best solution performs\n well on obj2"):
points = []
sizes = []
for rna in RNAcontainer:
# Extracting the predictions from the results file
solutions = parse(results_folder, rna.basename_, ext)
# The reference gets infinite objective values; they act as placeholders.
reference = SecStruct(rna.basename_, rna.struct_, rna.contacts_, float("inf"), float("inf"))
if solutions is None:
pset = Pareto(solutions, reference)
print("[%s] Loaded %d solutions in a Pareto set, max(obj1)=%f, max(obj2)=%f" % (rna.basename_, pset.n_pred, pset.max_obj1, pset.max_obj2))
print("Loaded %d points on %d." % (len(points), len(RNAcontainer)))
x = np.array([ p[0] for p in points ])
y = np.array([ p[1] for p in points ])
# Evaluate the kernel density estimate on a 100 x 100 grid over [0,1] x [0,1].
xmin, xmax = 0, 1
ymin, ymax = 0, 1
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([xx.ravel(), yy.ravel()])
values = np.vstack([x, y])
kernel = st.gaussian_kde(values)
f = np.reshape(kernel(positions).T, xx.shape)
# Light guide lines on the borders of the unit square.
ax[pos].axhline(y=0, alpha=0.2, color='black')
ax[pos].axhline(y=1, alpha=0.2, color='black')
ax[pos].axvline(x=0, alpha=0.2, color='black')
ax[pos].axvline(x=1, alpha=0.2, color='black')
ax[pos].contourf(xx, yy, f, cmap=cm.Blues, alpha=0.5)
ax[pos].scatter(x, y, s=25, alpha=0.1)
ax[pos].set_title(prettify_biorseo(ext[1:]), fontsize=10)
# Show how many RNAs contributed a point out of the whole database.
ax[pos].annotate("("+str(len(points))+'/'+str(len(RNAcontainer))+" RNAs)", (0.08, 0.15))
if nsolutions:
# Histogram of `sizes` with bins of width 2 and a tick every 10.
ax[pos+1].hist(sizes, bins=range(0, max(sizes)+1, 2), histtype='bar')
ax[pos+1].set_xticks(range(0, max(sizes), 10))
ax[pos+1].set_xticklabels(range(0, max(sizes), 10), rotation=90)
ax[pos+1].set_xlabel("# solutions")
ax[pos+1].set_ylabel("# RNAs")
# Draws one panel, ax[pos], for the result files having extension `ext`:
# a Gaussian kernel density contour plus a scatter plot of the collected
# points, and optionally a histogram of `sizes` on ax[pos+1].
# NOTE(review): the visible lines are the same as in process_extension;
# presumably the hidden lines collect the contacts-based coordinates
# (Pareto.get_normalized_coords_ctc) -- TODO confirm. This extract has lost
# its indentation, and the body of `if solutions is None:`, the statements
# filling `points` and `sizes`, and any use of `xlabel` / `ylabel` are not
# visible.
def process_extension_ctc(ax, pos, ext, nsolutions=False, xlabel="Best solution performs\nwell on obj1", ylabel="Best solution performs\n well on obj2"):
points = []
sizes = []
for rna in RNAcontainer:
# Extracting the predictions from the results file
solutions = parse(results_folder, rna.basename_, ext)
# The reference gets infinite objective values; they act as placeholders.
reference = SecStruct(rna.basename_, rna.struct_, rna.contacts_, float("inf"), float("inf"))
if solutions is None:
pset = Pareto(solutions, reference)
print("[%s] Loaded %d solutions in a Pareto set, max(obj1)=%f, max(obj2)=%f" % (rna.basename_, pset.n_pred, pset.max_obj1, pset.max_obj2))
print("Loaded %d points on %d." % (len(points), len(RNAcontainer)))
x = np.array([ p[0] for p in points ])
y = np.array([ p[1] for p in points ])
# Evaluate the kernel density estimate on a 100 x 100 grid over [0,1] x [0,1].
xmin, xmax = 0, 1
ymin, ymax = 0, 1
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([xx.ravel(), yy.ravel()])
values = np.vstack([x, y])
kernel = st.gaussian_kde(values)
f = np.reshape(kernel(positions).T, xx.shape)
# Light guide lines on the borders of the unit square.
ax[pos].axhline(y=0, alpha=0.2, color='black')
ax[pos].axhline(y=1, alpha=0.2, color='black')
ax[pos].axvline(x=0, alpha=0.2, color='black')
ax[pos].axvline(x=1, alpha=0.2, color='black')
ax[pos].contourf(xx, yy, f, cmap=cm.Blues, alpha=0.5)
ax[pos].scatter(x, y, s=25, alpha=0.1)
ax[pos].set_title(prettify_biorseo(ext[1:]), fontsize=10)
# Show how many RNAs contributed a point out of the whole database.
ax[pos].annotate("("+str(len(points))+'/'+str(len(RNAcontainer))+" RNAs)", (0.08,0.15))
if nsolutions:
# Histogram of `sizes` with bins of width 2 and a tick every 10.
ax[pos+1].hist(sizes, bins=range(0, max(sizes)+1, 2), histtype='bar')
ax[pos+1].set_xticks(range(0, max(sizes), 10))
ax[pos+1].set_xticklabels(range(0, max(sizes), 10), rotation=90)
ax[pos+1].set_xlabel("# solutions")
ax[pos+1].set_ylabel("# RNAs")
# Figure construction. With the default extension "all", a 1 x 2 figure is
# built from the ".json_pmF_MEA" result files: panel 0 through
# process_extension and panel 1 through process_extension_ctc. The other
# visible calls build a 2 x 1 figure for a single extension.
# NOTE(review): this extract has lost its indentation; the `else:` branches,
# the body of `for a in ax:` and any statement saving or showing the figure
# are not visible.
if extension == "all":
parse = parse_biorseo
fig, ax = plt.subplots(1, 2, figsize=(10, 5), sharey=True)
ax = ax.flatten()
# NOTE(review): the files are "pmF" results but the x label reads f_{1E} --
# confirm which one is intended.
process_extension(ax, 0, ".json_pmF_MEA", xlabel="Normalized $f_{1E}$", ylabel="Normalized MEA")
process_extension_ctc(ax, 1, ".json_pmF_MEA", xlabel="Normalized $f_{1E}$", ylabel="Normalized MEA")
for a in ax:
plt.subplots_adjust(bottom=0.2, top=0.9, left=0.07, right=0.98, hspace=0.05, wspace=0.05)
fig, ax = plt.subplots(2,1, figsize=(6,5))
plt.subplots_adjust(bottom=0.12, top=0.9, left=0.15, right=0.9, hspace=0.4)
if extension == ".biok":
# NOTE(review): both axis labels read "Normalized MFE"; one of them is
# presumably meant to be the MEA -- confirm.
process_extension(ax, 0, extension, nsolutions=True, xlabel="Normalized MFE", ylabel="Normalized MFE")
process_extension(ax, 0, extension, nsolutions=False)
\ No newline at end of file