Louis BECQUEY

removed stuff from biorseo

1 -results_*
2 -build_BiORSEO_docker_image_ubuntu18.sh
3 -deploy_BiORSEO_docker_image_linux.sh
4 -INSTALL.md
5 -Readme.md
6 -benchmark_results/
7 -doc/
FROM ubuntu:bionic

# installing dependencies
RUN apt-get update -yq && \
    apt-get upgrade -y && \
    apt-get install -y python3-dev python3-pip openjdk-11-jre libgsl23 libgslcblas0 libboost-program-options-dev libboost-filesystem-dev && \
    rm -rf /var/lib/apt/lists/*

# compiled biorseo
COPY . /biorseo
# ViennaRNA installer
ADD "https://www.tbi.univie.ac.at/RNA/download/ubuntu/ubuntu_18_04/viennarna_2.4.14-1_amd64.deb" /
# jar3d archive
ADD http://rna.bgsu.edu/data/jar3d/models/jar3d_2014-12-11.jar /

# install codes
# -y is required: a docker build has no terminal to answer apt's confirmation prompt.
# The cleanup removes the installer that was actually downloaded (2.4.14) instead of
# ViennaRNA-2.4.13 paths that are never created in this image.
RUN dpkg -i /viennarna_2.4.14-1_amd64.deb && \
    apt-get install -f -y && \
    \
    pip3 install networkx numpy regex wrapt biopython /biorseo/BayesPairing && \
    \
    cd / && \
    rm -rf /biorseo/BayesPairing /viennarna_2.4.14-1_amd64.deb
WORKDIR /biorseo
...\ No newline at end of file ...\ No newline at end of file
1 -#!/usr/bin/python3
2 -#coding=utf-8
3 -
4 -# typical usage : ./benchmark.py data/sec_structs/verified_secondary_structures_database.dbn data/sec_structs/pseudoknots.dbn data/sec_structs/applications.dbn
5 -
6 -# the .dbn files should be formatted the following way:
7 -# > header of the sequence (somecode)
8 -# ACGUACGUACGUACGUACGU
9 -# ...(((...((...))))).
10 -# > header of the next sequence (somecode2)
11 -# ACGUACGUACGGGCGUACGU
12 -# ...(((..........))).
13 -
14 -from sys import argv
15 -from scipy import stats
16 -from tqdm import tqdm
17 -import subprocess
18 -from os import path, makedirs, getcwd, chdir, devnull
19 -import numpy as np
20 -import matplotlib.pyplot as plt
21 -from matplotlib.lines import Line2D
22 -from math import sqrt, ceil
23 -from multiprocessing import Pool, cpu_count, Manager, Value
24 -import multiprocessing
25 -import multiprocessing.pool
26 -import ast, time
27 -import pickle
28 -
# ================== DEFINITION OF THE PATHS ==============================

# Everything below is resolved relative to the directory the script is launched from.
biorseoDir = path.realpath(".")
jar3dexec = "/local/local/localopt/jar3d_2014-12-11.jar"
bypdir = biorseoDir + "/BayesPairing/bayespairing/src"
byp2dir = biorseoDir + "/BayesPairing2/bayespairing/src"
moipdir = "/local/local/localopt/RNAMoIP/Src/RNAMoIP.py"
biokopdir = "/local/local/localopt/biokop/biokop"
# Directory containing this script (where log_of_the_run.sh is appended).
runDir = path.dirname(path.realpath(__file__))
# The three .dbn datasets are read from the command line, in this order.
bpRNAFile = argv[1]
PseudobaseFile = argv[2]
StudyCaseFile = argv[3]
outputDir = biorseoDir + "/benchmark_results/"
HLmotifDir = biorseoDir + "/data/modules/BGSU/HL/3.2/lib"
ILmotifDir = biorseoDir + "/data/modules/BGSU/IL/3.2/lib"
descfolder = biorseoDir + "/data/modules/DESC"
rinfolder = biorseoDir + "/data/modules/RIN/Subfiles/"

# Create some folders to store the results.
# makedirs(exist_ok=True) does what `mkdir -p` did, without spawning a process,
# and raises OSError instead of silently ignoring a failure.
makedirs(outputDir, exist_ok=True)
makedirs(outputDir + "PK/", exist_ok=True)
makedirs(outputDir + "noPK/", exist_ok=True)

# Shared integer counters used to report the progress of the jobs.
n_launched = Value('i', 0)
n_finished = Value('i', 0)
n_skipped = Value('i', 0)
ncores = cpu_count()
56 -
57 -# ================== CLASSES AND FUNCTIONS ================================
58 -
class NoDaemonProcess(multiprocessing.Process):
    """Process whose ``daemon`` flag always reads False and cannot be changed."""

    @property
    def daemon(self):
        return False

    @daemon.setter
    def daemon(self, value):
        # Deliberately ignore any attempt to change the flag.
        pass
67 -
68 -
class NoDaemonContext(type(multiprocessing.get_context())):
    """Default multiprocessing context, except that it creates NoDaemonProcess objects."""

    Process = NoDaemonProcess
71 -
# We sub-class multiprocessing.pool.Pool instead of multiprocessing.Pool
# because the latter is only a wrapper function, not a proper class.
class MyPool(multiprocessing.pool.Pool):
    """Pool that always runs on a NoDaemonContext, whatever ``context`` the caller passes."""

    def __init__(self, *args, **kwargs):
        # Any caller-supplied context is overridden on purpose.
        kwargs['context'] = NoDaemonContext()
        super().__init__(*args, **kwargs)
78 -
79 -
class Job:
    """One unit of work of the benchmark: either a shell command or a Python function call.

    Parameters:
        results: path of the file the job is expected to produce.
        command: argument list of the command to run (empty list when the job is a function call).
        function: Python callable to run instead of a command.
        args: positional arguments passed to ``function``.
        how_many_in_parallel: 0 means ``ncores``, -1 means ``ncores - 1``,
            any other value is stored as is in ``nthreads``.
        priority: integer stored in ``priority_``.
        timeout: stored in ``timeout_``.
        checkFunc, checkArgs: optional predicate (and its arguments) stored alongside the job.
        label: human-readable name of the job.
    """

    def __init__(self, results, command=None, function=None, args=None, how_many_in_parallel=0, priority=1, timeout=None, checkFunc=None, checkArgs=None, label=""):
        # None defaults instead of [] so that jobs never share one mutable list.
        self.cmd_ = [] if command is None else command
        self.func_ = function
        self.args_ = [] if args is None else args
        self.checkFunc_ = checkFunc
        self.checkArgs_ = [] if checkArgs is None else checkArgs
        self.results_file = results
        self.priority_ = priority
        self.timeout_ = timeout
        self.comp_time = -1  # -1 is not executed yet
        self.label = label
        if not how_many_in_parallel:
            self.nthreads = ncores
        elif how_many_in_parallel == -1:
            self.nthreads = ncores - 1
        else:
            self.nthreads = how_many_in_parallel
        self.useless_bool = False

    def __str__(self):
        # The label is read from self: the previous code referenced an undefined name `j`.
        prefix = f"{self.priority_}({self.nthreads}) [{self.comp_time}]\t{self.label:25}"
        if self.func_ is None:
            return prefix + " ".join(self.cmd_)
        return prefix + f"{self.func_.__name__}(" + " ".join([str(a) for a in self.args_]) + ")"
106 -
107 -
class Loop:
    """A loop of an RNA: the RNA name, a header, the loop subsequence, a type and a position."""

    def __init__(self, rna_name, header, subsequence, looptype, position):
        self.rna_name = rna_name
        self.header = header
        self.seq = subsequence
        self.type = looptype
        self.position = position

    def get_header(self):
        """Return the header given at construction."""
        return self.header

    def subsequence(self):
        """Return the subsequence given at construction."""
        return self.seq
121 -
122 -
class InsertionSite:
    """A motif hit parsed from one line of a jar3d csv output, ordered by score."""

    def __init__(self, loop, csv_line):
        # BEWARE : jar3d csv output is unreliable because of java's locale settings.
        # On french OSes, it uses commas to delimit the fields AND as floating point delimiters !!
        # Parse with caution, and check what the csv output files look like on your system...
        info = csv_line.split(',')
        self.loop = loop  # the Loop object that has been searched with jar3d
        # position of the loop's components, so the motif's ones, in the query sequence.
        self.position = loop.position
        # Motif model identifier of the RNA 3D Motif Atlas
        self.atlas_id = info[2]
        # alignment score of the subsequence to the motif model (truncated to an integer)
        self.score = int(float(info[4]))
        # should the motif model be inverted to fit the sequence ?
        self.rotation = int(info[-2])

    def __lt__(self, other):
        return self.score < other.score

    def __gt__(self, other):
        return self.score > other.score
144 -
145 -
class Method:
    """One prediction method applied to one RNA: its definition, its jobs and its results.

    Parameters:
        parent_rna: the RNA object this method is evaluated on (its ``basename``,
            ``seq_`` and ``header_`` are read when building the jobs).
        tool: name of the tool ("biorseo", "RNAsubopt", "RNA-MoIP (1by1)",
            "RNA-MoIP (chunk)", "Biokop").
        data_source: module dataset ("DESC", "RIN", anything else is handled as BGSU).
        placement_method: how modules are placed ("D.P.", "ByP", "Jar3d").
        obj_func: objective function name, passed to biorseo with ``--func``.
        PK: when False, ``-n`` is passed to biorseo and results go to the noPK/ folder.
        flat: when True, every job is created with ``how_many_in_parallel=1``.
    """

    def __init__(self, parent_rna, tool, data_source=None, placement_method=None, obj_func=None, PK=True, flat=False):
        self.parent_rna = parent_rna

        # definition of the method (theoretical)
        self.tool = tool
        self.data_source = data_source
        self.placement_method = placement_method
        self.func = obj_func
        self.allow_pk = PK
        self.label = self.get_label()

        # things related to execution
        self.joblist = []
        self.flat = flat
        self.build_job_list()

        # descriptors of the results set:
        self.predictions = []
        self.scores = []
        self.ninsertions = []
        self.max_mcc = 0
        self.min_mcc = 0
        self.avg_mcc = 0
        self.max_f1 = 0
        self.min_f1 = 0
        self.avg_f1 = 0
        self.best_pred = ""
        self.n_pred = 0
        self.ratio = 0  # ratio of the number of inserted motifs in the best solution on the max number of inserted motifs for this RNA

    def get_label(self):
        """Return the tool name, or "<source>-<placement>-<func>[-noPK]" for biorseo."""
        if self.tool != "biorseo":
            return self.tool
        label = f"{self.data_source}-{self.placement_method}-{self.func}"
        return label if self.allow_pk else label + "-noPK"

    def build_job_list(self):
        """Fill ``self.joblist`` with the jobs this method needs.

        Nothing more is appended once a job label is found in the global
        ``issues`` collection (known failures).
        """
        # PRIORITIES :
        # 1/ RNAsubopt
        # 2/ mv
        # 3/ Jar3D, BayesPairing, RNA-MoIP
        # 4/ BiORSEO
        # 5/ BiokoP
        # NOTE(review): the sequence and header used to be read from a global named
        # `instance`; they are now read from self.parent_rna, which is assumed to be
        # that same RNA object - confirm against the caller.
        rna = self.parent_rna
        basename = rna.basename
        fasta = outputDir + basename + ".fa"

        # Things that require RNAsubopt calculations
        if self.tool in ["RNAsubopt", "RNA-MoIP (1by1)", "RNA-MoIP (chunk)"] or self.placement_method == "Jar3d":
            if f"{basename} RNAsubopt" in issues:
                return
            subopt_file = outputDir + "noPK/" + basename + ".subopt"
            self.joblist.append(Job(command=["RNAsubopt", "-i", fasta, "--outfile=" + basename + ".subopt"],
                                    priority=1, how_many_in_parallel=1 if self.flat else 0,
                                    results=subopt_file,
                                    label=f"{basename} RNAsubopt"))
            self.joblist.append(Job(command=["mv", basename + ".subopt", outputDir + "noPK/"],
                                    priority=2, how_many_in_parallel=1 if self.flat else 0,
                                    results=subopt_file,
                                    label=f"{basename} mv"))

        # Find modules using Jar3d or BayesPairing:
        if self.placement_method == "Jar3d":
            if f"{basename} BGSU-Jar3d" in issues:
                return
            self.joblist.append(Job(function=launch_JAR3D, args=[rna.seq_, basename],
                                    priority=3, how_many_in_parallel=1,
                                    results=outputDir + basename + ".bgsu_jar3d.csv",
                                    label=f"{basename} BGSU-Jar3d"))

        if self.placement_method == "ByP":
            if self.data_source == "DESC":
                module_type_arg = "rna3dmotif"
            elif self.data_source == "RIN":
                module_type_arg = "carnaval"
            else:
                module_type_arg = "3dmotifatlas"

            # No BayesPairing job is created for the "carnaval" (RIN) modules.
            if module_type_arg != "carnaval":
                if f"{basename} {self.data_source}-ByP" in issues:
                    return
                self.joblist.append(Job(function=launch_BayesPairing, args=[module_type_arg, rna.seq_, rna.header_, basename],
                                        how_many_in_parallel=1 if self.flat else -1, priority=3,
                                        results=outputDir + basename + f".{self.data_source.lower()}_byp.csv",
                                        label=f"{basename} {self.data_source}-ByP"))

        if f"{basename} {self.label}" in issues:
            return

        if self.tool == "biorseo":
            c = [biorseoDir + "/bin/biorseo", "-s", fasta]
            results_prefix = outputDir + ("PK/" if self.allow_pk else "noPK/") + basename
            if self.placement_method == "D.P.":
                if self.data_source == "RIN":
                    results_file = results_prefix + f".biorseo_rin_raw_{self.func}"
                    c += ["-x", rinfolder]
                else:
                    results_file = results_prefix + f".biorseo_desc_raw_{self.func}"
                    c += ["-d", descfolder]
            else:
                # ByP and Jar3d placements both feed biorseo a csv file of module sites
                source = self.data_source.lower()
                placement = self.placement_method.lower()
                results_file = results_prefix + f".biorseo_{source}_{placement}_{self.func}"
                csv_flag = "--bayespaircsv" if self.placement_method == "ByP" else "--jar3dcsv"
                c += [csv_flag, outputDir + basename + f".{source}_{placement}.csv"]
            c += ["-o", results_file, "--func", self.func]
            if not self.allow_pk:
                c += ["-n"]
            self.joblist.append(Job(command=c, priority=4, timeout=3600,
                                    how_many_in_parallel=1 if self.flat else 10,
                                    results=results_file,
                                    label=f"{basename} {self.label}"))

        if self.tool == "RNA-MoIP (chunk)":
            self.joblist.append(Job(function=launch_RNAMoIP, args=[rna.seq_, rna.header_, basename, False],
                                    priority=3, how_many_in_parallel=1 if self.flat else 0,
                                    timeout=3600, results=outputDir + f"noPK/{basename}.moipc",
                                    label=f"{basename} {self.label}"))

        if self.tool == "RNA-MoIP (1by1)":
            self.joblist.append(Job(function=launch_RNAMoIP, args=[rna.seq_, rna.header_, basename, True],
                                    priority=3, how_many_in_parallel=1 if self.flat else 0,
                                    timeout=3600,
                                    results=outputDir + f"noPK/{basename}.moip",
                                    label=f"{basename} {self.label}"))

        if self.tool == "Biokop":
            self.joblist.append(Job(command=[biokopdir, "-n1", "-i", fasta, "-o", outputDir + f"PK/{basename}.biok"],
                                    priority=5, timeout=3600,
                                    how_many_in_parallel=1 if self.flat else 3,
                                    results=outputDir + f"PK/{basename}.biok",
                                    label=f"{basename} {self.label}"))

    def get_comp_times(self):
        """Return the computation times of the jobs, one decimal each, joined by " + "."""
        return " + ".join(f"{j.comp_time:.1f}" for j in self.joblist)
289 -
290 -
class RNA:
    """One benchmark RNA: its sequence, its reference structure and the methods evaluated on it.

    Parameters:
        filename: base name used for every file related to this RNA.
        header: header written in the FASTA file.
        seq: the sequence.
        struct: the reference secondary structure, compared to the predictions.
    """

    # BiORSEO variants: (data source, placement method, objective functions, results-file infix)
    _BIORSEO_VARIANTS = (
        ("DESC", "D.P.", "AB", "desc_raw"),
        ("DESC", "ByP", "ABCD", "desc_byp"),
        ("BGSU", "ByP", "ABCD", "bgsu_byp"),
        ("BGSU", "Jar3d", "ABCD", "bgsu_jar3d"),
        ("RIN", "D.P.", "AB", "rin_raw"),
    )

    def __init__(self, filename, header, seq, struct):
        self.seq_ = seq
        self.header_ = header
        self.true2d = struct
        self.basename = filename
        self.methods = []
        self.meth_idx = {}

        # Write the FASTA file of this RNA, unless a previous run already did.
        fasta = outputDir + self.basename + ".fa"
        if not path.isfile(fasta):
            with open(fasta, "w") as rna:
                rna.write(">" + self.header_ + '\n')
                rna.write(self.seq_ + '\n')

    @classmethod
    def _biorseo_outputs(cls):
        """Yield (method label, results-file suffix) for every BiORSEO variant."""
        for source, placement, funcs, infix in cls._BIORSEO_VARIANTS:
            for func in funcs:
                yield f"{source}-{placement}-{func}", f".biorseo_{infix}_{func}"

    def add_method_evaluation(self, *args, **kwargs):
        """Create a Method from the arguments and register it under its label."""
        new_m = Method(*args, **kwargs)
        self.meth_idx[new_m.label] = len(self.methods)
        self.methods.append(new_m)

    def evaluate(self, verbose=False):
        """Compute the MCC and F1 statistics of every method from its loaded predictions."""
        if verbose:
            print("{:<24}{}".format("True 2D", self.true2d))

        for m in self.methods:
            if m.predictions:
                mccs = []
                f1s = []
                m.n_pred = len(m.predictions)

                # List unique solutions. The same key (ss) is used to test and to
                # record a structure; the previous code tested one string and stored another.
                sec_structs = []
                seen = set()
                for p in m.predictions:
                    ss = p.split('\t')[0].split(' ')[0]
                    # ignore flat solutions and duplicates
                    if ')' not in p or ss in seen:
                        m.n_pred -= 1
                        continue
                    seen.add(ss)
                    sec_structs.append(ss)
                    confusion = tuple(compare_two_structures(self.true2d, p))
                    f1s.append(f1_score(*confusion))
                    mccs.append(mattews_corr_coeff(*confusion))

                if mccs:
                    m.max_mcc = max(mccs)
                    m.min_mcc = min(mccs)
                    m.avg_mcc = sum(mccs) / float(len(mccs))
                    m.best_pred = sec_structs[mccs.index(m.max_mcc)]
                if f1s:
                    m.max_f1 = max(f1s)
                    m.min_f1 = min(f1s)
                    m.avg_f1 = sum(f1s) / float(len(f1s))

                # Among the structures reaching max_mcc, prefer the one with the
                # highest ratio of inserted motifs.
                max_insertions = max(m.ninsertions) if m.ninsertions else 0
                if max_insertions > 0:
                    for p, n in zip(m.predictions, m.ninsertions):
                        if ')' not in p:  # ignore linear structures
                            continue
                        ratio = float(n) / max_insertions
                        if ratio > m.ratio and m.max_mcc == mattews_corr_coeff(*compare_two_structures(self.true2d, p)):
                            m.ratio = ratio
                            m.best_pred = p

            if verbose:
                print(f"{m.label:<21}\t{m.best_pred}\t{m.max_mcc:.2f}\t{m.n_pred}\t{m.get_comp_times()}")

    def get_method(self, label):
        """Return the registered Method with this label (KeyError if unknown)."""
        return self.methods[self.meth_idx[label]]

    def load_biokop_results(self):
        """Load the unique structures of the .biok file, skipping its first and last lines."""
        filename = outputDir + "PK/" + self.basename + ".biok"
        if not path.isfile(filename):
            return
        with open(filename, "r") as rna:
            lines = rna.readlines()
        predictions = self.get_method("Biokop").predictions
        for line in lines[1:-1]:
            ss = line.split(' ')[0]
            if ss not in predictions:
                predictions.append(ss)

    def load_RNAsubopt_results(self):
        """Load the unique structures of the .subopt file, skipping its first two lines."""
        filename = outputDir + "noPK/" + self.basename + ".subopt"
        if not path.isfile(filename):
            return
        with open(filename, "r") as rna:
            lines = rna.readlines()
        predictions = self.get_method("RNAsubopt").predictions
        for line in lines[2:]:
            ss = line.split(' ')[0]
            if ss not in predictions:
                predictions.append(ss)

    def _load_RNAMoIP_file(self, extension, label):
        """Load one RNA-MoIP results file into the method registered as ``label``.

        Does nothing when the file is missing or the method is not registered.
        """
        filename = outputDir + "noPK/" + self.basename + extension
        if label not in self.meth_idx or not path.isfile(filename):
            return
        with open(filename, "r") as rna:
            lines = rna.readlines()
        method = self.get_method(label)
        # Lines after the first two hold: structure <tab> number of insertions <tab> score
        for line in lines[2:]:
            fields = line.split('\t')
            method.predictions.append(fields[0])
            method.ninsertions.append(int(fields[1]))
            # float() ignores the trailing newline
            method.scores.append(float(fields[2]))

    def load_RNAMoIP_results(self):
        """Load the .moipc (chunk) and .moip (1by1) results, each one independently.

        The previous code opened .moip without checking it exists and required
        both methods to be registered.
        """
        self._load_RNAMoIP_file(".moipc", "RNA-MoIP (chunk)")
        self._load_RNAMoIP_file(".moip", "RNA-MoIP (1by1)")

    def load_biorseo_results(self, filename, method):
        """Append to ``method`` every structure of a BiORSEO results file, if it exists.

        The first two lines are skipped; the number of '+' characters in a line
        is recorded as its number of insertions.
        """
        if not path.isfile(filename):
            return
        with open(filename, "r") as rna:
            lines = rna.readlines()
        for line in lines[2:]:
            method.predictions.append(line.split(' ')[0].split('\t')[0])
            method.ninsertions.append(line.count('+'))

    def load_results(self, include_noPK=False):
        """Load the result files of every registered method.

        Parameters:
            include_noPK: also load the "-noPK" BiORSEO variants from the noPK/ folder.
        """
        if "RNAsubopt" in self.meth_idx:
            self.load_RNAsubopt_results()
        if "RNA-MoIP (1by1)" in self.meth_idx or "RNA-MoIP (chunk)" in self.meth_idx:
            self.load_RNAMoIP_results()
        if "Biokop" in self.meth_idx:
            self.load_biokop_results()

        # Each variant is gated on its own label. The previous hand-written list
        # tested the "Jar3d-B" label before loading the "Jar3d-D" results.
        for label, suffix in self._biorseo_outputs():
            if label in self.meth_idx:
                self.load_biorseo_results(outputDir + "PK/" + self.basename + suffix, self.get_method(label))
            if include_noPK and label + "-noPK" in self.meth_idx:
                self.load_biorseo_results(outputDir + "noPK/" + self.basename + suffix, self.get_method(label + "-noPK"))

    def has_complete_results(self, with_PK):
        """Return True when every expected result file of this RNA exists.

        Parameters:
            with_PK: check the PK/ folder (Biokop + BiORSEO) when true, otherwise the
                noPK/ folder (RNAsubopt + RNA-MoIP + BiORSEO).
        """
        if with_PK:
            folder = outputDir + "PK/"
            suffixes = [".biok"]
        else:
            folder = outputDir + "noPK/"
            suffixes = [".subopt", ".moip", ".moipc"]
        suffixes += [suffix for _, suffix in self._biorseo_outputs()]
        return all(path.isfile(folder + self.basename + suffix) for suffix in suffixes)
538 -
539 -
def init(arg1, arg2, arg3):
    """Bind the module-level counters n_launched, n_finished and n_skipped to the given objects."""
    global n_launched, n_finished, n_skipped
    n_launched = arg1
    n_finished = arg2
    n_skipped = arg3
545 -
def execute_job(j):
    """Run one Job (command or function call) unless its results already exist.

    Parameters:
        j: the Job to run.

    Returns:
        (0, 0) when the job is skipped, otherwise (elapsed seconds, result) where
        result is the CompletedProcess of the command, the return value of the
        function, or None when the job has neither a command nor a function.
    """
    # Check if you really need to execute it
    if path.isfile(j.results_file) or ((j.checkFunc_ is not None) and j.checkFunc_(*j.checkArgs_)):
        # skip it; each counter is updated under its own lock
        with n_skipped.get_lock():
            n_skipped.value += 1
        with n_finished.get_lock():
            n_finished.value += 1
        print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\tSkipping {j.label} (already finished)")
        return (0, 0)

    # Add the job to log file and run
    with n_launched.get_lock():
        n_launched.value += 1

    # Defaults, so that a job with neither a command nor a function does not
    # fail on unbound variables below.
    r = None
    start_time = end_time = time.time()

    if len(j.cmd_):
        with open(runDir + "/log_of_the_run.sh", 'a') as logfile:
            logfile.write(" ".join(j.cmd_) + "\n")
        print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\t{j.label}")
        start_time = time.time()
        r = subprocess.run(j.cmd_, timeout=j.timeout_)
        end_time = time.time()
        if r.returncode != 0:
            if r.stderr is not None:
                print(r.stderr, flush=True)
            print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\tIssue faced with {j.label}, skipping it and adding it to known issues (if not known).")
            # A failed command is counted as skipped rather than launched
            with n_launched.get_lock():
                n_launched.value -= 1
            with n_skipped.get_lock():
                n_skipped.value += 1
            if j.label not in issues:
                issues.add(j.label)
                # Absolute path: other jobs change the working directory, so the
                # relative "benchmark_results/" path was not reliable.
                with open(outputDir + "known_issues.txt", "a") as iss:
                    iss.write(j.label + "\n")
    elif j.func_ is not None:
        print(f"[{n_launched.value+n_skipped.value}/{jobcount}]\t{j.func_.__name__}({', '.join([str(a) for a in j.args_])})")
        start_time = time.time()
        r = j.func_(*j.args_)
        end_time = time.time()

    # Job is finished
    with n_finished.get_lock():
        n_finished.value += 1
    t = end_time - start_time
    return (t, r)
594 -
def launch_JAR3D_worker(loop):
    """Run jar3d on one loop and return the InsertionSite objects found.

    Parameters:
        loop: the Loop to search; its header (without the first character) names
            the working directory and the files.

    Returns:
        A list with one InsertionSite per line of the jar3d "seq" csv output
        that contains "true".
    """
    loop_id = loop.header[1:]

    # Work in <cwd>/<rna name>/<loop id>. The directory is handed to jar3d through
    # cwd= instead of chdir(): a pool worker processes several loops, and a chdir()
    # would make every later directory nest inside the previous one.
    workdir = getcwd() + '/' + loop.rna_name + '/' + loop_id
    makedirs(workdir, exist_ok=True)

    # write motif to a file
    filename = loop_id + ".fasta"
    with open(path.join(workdir, filename), 'w') as fasta:
        fasta.write('>' + loop.get_header() + '\n' + loop.subsequence() + '\n')

    # Launch Jar3D on it, against the hairpin (HL) or internal loop (IL) models
    if loop.type == 'h':
        capstype, motif_dir = "HL", HLmotifDir
    else:
        capstype, motif_dir = "IL", ILmotifDir
    seq_csv = loop_id + ".%sseq.csv" % capstype
    cmd = ["java", "-jar", jar3dexec, filename, motif_dir + "/all.txt", loop_id + ".%sloop.csv" % capstype, seq_csv]

    with open(runDir + "/log_of_the_run.sh", 'a') as logfile:
        logfile.write(' '.join(cmd) + "\n")
    subprocess.run(cmd, cwd=workdir, stdout=subprocess.DEVNULL)

    # Retrieve results
    insertion_sites = []
    with open(path.join(workdir, seq_csv), 'r') as csv:
        for line in csv:
            if "true" in line:
                insertion_sites.append(InsertionSite(loop, line))

    return insertion_sites
632 -
def launch_JAR3D(seq_, basename):
    """Search every loop of the RNAsubopt structures with jar3d and write the hits to a csv.

    Parameters:
        seq_: the RNA sequence.
        basename: base name of the RNA; noPK/<basename>.subopt is read and
            <basename>.bgsu_jar3d.csv is written, both under outputDir.
    """
    # Extracting probable loops from RNA-subopt structures (unique, in file order)
    with open(outputDir + f"noPK/{basename}.subopt", "r") as rna:
        lines = rna.readlines()
    rnasubopt_preds = list(dict.fromkeys(line.split(' ')[0] for line in lines[2:]))

    HLs = []
    ILs = []
    for ss in rnasubopt_preds:
        for loop_candidate in enumerate_loops(ss):
            # one component: hairpin loop, two components: internal loop
            if len(loop_candidate) == 1 and loop_candidate not in HLs:
                HLs.append(loop_candidate)
            if len(loop_candidate) == 2 and loop_candidate not in ILs:
                ILs.append(loop_candidate)

    # Retrieve subsequences corresponding to the possible loops
    loops = []
    for i, l in enumerate(HLs):
        loops.append(Loop(basename, ">HL%d" % (i+1), seq_[l[0][0]-1:l[0][1]], "h", l))
    for i, l in enumerate(ILs):
        loops.append(Loop(basename, ">IL%d" % (i+1), seq_[l[0][0]-1:l[0][1]]+'*'+seq_[l[1][0]-1:l[1][1]], "i", l))

    # Scanning loop subsequences against motif database
    if not path.exists(basename):
        makedirs(basename)
    p = Pool(processes=ncores)
    try:
        insertion_sites = [x for y in p.map(launch_JAR3D_worker, loops) for x in y]
    finally:
        # release the workers even when a worker raised
        p.close()
        p.join()
    insertion_sites.sort(reverse=True)
    subprocess.call(["rm", "-r", basename])

    # Writing results to CSV file (only the sites scoring above 10)
    with open(outputDir + basename + ".bgsu_jar3d.csv", "w") as resultsfile:
        resultsfile.write("Motif,Rotation,Score,Start1,End1,Start2,End2\n")
        for site in insertion_sites:
            if site.score > 10:
                resultsfile.write(site.atlas_id + ',' + str(bool(site.rotation)) + ",%d" % site.score + ',')
                positions = [','.join([str(y) for y in x]) for x in site.position]
                if len(positions) == 1:
                    # a single component: fill the Start2,End2 columns with dashes
                    positions.append("-,-")
                resultsfile.write(','.join(positions) + '\n')
688 -
def launch_BayesPairing(module_type, seq_, header_, basename):
    """Run BayesPairing on one sequence and convert its hits to a CSV file.

    The command is appended to ``log_of_the_run.sh`` in ``runDir``, then run
    from ``bypdir`` (the working directory is changed and not restored). The
    first output line starting with "PUR" is parsed as a Python literal and
    written to ``<basename>.rin_byp.csv`` ("carnaval"),
    ``<basename>.desc_byp.csv`` ("rna3dmotif") or ``<basename>.bgsu_byp.csv``
    (anything else) in ``outputDir``.

    Args:
        module_type: module database name passed to ``-d``.
        seq_: unused here; the sequence is read from ``<basename>.fa``.
        header_: unused here.
        basename: identifier used to build the input/output file names.

    Raises:
        IndexError: if the tool output contains no line starting with "PUR".
        subprocess.CalledProcessError: if the tool exits with a non-zero status.
    """
    cmd = ["python3", "-W", "ignore", "parse_sequences.py","-seq",outputDir + basename + ".fa", "-d", module_type, "-interm","1"]

    with open(runDir + "/log_of_the_run.sh", 'a') as logfile:
        logfile.write(" ".join(cmd))
        logfile.write("\n")

    chdir(bypdir)
    out = subprocess.check_output(cmd).decode('utf-8')

    # The results are on the first line starting with "PUR".
    pur_line = next((line for line in out.split('\n') if line[:3] == "PUR"), None)
    if pur_line is None:
        # Same exception type as the former unbounded scan, with a usable message.
        raise IndexError("no line starting with 'PUR' in BayesPairing output")
    insertion_sites = list(ast.literal_eval(pur_line.split(":")[1][1:]))

    suffixes = {"carnaval": ".rin_byp.csv", "rna3dmotif": ".desc_byp.csv"}
    suffix = suffixes.get(module_type, ".bgsu_byp.csv")
    with open(outputDir + basename + suffix, "w") as rna:
        rna.write("Motif,Score,Start1,End1,Start2,End2...\n")
        for i, module in enumerate(insertion_sites):
            if not len(module):
                continue
            # Each module is a flat sequence of (score, positions, sequence) triples.
            for (score, positions, sequence) in zip(*[iter(module)]*3):
                # Collapse the positions into runs of consecutive values:
                # bounds = [sentinel, start1, end1, start2, end2, ...].
                bounds = []
                previous = -2
                for position in positions:
                    if position - previous > 1:
                        bounds.append(previous)
                        bounds.append(position)
                    previous = position
                bounds.append(previous)

                fields = [module_type + str(i), str(int(score))]
                for (start, end) in zip(*[iter(bounds[1:])]*2):
                    if end > start:  # single-position runs are not reported
                        fields.append(str(start))
                        fields.append(str(end))
                rna.write(','.join(fields) + '\n')
731 -
def launch_BayesPairing2(module_type, seq_, header_, basename):
    """Run BayesPairing 2 on one sequence and convert its hits to a CSV file.

    The command is appended to ``log_of_the_run.sh`` in ``runDir``, then run
    from ``byp2dir`` (the working directory is changed and not restored). The
    pickled results are read from ``../output/output.pickle`` and the hits
    whose score has an absolute value of at most 2.3 are written to
    ``<basename>.desc_byp2.csv`` ("rna3dmotif") or ``<basename>.bgsu_byp2.csv``
    (anything else) in ``outputDir``.

    Args:
        module_type: "rna3dmotif" selects that dataset; any other value runs
            the tool with the "ALL" dataset.
        seq_: unused here; the sequence is read from ``<basename>.fa``.
        header_: unused here.
        basename: identifier used to build the input/output file names.
    """
    if module_type=="rna3dmotif":
        BP2_type = "rna3dmotif"
    else:
        BP2_type = "ALL"

    cmd = ["python3", "-W", "ignore", "parse_sequences.py", "-seq", outputDir+basename+".fa", "-samplesize", "1000", "-d", BP2_type, "-o", "output"]

    with open(runDir + "/log_of_the_run.sh", 'a') as logfile:
        logfile.write(" ".join(cmd))
        logfile.write("\n")

    chdir(byp2dir)

    subprocess.check_output(cmd)
    filename = "../output/output.pickle"

    # NOTE(review): pickle.load executes arbitrary code from the file; this is
    # only acceptable because the file was just written by the tool run above.
    objects = []
    with open(filename, "rb") as openfile:
        while True:
            try:
                objects.append(pickle.load(openfile))
            except EOFError:
                break

    # Only the entry of the first key of the first pickled object is used.
    first_object = objects[0]
    hits_by_module = first_object[list(first_object.keys())[0]][0]

    suffix = ".desc_byp2.csv" if module_type=="rna3dmotif" else ".bgsu_byp2.csv"
    with open(outputDir + basename + suffix, "w") as rna:
        rna.write("Motif,Score,Start1,End1,Start2,End2...\n")
        for i in hits_by_module:
            for line in hits_by_module[i]:
                if abs(line[2]) <= 2.3:  # default threshold of BP2
                    str_line = module_type + str(i) + "," + str(round(line[2],3))
                    # Each component is reported by its first and last position.
                    for pos in line[3]:
                        str_line += "," + str(pos[0]) + "," + str(pos[-1])
                    rna.write(str_line + "\n")
777 -
def launch_RNAMoIP_worker(c):
    """Run one RNA-MoIP command and parse its textual output.

    Args:
        c: the command, as an argument list for ``subprocess.check_output``.

    Returns:
        A ``(solution, nmotifs, score)`` tuple: the corrected secondary
        structure, the number of distinct motifs listed after it, and the
        score read from the last output line. ``("", 0, 0)`` is returned when
        the output reports " NO SOLUTIONS!".

    Raises:
        IndexError: if the output contains neither
            "Corrected secondary structure:" nor " NO SOLUTIONS!".

    If the command exits with a non-zero status, its output is printed and
    the process exits.
    """
    # launch gurobi
    try:
        out = subprocess.check_output(c).decode("utf-8")
    except subprocess.CalledProcessError as e:
        print(e.output)
        exit()
    gurobiLog = out.split('\n')

    # parse output
    idx = 0
    l = gurobiLog[idx]
    nsolutions = 0
    while l != "Corrected secondary structure:" and l != " NO SOLUTIONS!":
        # startswith() instead of a fixed-width slice: the prefix is 20
        # characters long, so comparing it to l[:19] could never match.
        if l.startswith("Optimal solution nb:"):
            try:
                nsolutions = int(l.split(' ')[-1])
            except ValueError:
                # The count only feeds a warning; never fail the run over it.
                pass
        idx += 1
        l = gurobiLog[idx]
    if nsolutions > 1:
        print("WARNING: RNA-MoIP found several solutions !")

    if l != "Corrected secondary structure:":
        return "", 0, 0

    idx += 1
    solution = gurobiLog[idx][1:]  # drop the leading character of the line
    idx += 1
    # The following lines containing a '.' list the inserted motifs; the motif
    # name is the second '-'-separated field.
    motifs = []
    while gurobiLog[idx].count('.'):
        motif = gurobiLog[idx].split('-')[1]
        if motif not in motifs:
            motifs.append(motif)
        idx += 1
    nmotifs = len(motifs)
    # Last non-empty line holds the score, wrapped in one character on each side.
    score = float(gurobiLog[-2][1:-1])

    return solution, nmotifs, score
817 -
def launch_RNAMoIP(seq_, header_, basename, one_by_one):
    """Run RNA-MoIP on the RNAsubopt structures of one RNA and save the results.

    The distinct structures are read from ``noPK/<basename>.subopt`` in
    ``outputDir``. Every command is appended to ``log_of_the_run.sh`` in
    ``runDir``.

    Args:
        seq_: nucleotide sequence of the RNA.
        header_: header line written at the top of the result file.
        basename: identifier used to build the input/output file names.
        one_by_one: if true, RNA-MoIP is run once per structure and the
            results go to ``noPK/<basename>.moip``; otherwise all structures
            are passed at once through a temporary file and the single result
            goes to ``noPK/<basename>.moipc``.
    """
    import os  # local import: only os.remove is needed, for the temp file

    RNAMoIP = moipdir

    # read RNAsubopt predictions
    with open(outputDir + f"noPK/{basename}.subopt", "r") as rna:
        lines = rna.readlines()
    # The first two lines are skipped; the structure is the first
    # space-separated field. dict.fromkeys de-duplicates and keeps the order.
    rnasubopt_preds = list(dict.fromkeys(line.split(' ')[0] for line in lines[2:]))

    if one_by_one:
        with open(runDir + "/log_of_the_run.sh", 'a') as logfile, \
                open(outputDir + f"noPK/{basename}.moip", "w") as rna:
            rna.write(header_+'\n')
            rna.write(seq_+'\n')
            for ss in rnasubopt_preds:
                c = ["python2", RNAMoIP, "-s", f"{seq_}", "-ss", f"{ss}", "-d", descfolder]
                logfile.write(" ".join(c)+'\n')
                solution, nmotifs, score = launch_RNAMoIP_worker(c)
                rna.write("{}\t{}\t{}\n".format(solution, nmotifs, score))
    else:
        temp_subopt = f"{runDir}/{basename}.temp_subopt"
        with open(temp_subopt, "w") as subopts:
            for ss in rnasubopt_preds:
                subopts.write(ss + '\n')
        try:
            c = ["python2", RNAMoIP, "-s", f"{seq_}", "-ss", temp_subopt, "-d", descfolder]

            with open(runDir + "/log_of_the_run.sh", 'a') as logfile:
                logfile.write(" ".join(c))
                logfile.write("\n")

            solution, nmotifs, score = launch_RNAMoIP_worker(c)
            with open(outputDir + f"noPK/{basename}.moipc", "w") as rna:
                rna.write(header_+'\n')
                rna.write(seq_+'\n')
                rna.write(f"{solution}\t{nmotifs}\t{score}\n")
        finally:
            # Best-effort cleanup (like the former `rm` call), now also done
            # when the worker fails.
            try:
                os.remove(temp_subopt)
            except OSError:
                pass
861 -
def mattews_corr_coeff(tp, tn, fp, fn):
    """Return the Matthews correlation coefficient of a confusion matrix.

    Args:
        tp, tn, fp, fn: counts of true positives, true negatives, false
            positives and false negatives.

    Returns:
        The coefficient, or 0.0 when it is undefined because one of the four
        marginal sums is zero (previously a ZeroDivisionError).
    """
    if (tp+fp == 0):
        print("We have an issue : no positives detected ! (linear structure)")
    denominator = sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))
    if denominator == 0:
        # Undefined coefficient: use the conventional value 0 instead of
        # crashing right after the warning above.
        return 0.0
    return (tp*tn-fp*fn) / denominator
866 -
def accuracy(tp, tn, fp, fn):
    """Return the fraction of correct calls, (tp + tn) over all four counts."""
    correct = tp+tn
    total = tp+fp+tn+fn
    return correct/total
869 -
def recall_sensitivity(tp, tn, fp, fn):
    """Return the recall (sensitivity): tp / (tp + fn). tn and fp are unused."""
    actual_positives = tp+fn
    return tp/actual_positives
872 -
def specificity(tp, tn, fp, fn):
    """Return the specificity: tn / (tn + fp). tp and fn are unused."""
    actual_negatives = tn+fp
    return tn/actual_negatives
875 -
def precision_ppv(tp, tn, fp, fn):
    """Return the precision (positive predictive value): tp / (tp + fp). tn and fn are unused."""
    predicted_positives = tp+fp
    return tp/predicted_positives
878 -
def npv(tp, tn, fp, fn):
    """Return the negative predictive value: tn / (tn + fn). tp and fp are unused."""
    predicted_negatives = tn+fn
    return tn/predicted_negatives
881 -
def f1_score(tp, tn, fp, fn):
    """Return the F1 score: 2*tp / (2*tp + fp + fn). tn is unused."""
    doubled_tp = 2*tp
    return doubled_tp/(doubled_tp+fp+fn)
884 -
def dbn_to_basepairs(structure):
    """Convert a dot-bracket string into a list of base pairs.

    Six bracket families are recognised: ``()``, ``[]``, ``{}``, ``<>``,
    ``Aa`` and ``Bb``. Every other character is ignored.

    Args:
        structure: the dot-bracket notation.

    Returns:
        A list of ``(closing_index, opening_index)`` tuples with 0-based
        indices, in the order the closing characters are met. Opening
        characters that are never closed are silently dropped.

    Raises:
        SystemExit: with status 1 (previously 0, which reported success) when
            a closing character has no matching opening one; the offending
            structure is printed first.
    """
    opener_of = {')': '(', ']': '[', '}': '{', '>': '<', 'a': 'A', 'b': 'B'}
    # One stack of pending opening positions per bracket family.
    pending = {opener: [] for opener in opener_of.values()}
    basepairs = []
    try:
        for i, c in enumerate(structure):
            if c in pending:
                pending[c].append(i)
            elif c in opener_of:
                basepairs.append((i, pending[opener_of[c]].pop()))
    except IndexError:  # pop from empty list
        print("Error in structure :", structure)
        raise SystemExit(1) from None
    return basepairs
925 -
def compare_two_structures(true2d, prediction):
    """Compare a predicted structure to the reference one, base pair by base pair.

    Args:
        true2d: reference structure in dot-bracket notation.
        prediction: predicted structure in dot-bracket notation.

    Returns:
        ``[tp, tn, fp, fn]``. ``tn`` is a float: n*(n-1)/2 with n the length
        of ``true2d``, minus the three other counts.
    """
    # Sets give constant-time membership tests; a position closes at most one
    # pair, so the pair lists contain no duplicates and the counts are the
    # same as with the lists.
    true_basepairs = set(dbn_to_basepairs(true2d))
    pred_basepairs = set(dbn_to_basepairs(prediction))
    tp = len(true_basepairs & pred_basepairs)
    fn = len(true_basepairs) - tp
    fp = len(pred_basepairs) - tp
    tn = len(true2d) * (len(true2d) - 1) * 0.5 - fp - fn - tp
    return [tp, tn, fp, fn]
943 -
def enumerate_loops(s):
    """List the loops of a secondary structure written with '(', ')' and '.'.

    The string is scanned once, left to right. Only '(', ')' and '.' are
    acted upon.

    Args:
        s: the structure string; must not be empty (s[0] is read).

    Returns:
        A list of loops. Each loop is a list of (start, end) tuples of 0-based
        indices into s, one tuple per component of the loop.
    """
    def resort(unclosedLoops):
        # Move the loop that was just completed (the last one) in front of the
        # `unclosedLoops` loops that are still open.
        loops.insert(len(loops)-1-unclosedLoops, loops[-1])
        loops.pop(-1)

    opened = []               # positions of '(' not closed yet
    openingStart = []         # positions of a '(' directly followed by '.' (or by ')')
    closingStart = []         # positions of a ')' directly followed by '.' or '('
    loops = []
    loopsUnclosed = 0         # number of loops still waiting for components
    consecutiveOpenings = []  # lengths of the runs of consecutive '('
    if s[0] == '(':
        consecutiveOpenings.append(1)
    consecutiveClosings = 0   # length of the current run of consecutive ')'

    lastclosed = -1           # position of the last ')' processed
    previous = ''
    for i in range(len(s)):

        # If we arrive on an unpaired segment
        if s[i] == '.':
            if previous == '(':
                openingStart.append(i-1)
            if previous == ')':
                closingStart.append(i-1)

        # Opening basepair
        if s[i] == '(':
            if previous == '(':
                consecutiveOpenings[-1] += 1
            else:
                consecutiveOpenings.append(1)
            if previous == ')':
                closingStart.append(i-1)

            # We have something like (...(
            if len(openingStart) and openingStart[-1] == opened[-1]:
                # Create a new loop starting with this component.
                loops.append([(openingStart[-1], i)])
                openingStart.pop(-1)
                loopsUnclosed += 1
            # We have something like )...( or even )(
            if len(closingStart) and closingStart[-1] == lastclosed:
                # Append a component to existing multiloop
                loops[-1].append((closingStart[-1], i))
                closingStart.pop(-1)

            opened.append(i)

        # Closing basepair
        if s[i] == ')':
            if previous == ')':
                consecutiveClosings += 1
            else:
                consecutiveClosings = 1
            # This is not supposed to happen in real data, but whatever.
            if previous == '(':
                openingStart.append(i-1)

            # We have something like (...) or ()
            if len(openingStart) and openingStart[-1] == opened[-1]:
                # Create a new loop, and save it as already closed (HL)
                loops.append([(openingStart[-1], i)])
                openingStart.pop(-1)
                resort(loopsUnclosed)
            # We have something like )...)
            if len(closingStart) and closingStart[-1] == lastclosed:
                # Append a component to existing multiloop and close it.
                loops[-1].append((closingStart[-1], i))
                closingStart.pop(-1)
                loopsUnclosed -= 1
                resort(loopsUnclosed)

            if i+1 < len(s):
                if s[i+1] != ')': # We are on something like: ).
                    # an openingStart has not been correctly detected, like in ...((((((...)))...)))
                    if consecutiveClosings < consecutiveOpenings[-1]:
                        # Create a new loop (uncompleted)
                        loops.append([(opened[-2], opened[-1])])
                        loopsUnclosed += 1

                    # We just completed an HL+stem, like ...(((...))).., we can forget its info
                    if consecutiveClosings == consecutiveOpenings[-1]:
                        consecutiveClosings = 0
                        consecutiveOpenings.pop(-1)
                    else: # There are still several basepairs to remember, forget only the processed ones, keep the others
                        consecutiveOpenings[-1] -= consecutiveClosings
                        consecutiveClosings = 0

                else: # We are on something like: ))
                    # we are on an closingStart that cannot be correctly detected, like in ...(((...(((...))))))
                    if consecutiveClosings == consecutiveOpenings[-1]:
                        # Append a component to the uncomplete loop and close it.
                        loops[-1].append((i, i+1))
                        loopsUnclosed -= 1
                        resort(loopsUnclosed)
                        # Forget the info about the processed stem.
                        consecutiveClosings = 0
                        consecutiveOpenings.pop(-1)

            opened.pop(-1)
            lastclosed = i

        previous = s[i]
        # print(i,"=",s[i],"\t", "consec. Op=", consecutiveOpenings,"Cl=",consecutiveClosings)

    return(loops)
1051 -
# Number of sequences rejected because of each non-ACGU character.
ignored_nt_dict = {}
def is_canonical_nts(seq):
    """Tell whether a sequence contains only the nucleotides A, C, G and U.

    Trailing newlines are ignored. Every other character is checked,
    including the last one: the previous ``seq[:-1]`` skipped it when the
    caller had already stripped the newline.

    Args:
        seq: the sequence, with or without its trailing newline.

    Returns:
        True if every character is one of "ACGU". Otherwise False, after
        incrementing the count of the first offending character in
        ``ignored_nt_dict``.
    """
    for c in seq.rstrip('\n'):
        if c not in "ACGU":
            ignored_nt_dict[c] = ignored_nt_dict.get(c, 0) + 1
            return False
    return True
1062 -
def is_canonical_bps(struct):
    """Tell whether a structure is free of pairs enclosing fewer than three positions.

    Returns False as soon as the structure contains "()", "(.)", "(..)" or
    the same patterns written with square brackets; True otherwise.
    """
    too_short_hairpins = ("()", "(.)", "(..)", "[]", "[.]", "[..]")
    return not any(pattern in struct for pattern in too_short_hairpins)
1077 -
def is_all(n, tot):
    """Format "n/tot" for a terminal, with n colored by ANSI escape codes.

    Code 32 is used when n equals tot, code 91 otherwise.
    """
    template = "\033[32m%d\033[0m/%d" if n == tot else "\033[91m%d\033[0m/%d"
    return template % (n, tot)
1083 -
def load_from_dbn(file, header_style=3):
    """Load the RNAs of a .dbn file, keeping only the ones usable by the benchmark.

    The file is read in groups of three lines: header, sequence, structure.
    A record is kept when its sequence has 10 to 100 nucleotides, its
    structure contains at least one '(', and both ``is_canonical_nts`` and
    ``is_canonical_bps`` accept it.

    Args:
        file: path of the .dbn file.
        header_style: how the RNA name is derived from the header.
            1: what follows the last '(' of the header, minus its last character.
            2: what follows the last '[' of the header, minus its last 41 characters.
            3: the whole header without its first character.
            In styles 1 and 2, '/' is replaced by '_' first.

    Returns:
        A ``(container, counter)`` tuple: the list of ``RNA`` objects and the
        number of accepted records whose structure contains a '['.
    """
    container = []
    counter = 0
    header = ""
    seq = ""

    with open(file, "r") as db:
        for line_number, l in enumerate(db, start=1):
            # rstrip instead of l[:-1]: a last line without a newline keeps
            # its last character.
            content = l.rstrip('\n')
            c = line_number % 3
            if c == 1:
                header = content
            if c == 2:
                seq = content.upper()
            if c == 0:
                struct = content
                n = len(seq)

                if n < 10 or n > 100:
                    continue # ignore too short and too long RNAs
                if not '(' in struct:
                    continue # ignore linear structures
                if is_canonical_nts(seq) and is_canonical_bps(struct):
                    # keeps what's inside brackets at the end as the filename
                    if header_style == 1: container.append(RNA(header.replace('/', '_').split('(')[-1][:-1], header, seq, struct))
                    # keeps what's inside square brackets at the end as the filename
                    if header_style == 2: container.append(RNA(header.replace('/', '_').split('[')[-1][:-41], header, seq, struct))
                    # keeps all the header as filename
                    if header_style == 3: container.append(RNA(header[1:], header, seq, struct))
                    if '[' in struct: counter += 1
    return container, counter
1121 -
1122 -def get_bpRNA_statistics(include_noPK=True):
1123 -
1124 - print("\nLoading bpRNA results from files...")
1125 -
1126 - # load results in objects
1127 - for instance in tqdm(bpRNAContainer, desc="bpRNA instances"):
1128 - instance.load_results(include_noPK=True)
1129 - instance.evaluate()
1130 -
1131 - RNAs_fully_predicted_noPK = [ x for x in bpRNAContainer if x.has_complete_results(with_PK=False) ]
1132 -
1133 - # Get max MCCs for each method without PK, and see who is complete
1134 - x_noPK = [
1135 - [ rna.get_method("RNAsubopt").max_mcc if rna.get_method("RNAsubopt").n_pred else print(rna.basename, "has no RNAsubopt structure (linear)") for rna in bpRNAContainer if rna.get_method("RNAsubopt").n_pred ],
1136 - [ rna.get_method("RNA-MoIP (1by1)").max_mcc if rna.get_method("RNA-MoIP (1by1)").n_pred else print(rna.basename, "has no RNA-MoIP (1by1)") for rna in bpRNAContainer if rna.get_method("RNA-MoIP (1by1)").n_pred ],
1137 - [ rna.get_method("RNA-MoIP (chunk)").max_mcc if rna.get_method("RNA-MoIP (chunk)").n_pred else print(rna.basename, "has no RNA-MoIP (chunk)") for rna in bpRNAContainer if rna.get_method("RNA-MoIP (chunk)").n_pred ],
1138 - [ rna.get_method("DESC-D.P.-A-noPK").max_mcc if rna.get_method("DESC-D.P.-A-noPK").n_pred else print(rna.basename, "has no DESC-D.P.-A-noPK") for rna in bpRNAContainer if rna.get_method("DESC-D.P.-A-noPK").n_pred ],
1139 - [ rna.get_method("DESC-D.P.-B-noPK").max_mcc if rna.get_method("DESC-D.P.-B-noPK").n_pred else print(rna.basename, "has no DESC-D.P.-B-noPK") for rna in bpRNAContainer if rna.get_method("DESC-D.P.-B-noPK").n_pred ],
1140 - [ rna.get_method("DESC-ByP-A-noPK").max_mcc if rna.get_method("DESC-ByP-A-noPK").n_pred else print(rna.basename, "has no DESC-ByP-A-noPK") for rna in bpRNAContainer if rna.get_method("DESC-ByP-A-noPK").n_pred ],
1141 - [ rna.get_method("DESC-ByP-B-noPK").max_mcc if rna.get_method("DESC-ByP-B-noPK").n_pred else print(rna.basename, "has no DESC-ByP-B-noPK") for rna in bpRNAContainer if rna.get_method("DESC-ByP-B-noPK").n_pred ],
1142 - [ rna.get_method("DESC-ByP-C-noPK").max_mcc if rna.get_method("DESC-ByP-C-noPK").n_pred else print(rna.basename, "has no DESC-ByP-C-noPK") for rna in bpRNAContainer if rna.get_method("DESC-ByP-C-noPK").n_pred ],
1143 - [ rna.get_method("DESC-ByP-D-noPK").max_mcc if rna.get_method("DESC-ByP-D-noPK").n_pred else print(rna.basename, "has no DESC-ByP-D-noPK") for rna in bpRNAContainer if rna.get_method("DESC-ByP-D-noPK").n_pred ],
1144 - [ rna.get_method("BGSU-Jar3d-A-noPK").max_mcc if rna.get_method("BGSU-Jar3d-A-noPK").n_pred else print(rna.basename, "has no BGSU-Jar3d-A-noPK") for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-A-noPK").n_pred ],
1145 - [ rna.get_method("BGSU-Jar3d-B-noPK").max_mcc if rna.get_method("BGSU-Jar3d-B-noPK").n_pred else print(rna.basename, "has no BGSU-Jar3d-B-noPK") for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-B-noPK").n_pred ],
1146 - [ rna.get_method("BGSU-Jar3d-C-noPK").max_mcc if rna.get_method("BGSU-Jar3d-C-noPK").n_pred else print(rna.basename, "has no BGSU-Jar3d-C-noPK") for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-C-noPK").n_pred ],
1147 - [ rna.get_method("BGSU-Jar3d-D-noPK").max_mcc if rna.get_method("BGSU-Jar3d-D-noPK").n_pred else print(rna.basename, "has no BGSU-Jar3d-D-noPK") for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-D-noPK").n_pred ],
1148 - [ rna.get_method("BGSU-ByP-A-noPK").max_mcc if rna.get_method("BGSU-ByP-A-noPK").n_pred else print(rna.basename, "has no BGSU-ByP-A-noPK") for rna in bpRNAContainer if rna.get_method("BGSU-ByP-A-noPK").n_pred ],
1149 - [ rna.get_method("BGSU-ByP-B-noPK").max_mcc if rna.get_method("BGSU-ByP-B-noPK").n_pred else print(rna.basename, "has no BGSU-ByP-B-noPK") for rna in bpRNAContainer if rna.get_method("BGSU-ByP-B-noPK").n_pred ],
1150 - [ rna.get_method("BGSU-ByP-C-noPK").max_mcc if rna.get_method("BGSU-ByP-C-noPK").n_pred else print(rna.basename, "has no BGSU-ByP-C-noPK") for rna in bpRNAContainer if rna.get_method("BGSU-ByP-C-noPK").n_pred ],
1151 - [ rna.get_method("BGSU-ByP-D-noPK").max_mcc if rna.get_method("BGSU-ByP-D-noPK").n_pred else print(rna.basename, "has no BGSU-ByP-D-noPK") for rna in bpRNAContainer if rna.get_method("BGSU-ByP-D-noPK").n_pred ],
1152 - [ rna.get_method("RIN-D.P.-A-noPK").max_mcc if rna.get_method("RIN-D.P.-A-noPK").n_pred else print(rna.basename, "has no RIN-D.P.-A-noPK") for rna in bpRNAContainer if rna.get_method("RIN-D.P.-A-noPK").n_pred ],
1153 - [ rna.get_method("RIN-D.P.-B-noPK").max_mcc if rna.get_method("RIN-D.P.-B-noPK").n_pred else print(rna.basename, "has no RIN-D.P.-B-noPK") for rna in bpRNAContainer if rna.get_method("RIN-D.P.-B-noPK").n_pred ],
1154 - ]
1155 -
1156 - x_noPK_fully = [
1157 - [ rna.get_method("RNAsubopt").max_mcc for rna in RNAs_fully_predicted_noPK ],
1158 - [ rna.get_method("RNA-MoIP (1by1)").max_mcc for rna in RNAs_fully_predicted_noPK ],
1159 - [ rna.get_method("RNA-MoIP (chunk)").max_mcc for rna in RNAs_fully_predicted_noPK ],
1160 - [ rna.get_method("DESC-D.P.-A-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1161 - [ rna.get_method("DESC-D.P.-B-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1162 - [ rna.get_method("DESC-ByP-A-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1163 - [ rna.get_method("DESC-ByP-B-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1164 - [ rna.get_method("DESC-ByP-C-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1165 - [ rna.get_method("DESC-ByP-D-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1166 - [ rna.get_method("BGSU-Jar3d-A-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1167 - [ rna.get_method("BGSU-Jar3d-B-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1168 - [ rna.get_method("BGSU-Jar3d-C-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1169 - [ rna.get_method("BGSU-Jar3d-D-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1170 - [ rna.get_method("BGSU-ByP-A-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1171 - [ rna.get_method("BGSU-ByP-B-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1172 - [ rna.get_method("BGSU-ByP-C-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1173 - [ rna.get_method("BGSU-ByP-D-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1174 - [ rna.get_method("RIN-D.P.-A-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1175 - [ rna.get_method("RIN-D.P.-B-noPK").max_mcc for rna in RNAs_fully_predicted_noPK ],
1176 - ] # We ensure having the same number of RNAs in every sample by discarding the ones for which computations did not ended/succeeded.
1177 -
1178 -
1179 - print()
1180 - print("Without PK:")
1181 - print("%s RNAsubopt predictions" % is_all(len(x_noPK[0]), bpRNA_tot))
1182 - print("%s RNA-MoIP 1 by 1 predictions" % is_all(len(x_noPK[1]), bpRNA_tot))
1183 - print("%s RNA-MoIP chunk predictions" % is_all(len(x_noPK[2]), bpRNA_tot))
1184 - print("%s biorseo + DESC + Patternmatch + f1A predictions" % is_all(len(x_noPK[3]), bpRNA_tot))
1185 - print("%s biorseo + DESC + Patternmatch + f1B predictions" % is_all(len(x_noPK[4]), bpRNA_tot))
1186 - print("%s biorseo + DESC + BayesPairing + f1A predictions" % is_all(len(x_noPK[5]), bpRNA_tot))
1187 - print("%s biorseo + DESC + BayesPairing + f1B predictions" % is_all(len(x_noPK[6]), bpRNA_tot))
1188 - print("%s biorseo + DESC + BayesPairing + f1C predictions" % is_all(len(x_noPK[7]), bpRNA_tot))
1189 - print("%s biorseo + DESC + BayesPairing + f1D predictions" % is_all(len(x_noPK[8]), bpRNA_tot))
1190 - print("%s biorseo + BGSU + JAR3D + f1A predictions" % is_all(len(x_noPK[9]), bpRNA_tot))
1191 - print("%s biorseo + BGSU + JAR3D + f1B predictions" % is_all(len(x_noPK[10]), bpRNA_tot))
1192 - print("%s biorseo + BGSU + JAR3D + f1C predictions" % is_all(len(x_noPK[11]), bpRNA_tot))
1193 - print("%s biorseo + BGSU + JAR3D + f1D predictions" % is_all(len(x_noPK[12]), bpRNA_tot))
1194 - print("%s biorseo + BGSU + BayesPairing + f1A predictions" % is_all(len(x_noPK[13]), bpRNA_tot))
1195 - print("%s biorseo + BGSU + BayesPairing + f1B predictions" % is_all(len(x_noPK[14]), bpRNA_tot))
1196 - print("%s biorseo + BGSU + BayesPairing + f1C predictions" % is_all(len(x_noPK[15]), bpRNA_tot))
1197 - print("%s biorseo + BGSU + BayesPairing + f1D predictions" % is_all(len(x_noPK[16]), bpRNA_tot))
1198 - print("%s biorseo + RIN + Patternmatch + f1A predictions" % is_all(len(x_noPK[17]), bpRNA_tot))
1199 - print("%s biorseo + RIN + Patternmatch + f1B predictions" % is_all(len(x_noPK[18]), bpRNA_tot))
1200 -
1201 - print("==> %s ARN were predicted with all methods successful." % is_all(len(x_noPK_fully[0]), bpRNA_tot) )
1202 -
1203 - # Stat tests
1204 - # Search if all methods are equal in positions with Friedman test:
1205 - test = stats.friedmanchisquare(*x_noPK_fully)
1206 - print("Friedman test without PK: H0 = 'The position parameter of all distributions is equal', p-value = ", test.pvalue)
1207 - # ==> No they are not, but none does better, no need to test one further.
1208 - test = stats.wilcoxon(x_noPK_fully[2], x_noPK_fully[3])
1209 - print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of RNA-MoIP and RawA are equal', p-value = ", test.pvalue)
1210 - test = stats.wilcoxon(x_noPK_fully[2], x_noPK_fully[4])
1211 - print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of RNA-MoIP and RawB are equal', p-value = ", test.pvalue)
1212 -
1213 -
1214 - RNAs_fully_predicted_PK = [ x for x in bpRNAContainer if x.has_complete_results(with_PK=True) ]
1215 -
1216 - # Get max MCCs for each method with PK, and see who is complete
1217 - x_PK = [
1218 - [ rna.get_method("Biokop").max_mcc if rna.get_method("Biokop").n_pred else print(rna.basename, "has no Biokop") for rna in bpRNAContainer if rna.get_method("Biokop").n_pred ],
1219 - [ rna.get_method("DESC-D.P.-A").max_mcc if rna.get_method("DESC-D.P.-A").n_pred else print(rna.basename, "has no DESC-D.P.-A") for rna in bpRNAContainer if rna.get_method("DESC-D.P.-A").n_pred ],
1220 - [ rna.get_method("DESC-D.P.-B").max_mcc if rna.get_method("DESC-D.P.-B").n_pred else print(rna.basename, "has no DESC-D.P.-B") for rna in bpRNAContainer if rna.get_method("DESC-D.P.-B").n_pred ],
1221 - [ rna.get_method("DESC-ByP-A").max_mcc if rna.get_method("DESC-ByP-A").n_pred else print(rna.basename, "has no DESC-ByP-A") for rna in bpRNAContainer if rna.get_method("DESC-ByP-A").n_pred ],
1222 - [ rna.get_method("DESC-ByP-B").max_mcc if rna.get_method("DESC-ByP-B").n_pred else print(rna.basename, "has no DESC-ByP-B") for rna in bpRNAContainer if rna.get_method("DESC-ByP-B").n_pred ],
1223 - [ rna.get_method("DESC-ByP-C").max_mcc if rna.get_method("DESC-ByP-C").n_pred else print(rna.basename, "has no DESC-ByP-C") for rna in bpRNAContainer if rna.get_method("DESC-ByP-C").n_pred ],
1224 - [ rna.get_method("DESC-ByP-D").max_mcc if rna.get_method("DESC-ByP-D").n_pred else print(rna.basename, "has no DESC-ByP-D") for rna in bpRNAContainer if rna.get_method("DESC-ByP-D").n_pred ],
1225 - [ rna.get_method("BGSU-Jar3d-A").max_mcc if rna.get_method("BGSU-Jar3d-A").n_pred else print(rna.basename, "has no BGSU-Jar3d-A") for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-A").n_pred ],
1226 - [ rna.get_method("BGSU-Jar3d-B").max_mcc if rna.get_method("BGSU-Jar3d-B").n_pred else print(rna.basename, "has no BGSU-Jar3d-B") for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-B").n_pred ],
1227 - [ rna.get_method("BGSU-Jar3d-C").max_mcc if rna.get_method("BGSU-Jar3d-C").n_pred else print(rna.basename, "has no BGSU-Jar3d-C") for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-C").n_pred ],
1228 - [ rna.get_method("BGSU-Jar3d-D").max_mcc if rna.get_method("BGSU-Jar3d-D").n_pred else print(rna.basename, "has no BGSU-Jar3d-D") for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-D").n_pred ],
1229 - [ rna.get_method("BGSU-ByP-A").max_mcc if rna.get_method("BGSU-ByP-A").n_pred else print(rna.basename, "has no BGSU-ByP-A") for rna in bpRNAContainer if rna.get_method("BGSU-ByP-A").n_pred ],
1230 - [ rna.get_method("BGSU-ByP-B").max_mcc if rna.get_method("BGSU-ByP-B").n_pred else print(rna.basename, "has no BGSU-ByP-B") for rna in bpRNAContainer if rna.get_method("BGSU-ByP-B").n_pred ],
1231 - [ rna.get_method("BGSU-ByP-C").max_mcc if rna.get_method("BGSU-ByP-C").n_pred else print(rna.basename, "has no BGSU-ByP-C") for rna in bpRNAContainer if rna.get_method("BGSU-ByP-C").n_pred ],
1232 - [ rna.get_method("BGSU-ByP-D").max_mcc if rna.get_method("BGSU-ByP-D").n_pred else print(rna.basename, "has no BGSU-ByP-D") for rna in bpRNAContainer if rna.get_method("BGSU-ByP-D").n_pred ],
1233 - [ rna.get_method("RIN-D.P.-A").max_mcc if rna.get_method("RIN-D.P.-A").n_pred else print(rna.basename, "has no RIN-D.P.-A") for rna in bpRNAContainer if rna.get_method("RIN-D.P.-A").n_pred ],
1234 - [ rna.get_method("RIN-D.P.-B").max_mcc if rna.get_method("RIN-D.P.-B").n_pred else print(rna.basename, "has no RIN-D.P.-B") for rna in bpRNAContainer if rna.get_method("RIN-D.P.-B").n_pred ],
1235 - ]
1236 -
1237 - # We ensure having the same number of RNAs in every sample by discarding the one for which computations did not ended/succeeded.
1238 - x_PK_fully = [
1239 - [ rna.get_method("Biokop").max_mcc for rna in RNAs_fully_predicted_PK ],
1240 - [ rna.get_method("DESC-D.P.-A").max_mcc for rna in RNAs_fully_predicted_PK ],
1241 - [ rna.get_method("DESC-D.P.-B").max_mcc for rna in RNAs_fully_predicted_PK ],
1242 - [ rna.get_method("DESC-ByP-A").max_mcc for rna in RNAs_fully_predicted_PK ],
1243 - [ rna.get_method("DESC-ByP-B").max_mcc for rna in RNAs_fully_predicted_PK ],
1244 - [ rna.get_method("DESC-ByP-C").max_mcc for rna in RNAs_fully_predicted_PK ],
1245 - [ rna.get_method("DESC-ByP-D").max_mcc for rna in RNAs_fully_predicted_PK ],
1246 - [ rna.get_method("BGSU-Jar3d-A").max_mcc for rna in RNAs_fully_predicted_PK ],
1247 - [ rna.get_method("BGSU-Jar3d-B").max_mcc for rna in RNAs_fully_predicted_PK ],
1248 - [ rna.get_method("BGSU-Jar3d-C").max_mcc for rna in RNAs_fully_predicted_PK ],
1249 - [ rna.get_method("BGSU-Jar3d-D").max_mcc for rna in RNAs_fully_predicted_PK ],
1250 - [ rna.get_method("BGSU-ByP-A").max_mcc for rna in RNAs_fully_predicted_PK ],
1251 - [ rna.get_method("BGSU-ByP-B").max_mcc for rna in RNAs_fully_predicted_PK ],
1252 - [ rna.get_method("BGSU-ByP-C").max_mcc for rna in RNAs_fully_predicted_PK ],
1253 - [ rna.get_method("BGSU-ByP-D").max_mcc for rna in RNAs_fully_predicted_PK ],
1254 - [ rna.get_method("RIN-D.P.-A").max_mcc for rna in RNAs_fully_predicted_PK ],
1255 - [ rna.get_method("RIN-D.P.-B").max_mcc for rna in RNAs_fully_predicted_PK ],
1256 - ]
1257 -
1258 -
1259 -
1260 - print()
1261 - print("With PK:")
1262 - print("%s Biokop predictions" % is_all(len(x_PK[0]), bpRNA_tot))
1263 - print("%s biorseo + DESC + Patternmatch + f1A predictions" % is_all(len(x_PK[1]), bpRNA_tot))
1264 - print("%s biorseo + DESC + Patternmatch + f1B predictions" % is_all(len(x_PK[2]), bpRNA_tot))
1265 - print("%s biorseo + DESC + BayesPairing + f1A predictions" % is_all(len(x_PK[3]), bpRNA_tot))
1266 - print("%s biorseo + DESC + BayesPairing + f1B predictions" % is_all(len(x_PK[4]), bpRNA_tot))
1267 - print("%s biorseo + DESC + BayesPairing + f1C predictions" % is_all(len(x_PK[5]), bpRNA_tot))
1268 - print("%s biorseo + DESC + BayesPairing + f1D predictions" % is_all(len(x_PK[6]), bpRNA_tot))
1269 - print("%s biorseo + BGSU + JAR3D + f1A predictions" % is_all(len(x_PK[7]), bpRNA_tot))
1270 - print("%s biorseo + BGSU + JAR3D + f1B predictions" % is_all(len(x_PK[8]), bpRNA_tot))
1271 - print("%s biorseo + BGSU + JAR3D + f1C predictions" % is_all(len(x_PK[9]), bpRNA_tot))
1272 - print("%s biorseo + BGSU + JAR3D + f1D predictions" % is_all(len(x_PK[10]), bpRNA_tot))
1273 - print("%s biorseo + BGSU + BayesPairing + f1A predictions" % is_all(len(x_PK[11]), bpRNA_tot))
1274 - print("%s biorseo + BGSU + BayesPairing + f1B predictions" % is_all(len(x_PK[12]), bpRNA_tot))
1275 - print("%s biorseo + BGSU + BayesPairing + f1C predictions" % is_all(len(x_PK[13]), bpRNA_tot))
1276 - print("%s biorseo + BGSU + BayesPairing + f1D predictions" % is_all(len(x_PK[14]), bpRNA_tot))
1277 - print("%s biorseo + RIN + Patternmatch + f1A predictions" % is_all(len(x_PK[15]), bpRNA_tot))
1278 - print("%s biorseo + RIN + Patternmatch + f1B predictions" % is_all(len(x_PK[16]), bpRNA_tot))
1279 -
1280 - print("==> %s ARN were predicted with all methods successful." % is_all(len(x_PK_fully[0]), bpRNA_tot) )
1281 -
1282 - # stat tests
1283 - # First, search if all methods are equal in positions with Friedman test:
1284 - test = stats.friedmanchisquare(*x_PK_fully)
1285 - print("Friedman test with PK: H0 = 'The position parameter of all distributions is equal', p-value = ", test.pvalue)
1286 - # it looks like some methods do better. Let's test the difference:
1287 - test = stats.wilcoxon(x_PK_fully[0], x_PK_fully[1])
1288 - print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop and RawA are equal', p-value = ", test.pvalue)
1289 - test = stats.wilcoxon(x_PK_fully[0], x_PK_fully[2])
1290 - print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop and RawB are equal', p-value = ", test.pvalue)
1291 - test = stats.wilcoxon(x_PK_fully[0], x_PK_fully[7])
1292 - print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop and Jar3dA are equal', p-value = ", test.pvalue)
1293 - test = stats.wilcoxon(x_PK_fully[0], x_PK_fully[8])
1294 - print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop and Jar3dB are equal', p-value = ", test.pvalue)
1295 - test = stats.wilcoxon(x_PK_fully[0], x_PK_fully[9])
1296 - print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop and Jar3dC are equal', p-value = ", test.pvalue)
1297 - test = stats.wilcoxon(x_PK_fully[0], x_PK_fully[10])
1298 - print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop and Jar3dD are equal', p-value = ", test.pvalue)
1299 -
1300 - n = [
1301 - [ rna.get_method("Biokop").n_pred for rna in bpRNAContainer if rna.get_method("Biokop").n_pred ],
1302 - [ rna.get_method("RNAsubopt").n_pred for rna in bpRNAContainer if rna.get_method("RNAsubopt").n_pred ],
1303 - [ rna.get_method("RNA-MoIP (1by1)").n_pred for rna in bpRNAContainer if rna.get_method("RNA-MoIP (1by1)").n_pred ],
1304 - [ rna.get_method("RNA-MoIP (chunk)").n_pred for rna in bpRNAContainer if rna.get_method("RNA-MoIP (chunk)").n_pred ],
1305 - [ rna.get_method("DESC-D.P.-A").n_pred for rna in bpRNAContainer if rna.get_method("DESC-D.P.-A").n_pred ],
1306 - [ rna.get_method("DESC-D.P.-B").n_pred for rna in bpRNAContainer if rna.get_method("DESC-D.P.-B").n_pred ],
1307 - [ rna.get_method("DESC-ByP-A").n_pred for rna in bpRNAContainer if rna.get_method("DESC-ByP-A").n_pred ],
1308 - [ rna.get_method("DESC-ByP-B").n_pred for rna in bpRNAContainer if rna.get_method("DESC-ByP-B").n_pred ],
1309 - [ rna.get_method("DESC-ByP-C").n_pred for rna in bpRNAContainer if rna.get_method("DESC-ByP-C").n_pred ],
1310 - [ rna.get_method("DESC-ByP-D").n_pred for rna in bpRNAContainer if rna.get_method("DESC-ByP-D").n_pred ],
1311 - [ rna.get_method("BGSU-Jar3d-A").n_pred for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-A").n_pred ],
1312 - [ rna.get_method("BGSU-Jar3d-B").n_pred for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-B").n_pred ],
1313 - [ rna.get_method("BGSU-Jar3d-C").n_pred for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-C").n_pred ],
1314 - [ rna.get_method("BGSU-Jar3d-D").n_pred for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-D").n_pred ],
1315 - [ rna.get_method("BGSU-ByP-A").n_pred for rna in bpRNAContainer if rna.get_method("BGSU-ByP-A").n_pred ],
1316 - [ rna.get_method("BGSU-ByP-B").n_pred for rna in bpRNAContainer if rna.get_method("BGSU-ByP-B").n_pred ],
1317 - [ rna.get_method("BGSU-ByP-C").n_pred for rna in bpRNAContainer if rna.get_method("BGSU-ByP-C").n_pred ],
1318 - [ rna.get_method("BGSU-ByP-D").n_pred for rna in bpRNAContainer if rna.get_method("BGSU-ByP-D").n_pred ],
1319 - [ rna.get_method("RIN-D.P.-A").n_pred for rna in bpRNAContainer if rna.get_method("RIN-D.P.-A").n_pred ],
1320 - [ rna.get_method("RIN-D.P.-B").n_pred for rna in bpRNAContainer if rna.get_method("RIN-D.P.-B").n_pred ],
1321 - ]
1322 -
1323 - r = [
1324 - [ rna.get_method("RNA-MoIP (1by1)").ratio for rna in bpRNAContainer if rna.get_method("RNA-MoIP (1by1)").n_pred > 1 ],
1325 - [ rna.get_method("DESC-D.P.-A").ratio for rna in bpRNAContainer if rna.get_method("DESC-D.P.-A").n_pred > 1 ],
1326 - [ rna.get_method("DESC-D.P.-B").ratio for rna in bpRNAContainer if rna.get_method("DESC-D.P.-B").n_pred > 1 ],
1327 - [ rna.get_method("DESC-ByP-A").ratio for rna in bpRNAContainer if rna.get_method("DESC-ByP-A").n_pred > 1 ],
1328 - [ rna.get_method("DESC-ByP-B").ratio for rna in bpRNAContainer if rna.get_method("DESC-ByP-B").n_pred > 1 ],
1329 - [ rna.get_method("DESC-ByP-C").ratio for rna in bpRNAContainer if rna.get_method("DESC-ByP-C").n_pred > 1 ],
1330 - [ rna.get_method("DESC-ByP-D").ratio for rna in bpRNAContainer if rna.get_method("DESC-ByP-D").n_pred > 1 ],
1331 - [ rna.get_method("BGSU-Jar3d-A").ratio for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-A").n_pred > 1 ],
1332 - [ rna.get_method("BGSU-Jar3d-B").ratio for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-B").n_pred > 1 ],
1333 - [ rna.get_method("BGSU-Jar3d-C").ratio for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-C").n_pred > 1 ],
1334 - [ rna.get_method("BGSU-Jar3d-D").ratio for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-D").n_pred > 1 ],
1335 - [ rna.get_method("BGSU-ByP-A").ratio for rna in bpRNAContainer if rna.get_method("BGSU-ByP-A").n_pred > 1 ],
1336 - [ rna.get_method("BGSU-ByP-B").ratio for rna in bpRNAContainer if rna.get_method("BGSU-ByP-B").n_pred > 1 ],
1337 - [ rna.get_method("BGSU-ByP-C").ratio for rna in bpRNAContainer if rna.get_method("BGSU-ByP-C").n_pred > 1 ],
1338 - [ rna.get_method("BGSU-ByP-D").ratio for rna in bpRNAContainer if rna.get_method("BGSU-ByP-D").n_pred > 1 ],
1339 - [ rna.get_method("RIN-D.P.-A").ratio for rna in bpRNAContainer if rna.get_method("RIN-D.P.-A").n_pred > 1 ],
1340 - [ rna.get_method("RIN-D.P.-B").ratio for rna in bpRNAContainer if rna.get_method("RIN-D.P.-B").n_pred > 1 ],
1341 - ]
1342 -
1343 - max_i = [
1344 - [ max(rna.get_method("RNA-MoIP (1by1)").ninsertions) for rna in bpRNAContainer if rna.get_method("RNA-MoIP (1by1)").n_pred ],
1345 - [ max(rna.get_method("RNA-MoIP (chunk)").ninsertions) for rna in bpRNAContainer if rna.get_method("RNA-MoIP (chunk)").n_pred ],
1346 - [ max(rna.get_method("DESC-D.P.-A").ninsertions) for rna in bpRNAContainer if rna.get_method("DESC-D.P.-A").n_pred ],
1347 - [ max(rna.get_method("DESC-D.P.-B").ninsertions) for rna in bpRNAContainer if rna.get_method("DESC-D.P.-B").n_pred ],
1348 - [ max(rna.get_method("DESC-ByP-A").ninsertions) for rna in bpRNAContainer if rna.get_method("DESC-ByP-A").n_pred ],
1349 - [ max(rna.get_method("DESC-ByP-B").ninsertions) for rna in bpRNAContainer if rna.get_method("DESC-ByP-B").n_pred ],
1350 - [ max(rna.get_method("DESC-ByP-C").ninsertions) for rna in bpRNAContainer if rna.get_method("DESC-ByP-C").n_pred ],
1351 - [ max(rna.get_method("DESC-ByP-D").ninsertions) for rna in bpRNAContainer if rna.get_method("DESC-ByP-D").n_pred ],
1352 - [ max(rna.get_method("BGSU-Jar3d-A").ninsertions) for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-A").n_pred ],
1353 - [ max(rna.get_method("BGSU-Jar3d-B").ninsertions) for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-B").n_pred ],
1354 - [ max(rna.get_method("BGSU-Jar3d-C").ninsertions) for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-C").n_pred ],
1355 - [ max(rna.get_method("BGSU-Jar3d-D").ninsertions) for rna in bpRNAContainer if rna.get_method("BGSU-Jar3d-D").n_pred ],
1356 - [ max(rna.get_method("BGSU-ByP-A").ninsertions) for rna in bpRNAContainer if rna.get_method("BGSU-ByP-A").n_pred ],
1357 - [ max(rna.get_method("BGSU-ByP-B").ninsertions) for rna in bpRNAContainer if rna.get_method("BGSU-ByP-B").n_pred ],
1358 - [ max(rna.get_method("BGSU-ByP-C").ninsertions) for rna in bpRNAContainer if rna.get_method("BGSU-ByP-C").n_pred ],
1359 - [ max(rna.get_method("BGSU-ByP-D").ninsertions) for rna in bpRNAContainer if rna.get_method("BGSU-ByP-D").n_pred ],
1360 - [ max(rna.get_method("RIN-D.P.-A").ninsertions) for rna in bpRNAContainer if rna.get_method("RIN-D.P.-A").n_pred ],
1361 - [ max(rna.get_method("RIN-D.P.-B").ninsertions) for rna in bpRNAContainer if rna.get_method("RIN-D.P.-B").n_pred ],
1362 - ]
1363 -
1364 - return x_noPK_fully, x_PK_fully, n, r, max_i
1365 -
def get_Pseudobase_statistics():
    """Load the Pseudobase results, report prediction coverage and run the stat tests.

    Relies on names defined elsewhere in this module: ``PseudobaseContainer``
    (the RNA instances), ``Pseudobase_tot`` (their count), ``is_all``, ``tqdm``
    and ``scipy.stats``.

    For every benchmarked method, the best MCC (``max_mcc``) is collected over
    the RNAs for which the method produced at least one prediction
    (truthy ``n_pred``), and the number of such RNAs is printed. A Friedman
    test and several Wilcoxon signed-rank tests (Biokop versus a few biorseo
    variants) are then run on the RNAs for which
    ``has_complete_results(with_PK=True)`` holds, so that all samples have the
    same size.

    Returns:
        A list with one list of ``max_mcc`` values per method (in the order of
        the method table below), restricted to the fully predicted RNAs.
    """
    # (method name as registered on the RNA objects, label used in the coverage report).
    # The order of this table defines the order of the returned samples.
    methods = [
        ("Biokop", "Biokop"),
        ("RNAsubopt", "RNAsubopt"),
        ("RNA-MoIP (1by1)", "RNA-MoIP 1 by 1"),
        ("RNA-MoIP (chunk)", "RNA-MoIP chunk"),
        ("DESC-D.P.-A", "biorseo + DESC + Patternmatch + f1A"),
        ("DESC-D.P.-B", "biorseo + DESC + Patternmatch + f1B"),
        ("DESC-ByP-A", "biorseo + DESC + BayesPairing + f1A"),
        ("DESC-ByP-B", "biorseo + DESC + BayesPairing + f1B"),
        ("DESC-ByP-C", "biorseo + DESC + BayesPairing + f1C"),
        ("DESC-ByP-D", "biorseo + DESC + BayesPairing + f1D"),
        ("BGSU-Jar3d-A", "biorseo + BGSU + JAR3D + f1A"),
        ("BGSU-Jar3d-B", "biorseo + BGSU + JAR3D + f1B"),
        ("BGSU-Jar3d-C", "biorseo + BGSU + JAR3D + f1C"),
        ("BGSU-Jar3d-D", "biorseo + BGSU + JAR3D + f1D"),
        ("BGSU-ByP-A", "biorseo + BGSU + BayesPairing + f1A"),
        ("BGSU-ByP-B", "biorseo + BGSU + BayesPairing + f1B"),
        ("BGSU-ByP-C", "biorseo + BGSU + BayesPairing + f1C"),
        ("BGSU-ByP-D", "biorseo + BGSU + BayesPairing + f1D"),
        ("RIN-D.P.-A", "biorseo + RIN + Patternmatch + f1A"),
        ("RIN-D.P.-B", "biorseo + RIN + Patternmatch + f1B"),
    ]

    # (name printed in the test report, method compared against Biokop)
    wilcoxon_rivals = [
        ("RawA", "DESC-D.P.-A"),
        ("RawB", "DESC-D.P.-B"),
        ("Jar3dA", "BGSU-Jar3d-A"),
        ("Jar3dB", "BGSU-Jar3d-B"),
        ("Jar3dC", "BGSU-Jar3d-C"),
        ("Jar3dD", "BGSU-Jar3d-D"),
    ]

    # load results in objects
    print("\nLoading Pseudobase results from files...")
    for instance in tqdm(PseudobaseContainer, desc="Pseudobase instances"):
        instance.load_results()
        instance.evaluate()

    RNAs_fully_predicted_Pseudobase = [
        rna for rna in PseudobaseContainer if rna.has_complete_results(with_PK=True)
    ]

    # Best MCC per method, over the RNAs that got at least one prediction.
    # The n_pred filter alone decides membership, so no fallback value is
    # needed (and none may leak into the samples).
    x_pseudobase = [
        [
            method.max_mcc
            for method in (rna.get_method(name) for rna in PseudobaseContainer)
            if method.n_pred
        ]
        for name, _ in methods
    ]

    # We ensure having the same number of RNAs in every sample by discarding
    # the ones for which computations did not end or did not succeed.
    x_pseudobase_fully = [
        [rna.get_method(name).max_mcc for rna in RNAs_fully_predicted_Pseudobase]
        for name, _ in methods
    ]

    print()
    print("With PK:")
    for (_, label), scores in zip(methods, x_pseudobase):
        # The format string is built first so that is_all()'s result is
        # interpolated exactly as a single %s argument.
        print(("%s " + label + " predictions") % is_all(len(scores), Pseudobase_tot))

    print("==> %s ARN were predicted with all methods successful." % is_all(len(x_pseudobase_fully[0]), Pseudobase_tot) )

    # stat tests
    # First, search if all methods are equal in positions with Friedman test:
    test = stats.friedmanchisquare(*x_pseudobase_fully)
    print("Friedman test with PK: H0 = 'The position parameter of all distributions is equal', p-value = ", test.pvalue)

    # it looks like some methods do better. Let's test the difference:
    # samples are looked up by method name rather than by position in the table.
    fully_by_method = {name: scores for (name, _), scores in zip(methods, x_pseudobase_fully)}
    for rival_label, rival_name in wilcoxon_rivals:
        test = stats.wilcoxon(fully_by_method["Biokop"], fully_by_method[rival_name])
        print("Wilcoxon signed rank test with PK: H0 = 'The position parameter of Biokop and %s are equal', p-value = " % rival_label, test.pvalue)

    return x_pseudobase_fully
1467 -
def print_StudyCase_results():
    """Load the results of every study-case RNA and evaluate it in verbose mode.

    Iterates over the module-level ``StudycaseContainer``; nothing is returned.
    """
    print("\nLoading study case results from files...")

    # load results in objects, then evaluate each one with verbose=True
    for studied_rna in StudycaseContainer:
        studied_rna.load_results()
        studied_rna.evaluate(verbose=True)
1475 -
1476 -# ================= EXTRACTION OF STRUCTURES FROM FILES ===============================
1477 -
1478 -if __name__ == '__main__':
1479 -
1480 - print("> Loading files...", flush=True)
1481 - bpRNAContainer, bpRNA_pk_counter = load_from_dbn(bpRNAFile, header_style=1)
1482 - PseudobaseContainer, Pseudobase_pk_counter = load_from_dbn(PseudobaseFile, header_style=3)
1483 - StudycaseContainer, StudyCase_pk_counter = load_from_dbn(StudyCaseFile, header_style=1)
1484 -
1485 - for nt, number in ignored_nt_dict.items():
1486 - print("\t> ignored %d sequences because of char %c" % (number, nt))
1487 -
1488 - bpRNA_tot = len(bpRNAContainer)
1489 - Pseudobase_tot = len(PseudobaseContainer)
1490 - StudyCase_tot = len(StudycaseContainer)
1491 - print("\t> Loaded %d RNAs of length between 10 and 100 from RNA Strand. %d of them contain pseudoknots." % (bpRNA_tot, bpRNA_pk_counter))
1492 - print("\t> Loaded %d RNAs of length between 10 and 100 from Pseudobase. %d of them contain pseudoknots." % (Pseudobase_tot, Pseudobase_pk_counter))
1493 - print("\t> Loaded %d RNAs of length between 10 and 100 from study case. %d of them contain pseudoknots." % (StudyCase_tot, StudyCase_pk_counter))
1494 -
1495 - issues = set()
1496 - if path.isfile("benchmark_results/known_issues.txt"):
1497 - with open("benchmark_results/known_issues.txt") as f:
1498 - issues = set([ j[:-1] for j in f.readlines() ])
1499 - print(f"\t> Ignoring {len(issues)} known failing jobs.")
1500 -
1501 - #================= PREDICTION OF STRUCTURES ===============================
1502 -
1503 - #define job list
1504 - print("> Defining jobs...")
1505 - fulljoblist = []
1506 - joblabel_list = set()
1507 -
1508 - if path.isfile("containers.pickle"):
1509 - with open("containers.pickle", "rb") as cont:
1510 - bpRNAContainer, PseudobaseContainer = pickle.load(cont)
1511 - else:
1512 - for instance in tqdm(bpRNAContainer, desc="bpRNA jobs"):
1513 - instance.add_method_evaluation(instance, "RNAsubopt", flat=False)
1514 - instance.add_method_evaluation(instance, "Biokop")
1515 - instance.add_method_evaluation(instance, "RNA-MoIP (1by1)")
1516 - instance.add_method_evaluation(instance, "RNA-MoIP (chunk)")
1517 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="D.P.", obj_func="A", PK=False)
1518 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="D.P.", obj_func="A", PK=True)
1519 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="D.P.", obj_func="B", PK=False)
1520 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="D.P.", obj_func="B", PK=True)
1521 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="A", PK=False)
1522 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="A", PK=True)
1523 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="B", PK=False)
1524 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="B", PK=True)
1525 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="C", PK=False)
1526 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="C", PK=True)
1527 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="D", PK=False)
1528 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="D", PK=True)
1529 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="A", PK=False)
1530 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="A", PK=True)
1531 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="B", PK=False)
1532 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="B", PK=True)
1533 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="C", PK=False)
1534 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="C", PK=True)
1535 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="D", PK=False)
1536 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="D", PK=True)
1537 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="A", PK=False)
1538 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="A", PK=True)
1539 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="B", PK=False)
1540 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="B", PK=True)
1541 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="C", PK=False)
1542 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="C", PK=True)
1543 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="D", PK=False)
1544 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="D", PK=True)
1545 - instance.add_method_evaluation(instance, tool="biorseo", data_source="RIN", placement_method="D.P.", obj_func="A", PK=False)
1546 - instance.add_method_evaluation(instance, tool="biorseo", data_source="RIN", placement_method="D.P.", obj_func="A", PK=True)
1547 - instance.add_method_evaluation(instance, tool="biorseo", data_source="RIN", placement_method="D.P.", obj_func="B", PK=False)
1548 - instance.add_method_evaluation(instance, tool="biorseo", data_source="RIN", placement_method="D.P.", obj_func="B", PK=True)
1549 -
1550 - for method in instance.methods:
1551 - for i in range(len(method.joblist)):
1552 - j = method.joblist[i]
1553 - if j.label in joblabel_list: # look for a duplicate job (Jar3d, BayesPairing, RNAsubopt...)
1554 - # for index, job in enumerate(fulljoblist):
1555 - # if job.label == j.label:
1556 - # method.joblist[i] = fulljoblist[index] # point to the previous occurrence
1557 - # break
1558 - continue
1559 - else:
1560 - fulljoblist.append(j)
1561 - joblabel_list.add(j.label)
1562 -
1563 - for instance in tqdm(PseudobaseContainer, desc="Pseudobase jobs"):
1564 - instance.add_method_evaluation(instance, "RNAsubopt", flat=False)
1565 - instance.add_method_evaluation(instance, "Biokop")
1566 - instance.add_method_evaluation(instance, "RNA-MoIP (1by1)")
1567 - instance.add_method_evaluation(instance, "RNA-MoIP (chunk)")
1568 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="D.P.", obj_func="A")
1569 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="D.P.", obj_func="B")
1570 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="A")
1571 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="B")
1572 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="C")
1573 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="D")
1574 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="A")
1575 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="B")
1576 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="C")
1577 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="D")
1578 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="A")
1579 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="B")
1580 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="C")
1581 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="D")
1582 - instance.add_method_evaluation(instance, tool="biorseo", data_source="RIN", placement_method="D.P.", obj_func="A")
1583 - instance.add_method_evaluation(instance, tool="biorseo", data_source="RIN", placement_method="D.P.", obj_func="B")
1584 -
1585 - for method in instance.methods:
1586 - for i in range(len(method.joblist)):
1587 - j = method.joblist[i]
1588 - if j.label in joblabel_list: # look for a duplicate job (Jar3d, BayesPairing, RNAsubopt...)
1589 - # for index, job in enumerate(fulljoblist):
1590 - # if job.label == j.label:
1591 - # method.joblist[i] = fulljoblist[index] # point to the previous occurrence
1592 - # break
1593 - continue
1594 - else:
1595 - fulljoblist.append(j)
1596 - joblabel_list.add(j.label)
1597 -
1598 - with open("containers.pickle", "wb") as cont:
1599 - pickle.dump((bpRNAContainer, PseudobaseContainer), cont)
1600 -
1601 - for instance in StudycaseContainer: # We need to define these separately because we do not want concurrency, to measure proper run times.
1602 - instance.add_method_evaluation(instance, "RNAsubopt", flat=True)
1603 - instance.add_method_evaluation(instance, "Biokop", flat=True)
1604 - instance.add_method_evaluation(instance, "RNA-MoIP (1by1)", flat=True)
1605 - instance.add_method_evaluation(instance, "RNA-MoIP (chunk)", flat=True)
1606 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="D.P.", obj_func="A", flat=True)
1607 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="D.P.", obj_func="B", flat=True)
1608 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="A", flat=True)
1609 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="B", flat=True)
1610 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="C", flat=True)
1611 - instance.add_method_evaluation(instance, tool="biorseo", data_source="DESC", placement_method="ByP", obj_func="D", flat=True)
1612 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="A", flat=True)
1613 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="B", flat=True)
1614 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="C", flat=True)
1615 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="ByP", obj_func="D", flat=True)
1616 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="A", flat=True)
1617 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="B", flat=True)
1618 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="C", flat=True)
1619 - instance.add_method_evaluation(instance, tool="biorseo", data_source="BGSU", placement_method="Jar3d", obj_func="D", flat=True)
1620 - instance.add_method_evaluation(instance, tool="biorseo", data_source="RIN", placement_method="D.P.", obj_func="A", flat=True)
1621 - instance.add_method_evaluation(instance, tool="biorseo", data_source="RIN", placement_method="D.P.", obj_func="B", flat=True)
1622 -
1623 - for method in instance.methods:
1624 - for i in range(len(method.joblist)):
1625 - j = method.joblist[i]
1626 - if j.label in joblabel_list: # look for a duplicate job (Jar3d, BayesPairing, RNAsubopt...)
1627 - # for index, job in enumerate(fulljoblist):
1628 - # if job.label == j.label:
1629 - # method.joblist[i] = fulljoblist[index] # point to the previous occurrence
1630 - # break
1631 - continue
1632 - else:
1633 - fulljoblist.append(j)
1634 - joblabel_list.add(j.label)
1635 -
1636 - # # sort jobs in a tree structure
1637 - # jobs = {}
1638 - # jobcount = len(fulljoblist)
1639 - # for job in fulljoblist:
1640 - # if job.priority_ not in jobs.keys():
1641 - # jobs[job.priority_] = {}
1642 - # if job.nthreads not in jobs[job.priority_].keys():
1643 - # print(f"New job priority/concurrency: {job.priority_} {job.nthreads}")
1644 - # jobs[job.priority_][job.nthreads] = []
1645 - # jobs[job.priority_][job.nthreads].append(job)
1646 - # nprio = max(jobs.keys())
1647 - # # for each priority level
1648 - # for i in range(1,nprio+1):
1649 - # if i not in jobs.keys(): continue # ignore this priority level if no job available
1650 - # different_thread_numbers = [n for n in jobs[i].keys()]
1651 - # different_thread_numbers.sort()
1652 - # print("processing jobs of priority", i)
1653 - # # jobs should be processed 1 by 1, 2 by 2, or n by n depending on their definition
1654 - # for n in different_thread_numbers:
1655 - # bunch = jobs[i][n]
1656 - # if not len(bunch): continue # ignore if no jobs should be processed n by n
1657 - # print("using", n, "processes:")
1658 - # try :
1659 - # # execute jobs of priority i that should be processed n by n:
1660 - # p = MyPool(initializer = init, initargs = (n_launched, n_finished, n_skipped), processes=n, maxtasksperchild=10)
1661 - # raw_results = p.map(execute_job, bunch)
1662 - # p.close()
1663 - # p.join()
1664 -
1665 - # # extract computation times
1666 - # times = [ r[0] for r in raw_results ]
1667 - # for j, t in zip(bunch, times):
1668 - # j.comp_time = t
1669 -
1670 - # except (subprocess.TimeoutExpired) :
1671 - # print("Skipping, took more than 3600s")
1672 - # pass
1673 -
1674 -
1675 - # ================= Statistics ========================
1676 -
1677 - if path.isfile("pickleresults.pickle"):
1678 - with open("pickleresults.pickle", "rb") as rf:
1679 - t = pickle.load(rf)
1680 - x_noPK_fully, x_PK_fully, n, r, max_i, x_pseudobase_fully = t
1681 - else:
1682 - x_noPK_fully, x_PK_fully, n, r, max_i = get_bpRNA_statistics()
1683 - x_pseudobase_fully = get_Pseudobase_statistics()
1684 - with open("pickleresults.pickle", "wb") as rf:
1685 - pickle.dump((x_noPK_fully, x_PK_fully, n, r, max_i, x_pseudobase_fully), rf)
1686 - # print_StudyCase_results()
1687 -
1688 - # ================= PLOTS OF RESULTS =======================================
1689 -
1690 - colors = [
1691 - '#911eb4', #purple
1692 - '#000075', #navy
1693 - '#ffe119', '#ffe119', # yellow
1694 - '#e6194B', '#e6194B', #red
1695 - '#3cb44b', '#3cb44b', '#3cb44b', '#3cb44b', #green
1696 - '#4363d8', '#4363d8', '#4363d8', '#4363d8', #blue
1697 - '#3cb44b', '#3cb44b', '#3cb44b', '#3cb44b', # green
1698 - '#bbbbff', '#bbbbff' # grey-blue
1699 - ]
1700 -
def plot_best_MCCs(x_noPK_fully, x_PK_fully, x_pseudobase_fully):
    """Draw a three-row violin figure of the best MCC values.

    Row (A) plots ``x_noPK_fully``, row (B) ``x_PK_fully`` and row (C)
    ``x_pseudobase_fully``. Each argument is indexed as a sequence of value
    series (``data[0]`` is measured with ``len`` and percentiles are taken
    along axis 1) -- presumably one series per benchmarked method.

    Uses ``plt``, ``np``, ``Line2D`` and the ``colors`` palette from the
    enclosing scope. The figure is left as the current matplotlib figure;
    nothing is saved or returned here.
    """
    print("Best MCCs...")
    tick_labels = [
        "Biokop \n",
        "RNA\nsubopt", "RNA-\nMoIP\n1by1", "RNA-\nMoIP\nchunk",
        "$f_{1A}$", "$f_{1B}$",
        "$f_{1A}$", "$f_{1B}$", "$f_{1C}$", "$f_{1D}$",
        "$f_{1A}$", "$f_{1B}$", "$f_{1C}$", "$f_{1D}$",
        "$f_{1A}$", "$f_{1B}$", "$f_{1C}$", "$f_{1D}$",
        "$f_{1A}$", "$f_{1B}$",
    ]

    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(10,5), dpi=150)
    fig.suptitle(" \n ")
    fig.subplots_adjust(left=0.1, right=0.97, top=0.83, bottom=0.05)

    # One entry per row: (data, violin x-positions, palette, y-label template).
    panels = [
        # Row 1 : no pseudoknots (positions start at 1 to skip Biokop's column)
        (x_noPK_fully,
         [1 + k for k in range(len(x_noPK_fully))],
         colors[1:],
         "(A)\nmax MCC\n(%d RNAs)"),
        # Row 2 : pseudoknots supported (column 0, then columns 4..19)
        (x_PK_fully,
         [0] + list(range(4, 20)),
         colors[:1] + colors[4:],
         "(B)\nmax MCC\n(%d RNAs)"),
        # Row 3 : all methods on the pseudoknotted dataset
        (x_pseudobase_fully,
         list(range(len(x_pseudobase_fully))),
         colors,
         "(C)\nmax MCC\n(%d RNAs)"),
    ]

    for row, (data, positions, palette, ylabel) in enumerate(panels):
        ax = axes[row]
        violins = ax.violinplot(data, showmeans=False, showmedians=False, showextrema=False,
                                points=len(data[0]), positions=positions)
        if row == 0:
            # Only the first row sets its ticks here; all rows are re-ticked below.
            ax.set_xticks(positions)
        for body, shade in zip(violins['bodies'], palette):
            body.set_facecolor(shade)
            body.set_edgecolor(shade)
            body.set_alpha(0.5)

        # Median dot plus an inter-quartile bar with small horizontal caps.
        q1, med, q3 = np.percentile(data, [25, 50, 75], axis=1)
        ax.scatter(positions, med, marker='o', color='k', s=30, zorder=3)
        ax.vlines(positions, q1, q3, color='k', linestyle='-', lw=1)
        for xc, low, high in zip(positions, q1, q3):
            low_cap = Line2D([xc-0.1, xc+0.1], [low, low], color="k", lw=1)
            high_cap = Line2D([xc-0.1, xc+0.1], [high, high], color="k", lw=1)
            ax.add_line(low_cap)
            ax.add_line(high_cap)
        ax.set_ylabel(ylabel % (len(data[0])), fontsize=12)

    # Common axis formatting: fixed ranges, dashed horizontal guides, 20 tick slots.
    guide_levels = [ i/10 for i in range(0, 11, 2) ]
    for ax in axes:
        ax.set_ylim((0.0, 1.01))
        ax.set_xlim((-1, 20))
        ax.set_yticks(guide_levels)
        for level in guide_levels:
            ax.axhline(y=level, color="grey", linestyle="--", linewidth=1)
        ax.tick_params(top=False, bottom=False, labeltop=False, labelbottom=False)
        ax.set_xticks(list(range(20)))

    # Only the top row shows tick labels, placed above the axes.
    top_ax = axes[0]
    top_ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False)
    top_ax.set_xticklabels(tick_labels)
    for idx, tick in enumerate(top_ax.xaxis.get_major_ticks()):
        # Biokop, RNAsubopt and RNA-MoIP labels get a smaller font to stay readable.
        tick.label2.set_fontsize(10 if idx < 4 else 12)
1791 -
def plot_more_info():
    """Draw the number-of-solutions figure and the insertion statistics figure.

    Reads ``n``, ``max_i``, ``r`` and the ``colors`` palette from the
    enclosing scope. The first figure is written to
    ``number_of_solutions.png``. The second figure (two stacked panels: max
    number of insertions, then insertion ratio) is left as the current
    matplotlib figure and is not saved here.

    NOTE(review): indentation was lost in the listing this was recovered
    from; statements following each short loop are assumed to sit outside it.
    """
    # ======= number of solutions, insertion ratio, etc ========================

    # Figure : number of solutions
    print("Number of solutions...")
    plt.figure(figsize=(9,2.5), dpi=80)
    plt.suptitle(" \n ")
    plt.subplots_adjust(left=0.05, right=0.97, top=0.6, bottom=0.05)
    positions = list(range(len(n)))
    for level in range(0, 80, 10):
        plt.axhline(y=level, color="grey", linestyle="-", linewidth=0.5)
    plt.axhline(y=1, color="grey", linestyle="-", linewidth=0.5)
    violins = plt.violinplot(n, showmeans=False, showmedians=False, showextrema=False,
                             points=len(n[0]), positions=positions)
    for body, shade in zip(violins['bodies'], colors):
        body.set_facecolor(shade)
        body.set_edgecolor(shade)
        body.set_alpha(0.5)
    labels = [
        "Biokop",
        "RNAsubopt","RNA-MoIP\n1by1", "RNA-MoIP\nchunk",
        "$f_{1A}$", "$f_{1B}$",
        "$f_{1A}$", "$f_{1B}$", "$f_{1C}$", "$f_{1D}$",
        "$f_{1A}$", "$f_{1B}$", "$f_{1C}$", "$f_{1D}$",
        "$f_{1A}$", "$f_{1B}$", "$f_{1C}$", "$f_{1D}$",
        "$f_{1A}$", "$f_{1B}$"
    ]
    plt.xlim((-1,20))
    plt.tick_params(top=False, bottom=False, labeltop=False, labelbottom=False)
    plt.xticks(list(range(len(labels))), labels)
    plt.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False)
    for idx, tick in enumerate(plt.gca().xaxis.get_major_ticks()):
        if idx < 4:
            # The long tool names are rotated to stay readable.
            tick.label2.set_rotation(90)
        else:
            tick.label2.set_fontsize(12)
    plt.yticks([0, 20, 40])
    plt.ylim((0,40))
    plt.savefig("number_of_solutions.png")

    # Figure : max number of insertions and ratio
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10,4), dpi=80)
    fig.suptitle(" \n ")
    fig.subplots_adjust(left=0.09, right=0.99, top=0.7, bottom=0.05)
    upper = axes[0]
    lower = axes[1]

    # Panel (A) : max inserted
    print("Max inserted...")
    positions = list(range(18))
    upper.set_yticks([0, 5, 10])
    for level in range(0, 14, 2):
        upper.axhline(y=level, color="grey", linestyle="-", linewidth=0.5)
    violins = upper.violinplot(max_i, showmeans=False, showmedians=False, showextrema=False,
                               points=len(max_i[0]), positions=positions)
    for body, shade in zip(violins['bodies'], colors[2:]):
        body.set_facecolor(shade)
        body.set_edgecolor(shade)
        body.set_alpha(0.5)
    upper.set_ylabel("(A)", fontsize=12)

    # Panel (B) : insertion ratio (x position 1 is left empty)
    print("Ratio of insertions...")
    positions = [ 0 ] + list(range(2, 1+len(r)))
    lower.set_ylim((-0.01, 1.01))
    ratio_ticks = [ 0, 0.5, 1.0 ]
    lower.set_yticks(ratio_ticks)
    for level in ratio_ticks:
        lower.axhline(y=level, color="grey", linestyle="-", linewidth=0.5)
    violins = lower.violinplot(r, showmeans=False, showmedians=False, showextrema=False,
                               points=len(r[0]), positions=positions)
    for body, shade in zip(violins['bodies'], [colors[2]] + colors[4:]):
        body.set_facecolor(shade)
        body.set_edgecolor(shade)
        body.set_alpha(0.5)
    for idx, xc in enumerate(positions):
        # Print the size of each series just above the bottom of the panel.
        lower.annotate(str(len(r[idx])), (xc-0.25, 0.05), fontsize=8)
    lower.set_ylabel("(B)", fontsize=12)

    # The two leading entries (Biokop, RNAsubopt) have no column in this figure.
    insertion_labels = labels[2:]
    for ax in axes:
        ax.set_xlim((-1,18))
        ax.tick_params(top=False, bottom=False, labeltop=False, labelbottom=False)
        ax.set_xticks(list(range(18)))
    upper.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False)
    upper.set_xticklabels(insertion_labels)
    for idx, tick in enumerate(upper.xaxis.get_major_ticks()):
        if idx < 2:
            # RNA-MoIP labels are rotated to stay readable.
            tick.label2.set_rotation(90)
        else:
            tick.label2.set_fontsize(12)
1880 -
def compare_subopt_MoIP():
    """Plot max MCC of RNA-MoIP (chunk), RNA-MoIP (1by1) and RNAsubopt side by side.

    Left panel: one scatter series per method over the RNAs of
    ``bpRNAContainer``. Right panel: box plots of the pairwise differences
    in max MCC between the three methods.

    Side effect: ``bpRNAContainer`` (taken from the enclosing scope) is sorted
    in place by its "RNA-MoIP (chunk)" max MCC. The figure is left as the
    current matplotlib figure; nothing is saved or returned here.
    """
    # ================== MCC performance ====================================

    plt.figure(figsize=(10,4), dpi=80)
    bpRNAContainer.sort(key=lambda x: x.get_method("RNA-MoIP (chunk)").max_mcc)

    chunk = [ rna.get_method("RNA-MoIP (chunk)").max_mcc for rna in bpRNAContainer ]
    one_by_one = [ rna.get_method("RNA-MoIP (1by1)").max_mcc for rna in bpRNAContainer ]
    subopt = [ rna.get_method("RNAsubopt").max_mcc for rna in bpRNAContainer ]

    # All three lists come from the same container, so zip pairs them per RNA.
    diffs = [
        [ a - b for a, b in zip(one_by_one, chunk) ],   # 1by1 - chunk
        [ a - b for a, b in zip(one_by_one, subopt) ],  # 1by1 - subopt
        [ a - b for a, b in zip(chunk, subopt) ]        # chunk - subopt
    ]

    plt.subplot(121)
    # Local palette, named so it does not shadow the outer ``colors`` list.
    palette = [ 'firebrick','goldenrod', 'xkcd:blue']
    labels = ["RNA-MoIP 'chunk' MCC (1 solution)", "Best RNA-MoIP '1by1' MCC", "Best RNAsubopt MCC" ]
    for values, col, lab in zip([chunk, one_by_one, subopt], palette, labels):
        plt.scatter(range(len(values)), values, color=col, label=lab, marker='o', s=2)
    plt.axvline(x=0, color='black', linewidth=1)
    plt.xlabel("RNA Strand verified structures (10 < nt < 100)")
    # Spelling fixed: the coefficient is named after Matthews.
    plt.ylabel("Matthews Correlation Coefficient")
    plt.ylim((-0.05,1.05))
    plt.title("(a) Performance of the prediction method")
    plt.legend(loc="lower right")

    plt.subplot(122)
    plt.axhline(y=0, color="black")
    plt.boxplot(diffs)
    plt.ylabel("Difference in max MCC")
    plt.title("(b) Difference between prediction methods")
    plt.xticks([1,2,3], ["MoIP '1by1'\n-\nMoIP 'chunk'", "MoIP '1by1'\n-\nRNAsubopt", "MoIP 'chunk'\n-\nRNAsubopt"])
    plt.subplots_adjust(wspace=0.25, bottom=0.2, left=0.1, right=0.99)
1917 -
1918 - plot_best_MCCs(x_noPK_fully, x_PK_fully, x_pseudobase_fully)
1919 - plt.savefig("best_MCCs.png")
1920 - plot_more_info()
1921 - plt.savefig("detailed_stats.png")
1922 - compare_subopt_MoIP()
1923 - plt.savefig("compare_subopt_MOIP.png")
1 ->All values represent delta G in units of deka cal/mol (kcal/mol*100)
2 ->Stacking 5' X1 Y1 3'
3 -> 3' X2 Y2 5'
4 ->X1X2 = AU CG GC UA GU UG (row headings)
5 ->Y1Y2 = AU CG GC UA GU UG (column headings)
6 - -90 -220 -210 -110 -60 -140
7 --210 -330 -240 -210 -140 -210
8 --240 -340 -330 -220 -150 -250
9 --130 -240 -210 -90 -100 -130
10 --130 -250 -210 -140 -50 130
11 --100 -150 -140 -60 30 -50
12 ->Hairpin Loop Energies: size = 1,2,3,..,30
13 - 0 0 570 560 560 540 590 560 640 650 660 670 680 690 690 700 710 710 720 720 730 730 740 740 750 750 750 760 760 770
14 ->Bulge loop Energies: size = 1,2,3,..,30
15 - 380 280 320 360 400 440 460 470 480 490 500 510 520 530 540 540 550 550 560 570 570 580 580 580 590 590 600 600 600 610
16 ->Interior Loop Energies: size = 1,2,3,..,30
17 - 0 0 0 170 180 200 220 230 240 250 260 270 280 290 300 300 310 310 320 330 330 340 340 340 350 350 360 360 360 370
18 ->NINIO asymmetry Terms: m1, m2, m3, m4, max
19 ->Energy = MAX[ max, asymmetry*m#] where # = MIN(4,L1,L2)
20 ->and L1, L2 are lengths of each side of loop,
21 ->and asymmetry = |L1-L2|
22 - 50 50 50 50 300
23 ->Triloops 5' .. 3' (Not listed equals 0 bonus)
24 -AAAAU 0
25 ->Tetraloops 5' .. 3' (Not listed equals 0 bonus)
26 -AGAAAU -200
27 -AGCAAU -150
28 -AGUAAU -150
29 -AGUGAU -150
30 -CGAAAG -300
31 -CGAAGG -250
32 -CGAGAG -200
33 -CGCAAG -300
34 -CGCGAG -250
35 -CGGAAG -300
36 -CGGGAG -150
37 -CGUAAG -200
38 -CGUGAG -300
39 -CUAACG -200
40 -CUACGG -250
41 -CUUCGG -300
42 -GGAAAC -300
43 -GGAAGC -150
44 -GGAGAC -300
45 -GGCAAC -250
46 -GGCGAC -150
47 -GGGAAC -150
48 -GGGAGC -150
49 -GGGGAC -300
50 -GGUGAC -300
51 -GUGAAC -150
52 -UGAAAA -150
53 -UGAAAG -200
54 -UGAGAG -250
55 -UGGAAA -150
56 ->Mismatch HP:
57 ->Columns 5'-AU,CG,GC,UA,GU,UG-3'
58 ->rows 5'-AA,AC,AG,AU,CA,..,UU-3'
59 - -30 -150 -110 -50 20 -50
60 - -50 -150 -150 -30 -50 -30
61 - -30 -140 -130 -60 -30 -60
62 - -30 -180 -210 -50 -30 -50
63 - -10 -100 -110 -20 -10 -20
64 - -20 -90 -70 -10 -20 -10
65 --150 -290 -240 -120 -150 -170
66 - -20 -80 -50 0 -20 0
67 --110 -220 -240 -140 -90 -80
68 --120 -200 -290 -120 -110 -120
69 - -20 -160 -140 -70 -30 -30
70 - 20 -110 -120 -20 0 -70
71 - -30 -170 -190 -30 -30 -60
72 - -30 -140 -100 -10 -30 -10
73 - -60 -180 -220 -50 -40 -60
74 --110 -200 -150 -80 -110 -80
75 ->Mismatch Interior:
76 ->Columns 5'-AU,CG,GC,UA,GU,UG-3'
77 ->rows 5'-AA,AC,AG,AU,CA,..,UU-3'
78 - 70 0 0 70 70 70
79 - 70 0 0 70 70 70
80 - -40 -110 -110 -40 -40 -40
81 - 70 0 0 70 70 70
82 - 70 0 0 70 70 70
83 - 70 0 0 70 70 70
84 - 70 0 0 70 70 70
85 - 70 0 0 70 70 70
86 - -40 -110 -110 -40 -40 -40
87 - 70 0 0 70 70 70
88 - 70 0 0 70 70 70
89 - 70 0 0 70 70 70
90 - 70 0 0 70 70 70
91 - 70 0 0 70 70 70
92 - 70 0 0 70 70 70
93 - 0 -70 -70 0 0 0
94 ->Dangle Energies: 5' X1 Y 3'
95 -> 3' X2 . 5'
96 ->Columns: Y = A, C, G, U
97 ->Rows = X1X2 = AU,CG,GC,UA,GU,UG
98 - -80 -50 -80 -60
99 --170 -80 -170 -120
100 --110 -40 -130 -60
101 - -70 -10 -70 -10
102 - -80 -50 -80 -60
103 - -70 -10 -70 -10
104 ->Dangle Energies: 5' X1 . 3'
105 -> 3' X2 Y 5'
106 ->Columns: Y = A, C, G, U
107 ->Rows = X1X2 = AU,CG,GC,UA,GU,UG
108 - -30 -10 -20 -20
109 - -20 -30 0 0
110 - -50 -30 -20 -10
111 - -30 -30 -40 -20
112 - -30 -10 -20 -20
113 - -30 -30 -40 -20
114 ->Multiloop terms: ALPHA_1, ALPHA_2, ALPHA_3
115 ->ML penalty = ALPHA_1 + s * ALPHA_2 + u *ALPHA_3
116 ->s = # stems adjacent to ML, u = unpaired bases in ML
117 - 340 40 0
118 ->AT_PENALTY:
119 ->Penalty for non GC pairs that terminate a helix
120 - 50
121 ->Interior Loops 1x1
122 ->CG..AU = 5'- C X A 3'
123 -> 3'- G Y U 5'
124 ->Rows: X = A C G U (X constant for a row)
125 ->Columns: Y = A C G U (Y constant in column)
126 -AU..AU
127 - 170 170 170 170
128 - 170 170 170 170
129 - 170 170 -40 170
130 - 170 170 170 150
131 -AU..CG
132 - 110 110 110 110
133 - 110 110 110 110
134 - 110 110 -100 110
135 - 110 110 110 100
136 -AU..GC
137 - 110 110 110 110
138 - 110 110 110 110
139 - 110 110 -100 110
140 - 110 110 110 110
141 -AU..UA
142 - 170 170 170 170
143 - 170 170 170 170
144 - 170 170 -40 170
145 - 170 170 170 120
146 -AU..GU
147 - 170 170 170 170
148 - 170 170 170 170
149 - 170 170 -40 170
150 - 170 170 170 170
151 -AU..UG
152 - 170 170 170 170
153 - 170 170 170 170
154 - 170 170 -40 170
155 - 170 170 170 170
156 -CG..AU
157 - 110 110 110 110
158 - 110 110 110 110
159 - 110 110 -100 110
160 - 110 110 110 110
161 -CG..CG
162 - 40 -40 40 40
163 - 30 50 40 50
164 - -10 40 -170 40
165 - 40 0 40 -30
166 -CG..GC
167 - 110 40 40 40
168 - 40 40 40 40
169 - 40 40 -140 40
170 - 40 40 40 40
171 -CG..UA
172 - 110 110 110 110
173 - 110 110 110 110
174 - 110 110 -100 110
175 - 110 110 110 110
176 -CG..GU
177 - 110 110 110 110
178 - 110 110 110 110
179 - 110 110 -100 110
180 - 110 110 110 110
181 -CG..UG
182 - 110 110 110 110
183 - 110 110 110 110
184 - 110 110 -100 110
185 - 110 110 110 110
186 -GC..AU
187 - 110 110 110 110
188 - 110 110 110 110
189 - 110 110 -100 110
190 - 110 110 110 110
191 -GC..CG
192 - 80 40 40 40
193 - 40 40 40 40
194 - 40 40 -210 40
195 - 40 40 40 -70
196 -GC..GC
197 - 40 30 -10 40
198 - -40 50 40 0
199 - 40 40 -170 40
200 - 40 50 40 -30
201 -GC..UA
202 - 110 110 110 110
203 - 110 110 110 110
204 - 110 110 -100 110
205 - 110 110 110 100
206 -GC..GU
207 - 110 110 110 110
208 - 110 110 110 110
209 - 110 110 -100 110
210 - 110 110 110 110
211 -GC..UG
212 - 110 110 110 110
213 - 110 110 110 110
214 - 110 110 -100 110
215 - 110 110 110 110
216 -UA..AU
217 - 170 170 170 170
218 - 170 170 170 170
219 - 170 170 -40 170
220 - 170 170 170 180
221 -UA..CG
222 - 110 110 110 110
223 - 110 110 110 110
224 - 110 110 -100 110
225 - 110 110 110 110
226 -UA..GC
227 - 110 110 110 110
228 - 110 110 110 110
229 - 110 110 -100 110
230 - 110 110 110 110
231 -UA..UA
232 - 170 170 170 170
233 - 170 170 170 170
234 - 170 170 -40 170
235 - 170 170 170 150
236 -UA..GU
237 - 170 170 170 170
238 - 170 170 170 170
239 - 170 170 -40 170
240 - 170 170 170 170
241 -UA..UG
242 - 170 170 170 170
243 - 170 170 170 170
244 - 170 170 -40 170
245 - 170 170 170 170
246 -GU..AU
247 - 170 170 170 170
248 - 170 170 170 170
249 - 170 170 -40 170
250 - 170 170 170 170
251 -GU..CG
252 - 110 110 110 110
253 - 110 110 110 110
254 - 110 110 -100 110
255 - 110 110 110 110
256 -GU..GC
257 - 110 110 110 110
258 - 110 110 110 110
259 - 110 110 -100 110
260 - 110 110 110 110
261 -GU..UA
262 - 170 170 170 170
263 - 170 170 170 170
264 - 170 170 -40 170
265 - 170 170 170 170
266 -GU..GU
267 - 170 170 170 170
268 - 170 170 170 170
269 - 170 170 -40 170
270 - 170 170 170 170
271 -GU..UG
272 - 170 170 170 170
273 - 170 170 170 170
274 - 170 170 -40 170
275 - 170 170 170 170
276 -UG..AU
277 - 170 170 170 170
278 - 170 170 170 170
279 - 170 170 -40 170
280 - 170 170 170 170
281 -UG..CG
282 - 110 110 110 110
283 - 110 110 110 110
284 - 110 110 -100 110
285 - 110 110 110 110
286 -UG..GC
287 - 110 110 110 110
288 - 110 110 110 110
289 - 110 110 -100 110
290 - 110 110 110 110
291 -UG..UA
292 - 170 170 170 170
293 - 170 170 170 170
294 - 170 170 -40 170
295 - 170 170 170 170
296 -UG..GU
297 - 170 170 170 170
298 - 170 170 170 170
299 - 170 170 -40 170
300 - 170 170 170 170
301 -UG..UG
302 - 170 170 170 170
303 - 170 170 170 170
304 - 170 170 -40 170
305 - 170 170 170 170
306 ->Interior Loops 2x2
307 ->CG.AG..AU = 5'- C A G A -3'
308 -> 3'- G Y X U -5'
309 ->Rows: X = A C G U (X constant for a row)
310 ->Columns: Y = A C G U (Y constant in column)
311 -AU.AA..AU
312 - 280 260 150 200
313 - 230 220 110 200
314 - 170 160 50 200
315 - 200 200 200 200
316 -AU.AC..AU
317 - 280 260 150 200
318 - 340 260 250 200
319 - 200 200 200 200
320 - 340 260 250 200
321 -AU.AG..AU
322 - 170 160 50 200
323 - 200 200 200 200
324 - 210 200 90 200
325 - 100 -20 50 200
326 -AU.AU..AU
327 - 200 200 200 200
328 - 310 230 220 200
329 - 220 110 180 200
330 - 290 180 250 200
331 -AU.CA..AU
332 - 250 310 200 310
333 - 210 200 200 200
334 - 150 240 200 240
335 - 200 200 200 200
336 -AU.CC..AU
337 - 250 250 200 250
338 - 250 250 200 250
339 - 200 200 200 200
340 - 250 250 200 250
341 -AU.CG..AU
342 - 150 240 200 240
343 - 200 200 200 200
344 - 190 240 200 240
345 - -30 70 200 70
346 -AU.CU..AU
347 - 200 200 200 200
348 - 220 220 200 220
349 - 100 190 200 190
350 - 170 160 200 160
351 -AU.GA..AU
352 - 150 200 210 230
353 - 110 200 160 90
354 - 50 200 100 210
355 - 200 200 200 200
356 -AU.GC..AU
357 - 150 200 210 130
358 - 250 200 270 230
359 - 200 200 200 200
360 - 250 200 270 230
361 -AU.GG..AU
362 - 50 200 100 210
363 - 200 200 200 200
364 - 90 200 140 170
365 - 50 200 30 -150
366 -AU.GU..AU
367 - 200 200 200 200
368 - 220 200 240 200
369 - 180 200 150 -20
370 - 250 200 220 230
371 -AU.UA..AU
372 - 200 310 130 270
373 - 200 200 -10 120
374 - 200 240 110 240
375 - 200 200 200 200
376 -AU.UC..AU
377 - 200 250 30 170
378 - 200 250 130 170
379 - 200 200 200 200
380 - 200 250 130 170
381 -AU.UG..AU
382 - 200 240 110 240
383 - 200 200 200 200
384 - 200 240 70 200
385 - 200 70 -250 70
386 -AU.UU..AU
387 - 200 200 200 200
388 - 200 220 100 140
389 - 200 190 -120 190
390 - 200 160 130 80
391 -AU.AA..CG
392 - 210 200 90 200
393 - 190 170 60 200
394 - 10 0 -110 200
395 - 200 200 200 200
396 -AU.AC..CG
397 - 180 170 60 200
398 - 250 170 160 200
399 - 200 200 200 200
400 - 150 70 70 200
401 -AU.AG..CG
402 - 70 60 -50 200
403 - 200 200 200 200
404 - 180 160 50 200
405 - 0 -120 -50 200
406 -AU.AU..CG
407 - 200 200 200 200
408 - 250 180 170 200
409 - 40 -80 -10 200
410 - 210 100 170 200
411 -AU.CA..CG
412 - 190 240 200 240
413 - 160 160 200 160
414 - -10 80 200 80
415 - 200 200 200 200
416 -AU.CC..CG
417 - 160 150 200 150
418 - 160 160 200 160
419 - 200 200 200 200
420 - 60 60 200 60
421 -AU.CG..CG
422 - 50 140 200 140
423 - 200 200 200 200
424 - 150 210 200 210
425 --130 -30 200 -30
426 -AU.CU..CG
427 - 200 200 200 200
428 - 170 160 200 160
429 - -90 10 200 10
430 - 90 80 200 80
431 -AU.GA..CG
432 - 90 200 140 170
433 - 60 200 120 40
434 --110 200 -60 50
435 - 200 200 200 200
436 -AU.GC..CG
437 - 60 200 110 40
438 - 160 200 180 140
439 - 200 200 200 200
440 - 70 200 80 50
441 -AU.GG..CG
442 - -50 200 0 110
443 - 200 200 200 200
444 - 50 200 110 130
445 - -50 200 -70 -250
446 -AU.GU..CG
447 - 200 200 200 200
448 - 170 200 180 150
449 - -10 200 -30 -210
450 - 170 200 140 150
451 -AU.UA..CG
452 - 200 240 70 200
453 - 200 160 -50 80
454 - 200 80 -50 80
455 - 200 200 200 200
456 -AU.UC..CG
457 - 200 150 -60 70
458 - 200 160 50 80
459 - 200 200 200 200
460 - 200 60 -50 -20
461 -AU.UG..CG
462 - 200 140 10 150
463 - 200 200 200 200
464 - 200 210 40 170
465 - 200 -30 -350 -30
466 -AU.UU..CG
467 - 200 200 200 200
468 - 200 160 50 80
469 - 200 10 -310 10
470 - 200 80 50 0
471 -AU.AA..GC
472 - 200 190 80 200
473 - 190 180 70 200
474 - 100 90 -20 200
475 - 200 200 200 200
476 -AU.AC..GC
477 - 240 220 110 200
478 - 280 210 200 200
479 - 200 200 200 200
480 - 270 190 180 200
481 -AU.AG..GC
482 - 100 90 -20 200
483 - 200 200 200 200
484 - 180 160 50 200
485 - 30 -80 -10 200
486 -AU.AU..GC
487 - 200 200 200 200
488 - 270 190 180 200
489 - 180 70 140 200
490 - 220 100 180 200
491 -AU.CA..GC
492 - 180 230 200 230
493 - 170 160 200 160
494 - 80 170 200 170
495 - 200 200 200 200
496 -AU.CC..GC
497 - 210 210 200 210
498 - 200 190 200 190
499 - 200 200 200 200
500 - 180 180 200 180
501 -AU.CG..GC
502 - 80 170 200 170
503 - 200 200 200 200
504 - 150 210 200 210
505 - -90 0 200 0
506 -AU.CU..GC
507 - 200 200 200 200
508 - 180 180 200 180
509 - 60 150 200 150
510 - 90 90 200 90
511 -AU.GA..GC
512 - 80 200 130 160
513 - 70 200 120 50
514 - -20 200 30 140
515 - 200 200 200 200
516 -AU.GC..GC
517 - 110 200 170 90
518 - 200 200 210 180
519 - 200 200 200 200
520 - 180 200 200 160
521 -AU.GG..GC
522 - -20 200 30 140
523 - 200 200 200 200
524 - 50 200 110 130
525 - -10 200 -40 -210
526 -AU.GU..GC
527 - 200 200 200 200
528 - 180 200 200 160
529 - 140 200 110 -60
530 - 180 200 150 160
531 -AU.UA..GC
532 - 200 230 60 190
533 - 200 160 -50 80
534 - 200 170 40 180
535 - 200 200 200 200
536 -AU.UC..GC
537 - 200 210 0 130
538 - 200 190 80 110
539 - 200 200 200 200
540 - 200 180 70 100
541 -AU.UG..GC
542 - 200 170 40 180
543 - 200 200 200 200
544 - 200 210 40 170
545 - 200 0 -310 0
546 -AU.UU..GC
547 - 200 200 200 200
548 - 200 180 70 100
549 - 200 150 -160 160
550 - 200 90 60 10
551 -AU.AA..UA
552 - 280 260 150 200
553 - 250 240 130 200
554 - 150 140 30 200
555 - 200 200 200 200
556 -AU.AC..UA
557 - 260 250 140 200
558 - 310 230 220 200
559 - 200 200 200 200
560 - 310 230 220 200
561 -AU.AG..UA
562 - 150 140 30 200
563 - 200 200 200 200
564 - 210 190 80 200
565 - 130 20 90 200
566 -AU.AU..UA
567 - 200 200 200 200
568 - 310 230 220 200
569 - 230 120 190 200
570 - 270 150 220 200
571 -AU.CA..UA
572 - 250 310 200 310
573 - 230 220 200 220
574 - 130 220 200 220
575 - 200 200 200 200
576 -AU.CC..UA
577 - 240 230 200 230
578 - 220 220 200 220
579 - 200 200 200 200
580 - 220 220 200 220
581 -AU.CG..UA
582 - 130 220 200 220
583 - 200 200 200 200
584 - 180 240 200 240
585 - 10 100 200 100
586 -AU.CU..UA
587 - 200 200 200 200
588 - 220 220 200 220
589 - 110 200 200 200
590 - 140 140 200 140
591 -AU.GA..UA
592 - 150 200 210 230
593 - 130 200 180 110
594 - 30 200 80 190
595 - 200 200 200 200
596 -AU.GC..UA
597 - 140 200 190 120
598 - 220 200 240 200
599 - 200 200 200 200
600 - 220 200 240 200
601 -AU.GG..UA
602 - 30 200 80 190
603 - 200 200 200 200
604 - 80 200 140 160
605 - 90 200 70 -110
606 -AU.GU..UA
607 - 200 200 200 200
608 - 220 200 240 200
609 - 190 200 160 -10
610 - 220 200 200 200
611 -AU.UA..UA
612 - 200 310 130 270
613 - 200 220 10 140
614 - 200 220 90 220
615 - 200 200 200 200
616 -AU.UC..UA
617 - 200 230 20 150
618 - 200 220 100 140
619 - 200 200 200 200
620 - 200 220 100 140
621 -AU.UG..UA
622 - 200 220 90 220
623 - 200 200 200 200
624 - 200 240 70 200
625 - 200 100 -210 110
626 -AU.UU..UA
627 - 200 200 200 200
628 - 200 220 100 140
629 - 200 200 -110 200
630 - 200 140 110 60
631 -AU.AA..GU
632 - 280 260 150 200
633 - 230 220 110 200
634 - 170 160 50 200
635 - 200 200 200 200
636 -AU.AC..GU
637 - 280 260 150 200
638 - 340 260 250 200
639 - 200 200 200 200
640 - 340 260 250 200
641 -AU.AG..GU
642 - 170 160 50 200
643 - 200 200 200 200
644 - 210 200 90 200
645 - 100 -20 50 200
646 -AU.AU..GU
647 - 200 200 200 200
648 - 310 230 220 200
649 - 220 110 180 200
650 - 290 180 250 200
651 -AU.CA..GU
652 - 250 310 200 310
653 - 210 200 200 200
654 - 150 240 200 240
655 - 200 200 200 200
656 -AU.CC..GU
657 - 250 250 200 250
658 - 250 250 200 250
659 - 200 200 200 200
660 - 250 250 200 250
661 -AU.CG..GU
662 - 150 240 200 240
663 - 200 200 200 200
664 - 190 240 200 240
665 - -30 70 200 70
666 -AU.CU..GU
667 - 200 200 200 200
668 - 220 220 200 220
669 - 100 190 200 190
670 - 170 160 200 160
671 -AU.GA..GU
672 - 150 200 210 230
673 - 110 200 160 90
674 - 50 200 100 210
675 - 200 200 200 200
676 -AU.GC..GU
677 - 150 200 210 130
678 - 250 200 270 230
679 - 200 200 200 200
680 - 250 200 270 230
681 -AU.GG..GU
682 - 50 200 100 210
683 - 200 200 200 200
684 - 90 200 140 170
685 - 50 200 30 -150
686 -AU.GU..GU
687 - 200 200 200 200
688 - 220 200 240 200
689 - 180 200 150 -20
690 - 250 200 220 230
691 -AU.UA..GU
692 - 200 310 130 270
693 - 200 200 -10 120
694 - 200 240 110 240
695 - 200 200 200 200
696 -AU.UC..GU
697 - 200 250 30 170
698 - 200 250 130 170
699 - 200 200 200 200
700 - 200 250 130 170
701 -AU.UG..GU
702 - 200 240 110 240
703 - 200 200 200 200
704 - 200 240 70 200
705 - 200 70 -250 70
706 -AU.UU..GU
707 - 200 200 200 200
708 - 200 220 100 140
709 - 200 190 -120 190
710 - 200 160 130 80
711 -AU.AA..UG
712 - 280 260 150 200
713 - 250 240 130 200
714 - 150 140 30 200
715 - 200 200 200 200
716 -AU.AC..UG
717 - 260 250 140 200
718 - 310 230 220 200
719 - 200 200 200 200
720 - 310 230 220 200
721 -AU.AG..UG
722 - 150 140 30 200
723 - 200 200 200 200
724 - 210 190 80 200
725 - 130 20 90 200
726 -AU.AU..UG
727 - 200 200 200 200
728 - 310 230 220 200
729 - 230 120 190 200
730 - 270 150 220 200
731 -AU.CA..UG
732 - 250 310 200 310
733 - 230 220 200 220
734 - 130 220 200 220
735 - 200 200 200 200
736 -AU.CC..UG
737 - 240 230 200 230
738 - 220 220 200 220
739 - 200 200 200 200
740 - 220 220 200 220
741 -AU.CG..UG
742 - 130 220 200 220
743 - 200 200 200 200
744 - 180 240 200 240
745 - 10 100 200 100
746 -AU.CU..UG
747 - 200 200 200 200
748 - 220 220 200 220
749 - 110 200 200 200
750 - 140 140 200 140
751 -AU.GA..UG
752 - 150 200 210 230
753 - 130 200 180 110
754 - 30 200 80 190
755 - 200 200 200 200
756 -AU.GC..UG
757 - 140 200 190 120
758 - 220 200 240 200
759 - 200 200 200 200
760 - 220 200 240 200
761 -AU.GG..UG
762 - 30 200 80 190
763 - 200 200 200 200
764 - 80 200 140 160
765 - 90 200 70 -110
766 -AU.GU..UG
767 - 200 200 200 200
768 - 220 200 240 200
769 - 190 200 160 -10
770 - 220 200 200 200
771 -AU.UA..UG
772 - 200 310 130 270
773 - 200 220 10 140
774 - 200 220 90 220
775 - 200 200 200 200
776 -AU.UC..UG
777 - 200 230 20 150
778 - 200 220 100 140
779 - 200 200 200 200
780 - 200 220 100 140
781 -AU.UG..UG
782 - 200 220 90 220
783 - 200 200 200 200
784 - 200 240 70 200
785 - 200 100 -210 110
786 -AU.UU..UG
787 - 200 200 200 200
788 - 200 220 100 140
789 - 200 200 -110 200
790 - 200 140 110 60
791 -CG.AA..AU
792 - 200 240 100 200
793 - 160 190 60 200
794 - 100 130 0 200
795 - 200 200 200 200
796 -CG.AC..AU
797 - 200 240 100 200
798 - 260 240 200 200
799 - 200 200 200 200
800 - 260 240 200 200
801 -CG.AG..AU
802 - 100 130 0 200
803 - 200 200 200 200
804 - 140 170 40 200
805 - 20 -40 0 200
806 -CG.AU..AU
807 - 200 200 200 200
808 - 230 210 170 200
809 - 150 80 130 200
810 - 220 150 200 200
811 -CG.CA..AU
812 - 190 280 200 270
813 - 150 180 200 160
814 - 90 220 200 200
815 - 200 200 200 200
816 -CG.CC..AU
817 - 190 220 200 210
818 - 190 220 200 210
819 - 200 200 200 200
820 - 190 220 200 210
821 -CG.CG..AU
822 - 90 220 200 200
823 - 200 200 200 200
824 - 130 220 200 200
825 - -90 40 200 30
826 -CG.CU..AU
827 - 200 200 200 200
828 - 160 190 200 180
829 - 40 170 200 150
830 - 110 140 200 120
831 -CG.GA..AU
832 - 100 200 180 180
833 - 60 200 130 40
834 - 0 200 70 160
835 - 200 200 200 200
836 -CG.GC..AU
837 - 100 200 180 80
838 - 200 200 240 180
839 - 200 200 200 200
840 - 200 200 240 180
841 -CG.GG..AU
842 - 0 200 70 160
843 - 200 200 200 200
844 - 40 200 110 120
845 - 0 200 0 -200
846 -CG.GU..AU
847 - 200 200 200 200
848 - 170 200 210 150
849 - 130 200 120 -70
850 - 200 200 190 180
851 -CG.UA..AU
852 - 200 270 30 220
853 - 200 160 -110 70
854 - 200 200 10 190
855 - 200 200 200 200
856 -CG.UC..AU
857 - 200 210 -70 120
858 - 200 210 30 120
859 - 200 200 200 200
860 - 200 210 30 120
861 -CG.UG..AU
862 - 200 200 10 190
863 - 200 200 200 200
864 - 200 200 -30 150
865 - 200 30 -350 20
866 -CG.UU..AU
867 - 200 200 200 200
868 - 200 180 0 90
869 - 200 150 -220 150
870 - 200 120 30 30
871 -CG.AA..CG
872 - 50 60 0 200
873 - 110 150 -70 200
874 - -30 10 -160 200
875 - 200 200 200 200
876 -CG.AC..CG
877 - 110 110 -100 200
878 - 170 150 -60 200
879 - 200 200 200 200
880 - 70 50 20 200
881 -CG.AG..CG
882 - 40 50 -70 200
883 - 200 200 200 200
884 - 100 140 0 200
885 - 10 -70 -80 200
886 -CG.AU..CG
887 - 200 200 200 200
888 - 180 150 120 200
889 - -50 -60 -60 200
890 - 150 0 90 200
891 -CG.CA..CG
892 - 130 220 200 200
893 - 100 130 200 120
894 - -70 70 200 40
895 - 200 200 200 200
896 -CG.CC..CG
897 - 100 190 200 110
898 - 100 130 200 120
899 - 200 200 200 200
900 - 0 30 200 170
901 -CG.CG..CG
902 - 70 70 200 100
903 - 200 200 200 200
904 - 90 180 200 170
905 --190 -30 200 -70
906 -CG.CU..CG
907 - 200 200 200 200
908 - 110 140 200 120
909 --150 -20 200 -30
910 - -20 -10 200 20
911 -CG.GA..CG
912 - -20 200 110 90
913 - -40 200 90 0
914 --170 200 -90 30
915 - 200 200 200 200
916 -CG.GC..CG
917 - 70 200 80 -10
918 - 110 200 150 100
919 - 200 200 200 200
920 - 20 200 50 0
921 -CG.GG..CG
922 - -50 200 -20 60
923 - 200 200 200 200
924 - 0 200 80 90
925 - -90 200 -100 -300
926 -CG.GU..CG
927 - 200 200 200 200
928 - 120 200 150 100
929 --130 200 -60 -240
930 - 90 200 110 60
931 -CG.UA..CG
932 - 200 200 -10 140
933 - 200 120 -160 30
934 - 200 40 -160 50
935 - 200 200 200 200
936 -CG.UC..CG
937 - 200 110 -160 30
938 - 200 120 -60 30
939 - 200 200 200 200
940 - 200 20 -160 10
941 -CG.UG..CG
942 - 200 50 -60 140
943 - 200 200 200 200
944 - 200 170 -70 120
945 - 200 -70 -440 -100
946 -CG.UU..CG
947 - 200 200 200 200
948 - 200 120 -50 30
949 - 200 -10 -410 10
950 - 200 40 -100 60
951 -CG.AA..GC
952 - 130 160 30 200
953 - 120 150 20 200
954 - 30 60 -70 200
955 - 200 200 200 200
956 -CG.AC..GC
957 - 160 200 60 200
958 - 210 180 150 200
959 - 200 200 200 200
960 - 190 170 130 200
961 -CG.AG..GC
962 - 30 60 -70 200
963 - 200 200 200 200
964 - 100 140 0 200
965 - -40 -110 -60 200
966 -CG.AU..GC
967 - 200 200 200 200
968 - 190 170 130 200
969 - 110 40 90 200
970 - 140 80 130 200
971 -CG.CA..GC
972 - 120 210 200 190
973 - 110 140 200 120
974 - 20 150 200 130
975 - 200 200 200 200
976 -CG.CC..GC
977 - 150 180 200 170
978 - 140 170 200 150
979 - 200 200 200 200
980 - 120 150 200 140
981 -CG.CG..GC
982 - 20 150 200 130
983 - 200 200 200 200
984 - 90 180 200 170
985 --150 -20 200 -40
986 -CG.CU..GC
987 - 200 200 200 200
988 - 120 150 200 140
989 - 0 130 200 110
990 - 30 60 200 50
991 -CG.GA..GC
992 - 30 200 100 110
993 - 20 200 90 0
994 - -70 200 0 90
995 - 200 200 200 200
996 -CG.GC..GC
997 - 60 200 140 40
998 - 150 200 180 130
999 - 200 200 200 200
1000 - 130 200 170 110
1001 -CG.GG..GC
1002 - -70 200 0 90
1003 - 200 200 200 200
1004 - 0 200 80 90
1005 - -60 200 -70 -260
1006 -CG.GU..GC
1007 - 200 200 200 200
1008 - 130 200 170 110
1009 - 90 200 90 -110
1010 - 130 200 120 110
1011 -CG.UA..GC
1012 - 200 190 -40 140
1013 - 200 120 -150 30
1014 - 200 130 -60 130
1015 - 200 200 200 200
1016 -CG.UC..GC
1017 - 200 170 -110 80
1018 - 200 150 -20 60
1019 - 200 200 200 200
1020 - 200 140 -40 50
1021 -CG.UG..GC
1022 - 200 130 -60 130
1023 - 200 200 200 200
1024 - 200 170 -70 120
1025 - 200 -40 -420 -50
1026 -CG.UU..GC
1027 - 200 200 200 200
1028 - 200 140 -40 50
1029 - 200 110 -260 110
1030 - 200 50 -50 -40
1031 -CG.AA..UA
1032 - 200 240 100 200
1033 - 180 210 80 200
1034 - 80 110 -20 200
1035 - 200 200 200 200
1036 -CG.AC..UA
1037 - 190 220 90 200
1038 - 230 210 170 200
1039 - 200 200 200 200
1040 - 230 210 170 200
1041 -CG.AG..UA
1042 - 80 110 -20 200
1043 - 200 200 200 200
1044 - 130 170 30 200
1045 - 60 0 40 200
1046 -CG.AU..UA
1047 - 200 200 200 200
1048 - 230 210 170 200
1049 - 160 90 140 200
1050 - 190 130 180 200
1051 -CG.CA..UA
1052 - 190 280 200 270
1053 - 170 200 200 180
1054 - 70 200 200 180
1055 - 200 200 200 200
1056 -CG.CC..UA
1057 - 180 210 200 190
1058 - 160 190 200 180
1059 - 200 200 200 200
1060 - 160 190 200 180
1061 -CG.CG..UA
1062 - 70 200 200 180
1063 - 200 200 200 200
1064 - 120 210 200 200
1065 - -50 80 200 70
1066 -CG.CU..UA
1067 - 200 200 200 200
1068 - 160 190 200 180
1069 - 50 180 200 160
1070 - 80 110 200 100
1071 -CG.GA..UA
1072 - 100 200 180 180
1073 - 80 200 150 60
1074 - -20 200 50 140
1075 - 200 200 200 200
1076 -CG.GC..UA
1077 - 90 200 160 70
1078 - 170 200 210 150
1079 - 200 200 200 200
1080 - 170 200 210 150
1081 -CG.GG..UA
1082 - -20 200 50 140
1083 - 200 200 200 200
1084 - 30 200 110 110
1085 - 40 200 40 -160
1086 -CG.GU..UA
1087 - 200 200 200 200
1088 - 170 200 210 150
1089 - 140 200 130 -60
1090 - 180 200 170 160
1091 -CG.UA..UA
1092 - 200 270 30 220
1093 - 200 180 -90 90
1094 - 200 180 -10 180
1095 - 200 200 200 200
1096 -CG.UC..UA
1097 - 200 190 -80 100
1098 - 200 180 0 90
1099 - 200 200 200 200
1100 - 200 180 0 90
1101 -CG.UG..UA
1102 - 200 180 -10 180
1103 - 200 200 200 200
1104 - 200 200 -40 150
1105 - 200 70 -310 60
1106 -CG.UU..UA
1107 - 200 200 200 200
1108 - 200 180 0 90
1109 - 200 160 -210 160
1110 - 200 100 0 10
1111 -CG.AA..GU
1112 - 200 240 100 200
1113 - 160 190 60 200
1114 - 100 130 0 200
1115 - 200 200 200 200
1116 -CG.AC..GU
1117 - 200 240 100 200
1118 - 260 240 200 200
1119 - 200 200 200 200
1120 - 260 240 200 200
1121 -CG.AG..GU
1122 - 100 130 0 200
1123 - 200 200 200 200
1124 - 140 170 40 200
1125 - 20 -40 0 200
1126 -CG.AU..GU
1127 - 200 200 200 200
1128 - 230 210 170 200
1129 - 150 80 130 200
1130 - 220 150 200 200
1131 -CG.CA..GU
1132 - 190 280 200 270
1133 - 150 180 200 160
1134 - 90 220 200 200
1135 - 200 200 200 200
1136 -CG.CC..GU
1137 - 190 220 200 210
1138 - 190 220 200 210
1139 - 200 200 200 200
1140 - 190 220 200 210
1141 -CG.CG..GU
1142 - 90 220 200 200
1143 - 200 200 200 200
1144 - 130 220 200 200
1145 - -90 40 200 30
1146 -CG.CU..GU
1147 - 200 200 200 200
1148 - 160 190 200 180
1149 - 40 170 200 150
1150 - 110 140 200 120
1151 -CG.GA..GU
1152 - 100 200 180 180
1153 - 60 200 130 40
1154 - 0 200 70 160
1155 - 200 200 200 200
1156 -CG.GC..GU
1157 - 100 200 180 80
1158 - 200 200 240 180
1159 - 200 200 200 200
1160 - 200 200 240 180
1161 -CG.GG..GU
1162 - 0 200 70 160
1163 - 200 200 200 200
1164 - 40 200 110 120
1165 - 0 200 0 -200
1166 -CG.GU..GU
1167 - 200 200 200 200
1168 - 170 200 210 150
1169 - 130 200 120 -70
1170 - 200 200 190 180
1171 -CG.UA..GU
1172 - 200 270 30 220
1173 - 200 160 -110 70
1174 - 200 200 10 190
1175 - 200 200 200 200
1176 -CG.UC..GU
1177 - 200 210 -70 120
1178 - 200 210 30 120
1179 - 200 200 200 200
1180 - 200 210 30 120
1181 -CG.UG..GU
1182 - 200 200 10 190
1183 - 200 200 200 200
1184 - 200 200 -30 150
1185 - 200 30 -350 20
1186 -CG.UU..GU
1187 - 200 200 200 200
1188 - 200 180 0 90
1189 - 200 150 -220 150
1190 - 200 120 30 30
1191 -CG.AA..UG
1192 - 200 240 100 200
1193 - 180 210 80 200
1194 - 80 110 -20 200
1195 - 200 200 200 200
1196 -CG.AC..UG
1197 - 190 220 90 200
1198 - 230 210 170 200
1199 - 200 200 200 200
1200 - 230 210 170 200
1201 -CG.AG..UG
1202 - 80 110 -20 200
1203 - 200 200 200 200
1204 - 130 170 30 200
1205 - 60 0 40 200
1206 -CG.AU..UG
1207 - 200 200 200 200
1208 - 230 210 170 200
1209 - 160 90 140 200
1210 - 190 130 180 200
1211 -CG.CA..UG
1212 - 190 280 200 270
1213 - 170 200 200 180
1214 - 70 200 200 180
1215 - 200 200 200 200
1216 -CG.CC..UG
1217 - 180 210 200 190
1218 - 160 190 200 180
1219 - 200 200 200 200
1220 - 160 190 200 180
1221 -CG.CG..UG
1222 - 70 200 200 180
1223 - 200 200 200 200
1224 - 120 210 200 200
1225 - -50 80 200 70
1226 -CG.CU..UG
1227 - 200 200 200 200
1228 - 160 190 200 180
1229 - 50 180 200 160
1230 - 80 110 200 100
1231 -CG.GA..UG
1232 - 100 200 180 180
1233 - 80 200 150 60
1234 - -20 200 50 140
1235 - 200 200 200 200
1236 -CG.GC..UG
1237 - 90 200 160 70
1238 - 170 200 210 150
1239 - 200 200 200 200
1240 - 170 200 210 150
1241 -CG.GG..UG
1242 - -20 200 50 140
1243 - 200 200 200 200
1244 - 30 200 110 110
1245 - 40 200 40 -160
1246 -CG.GU..UG
1247 - 200 200 200 200
1248 - 170 200 210 150
1249 - 140 200 130 -60
1250 - 180 200 170 160
1251 -CG.UA..UG
1252 - 200 270 30 220
1253 - 200 180 -90 90
1254 - 200 180 -10 180
1255 - 200 200 200 200
1256 -CG.UC..UG
1257 - 200 190 -80 100
1258 - 200 180 0 90
1259 - 200 200 200 200
1260 - 200 180 0 90
1261 -CG.UG..UG
1262 - 200 180 -10 180
1263 - 200 200 200 200
1264 - 200 200 -40 150
1265 - 200 70 -310 60
1266 -CG.UU..UG
1267 - 200 200 200 200
1268 - 200 180 0 90
1269 - 200 160 -210 160
1270 - 200 100 0 10
1271 -GC.AA..AU
1272 - 210 180 70 200
1273 - 170 140 30 200
1274 - 110 80 -30 200
1275 - 200 200 200 200
1276 -GC.AC..AU
1277 - 210 180 70 200
1278 - 270 180 170 200
1279 - 200 200 200 200
1280 - 270 180 170 200
1281 -GC.AG..AU
1282 - 110 80 -30 200
1283 - 200 200 200 200
1284 - 150 120 10 200
1285 - 30 -100 -30 200
1286 -GC.AU..AU
1287 - 200 200 200 200
1288 - 240 150 140 200
1289 - 160 30 100 200
1290 - 230 100 170 200
1291 -GC.CA..AU
1292 - 190 250 200 250
1293 - 140 140 200 150
1294 - 80 180 200 190
1295 - 200 200 200 200
1296 -GC.CC..AU
1297 - 190 190 200 190
1298 - 190 190 200 190
1299 - 200 200 200 200
1300 - 190 190 200 190
1301 -GC.CG..AU
1302 - 80 180 200 190
1303 - 200 200 200 200
1304 - 120 180 200 190
1305 - -90 10 200 10
1306 -GC.CU..AU
1307 - 200 200 200 200
1308 - 160 160 200 160
1309 - 30 130 200 140
1310 - 100 100 200 110
1311 -GC.GA..AU
1312 - 10 200 180 40
1313 - -30 200 130 -110
1314 - -90 200 70 10
1315 - 200 200 200 200
1316 -GC.GC..AU
1317 - 10 200 180 -60
1318 - 110 200 240 40
1319 - 200 200 200 200
1320 - 110 200 240 40
1321 -GC.GG..AU
1322 - -90 200 70 10
1323 - 200 200 200 200
1324 - -50 200 110 -30
1325 - -90 200 0 -350
1326 -GC.GU..AU
1327 - 200 200 200 200
1328 - 80 200 210 10
1329 - 40 200 120 -220
1330 - 110 200 190 30
1331 -GC.UA..AU
1332 - 200 150 0 210
1333 - 200 40 -150 70
1334 - 200 90 -30 190
1335 - 200 200 200 200
1336 -GC.UC..AU
1337 - 200 90 -100 110
1338 - 200 90 0 110
1339 - 200 200 200 200
1340 - 200 90 0 110
1341 -GC.UG..AU
1342 - 200 90 -30 190
1343 - 200 200 200 200
1344 - 200 80 -70 150
1345 - 200 -90 -390 10
1346 -GC.UU..AU
1347 - 200 200 200 200
1348 - 200 60 -30 80
1349 - 200 40 -260 140
1350 - 200 0 -10 30
1351 -GC.AA..CG
1352 - 150 120 10 200
1353 - 120 90 -10 200
1354 - -50 -80 -190 200
1355 - 200 200 200 200
1356 -GC.AC..CG
1357 - 120 90 -20 200
1358 - 180 90 90 200
1359 - 200 200 200 200
1360 - 80 0 -10 200
1361 -GC.AG..CG
1362 - 10 -20 -130 200
1363 - 200 200 200 200
1364 - 110 80 -20 200
1365 - -70 -200 -130 200
1366 -GC.AU..CG
1367 - 200 200 200 200
1368 - 190 100 90 200
1369 - -30 -160 -90 200
1370 - 150 20 90 200
1371 -GC.CA..CG
1372 - 120 180 200 190
1373 - 100 100 200 100
1374 - -80 20 200 30
1375 - 200 200 200 200
1376 -GC.CC..CG
1377 - 90 90 200 100
1378 - 100 100 200 100
1379 - 200 200 200 200
1380 - 0 0 200 0
1381 -GC.CG..CG
1382 - -10 90 200 90
1383 - 200 200 200 200
1384 - 90 150 200 150
1385 --190 -90 200 -90
1386 -GC.CU..CG
1387 - 200 200 200 200
1388 - 100 100 200 110
1389 --150 -50 200 -50
1390 - 20 20 200 30
1391 -GC.GA..CG
1392 - -50 200 110 -30
1393 - -80 200 90 -150
1394 --260 200 -90 -150
1395 - 200 200 200 200
1396 -GC.GC..CG
1397 - -80 200 80 -160
1398 - 20 200 150 -50
1399 - 200 200 200 200
1400 - -80 200 50 -150
1401 -GC.GG..CG
1402 --190 200 -20 -90
1403 - 200 200 200 200
1404 - -90 200 80 -60
1405 --190 200 -100 -450
1406 -GC.GU..CG
1407 - 200 200 200 200
1408 - 30 200 150 -50
1409 --150 200 -60 -410
1410 - 30 200 110 -50
1411 -GC.UA..CG
1412 - 200 80 -70 150
1413 - 200 0 -190 20
1414 - 200 -80 -190 30
1415 - 200 200 200 200
1416 -GC.UC..CG
1417 - 200 0 -200 20
1418 - 200 0 -90 20
1419 - 200 200 200 200
1420 - 200 -100 -190 -70
1421 -GC.UG..CG
1422 - 200 -10 -130 90
1423 - 200 200 200 200
1424 - 200 50 -100 110
1425 - 200 -190 -490 -90
1426 -GC.UU..CG
1427 - 200 200 200 200
1428 - 200 0 -90 30
1429 - 200 -150 -450 -50
1430 - 200 -70 -90 -50
1431 -GC.AA..GC
1432 - 50 110 40 200
1433 - 130 100 70 200
1434 - -20 70 -50 200
1435 - 200 200 200 200
1436 -GC.AC..GC
1437 - 60 110 50 200
1438 - 220 190 70 200
1439 - 200 200 200 200
1440 - 200 110 50 200
1441 -GC.AG..GC
1442 - 0 -100 -70 200
1443 - 200 200 200 200
1444 - 110 80 -20 200
1445 - -10 -160 -60 200
1446 -GC.AU..GC
1447 - 200 200 200 200
1448 - 200 110 100 200
1449 - 90 -10 60 200
1450 - 140 30 140 200
1451 -GC.CA..GC
1452 - 110 170 200 180
1453 - 100 100 200 110
1454 - -40 110 200 120
1455 - 200 200 200 200
1456 -GC.CC..GC
1457 - 150 150 200 150
1458 - 130 130 200 140
1459 - 200 200 200 200
1460 - 120 120 200 120
1461 -GC.CG..GC
1462 - -70 -60 200 120
1463 - 200 200 200 200
1464 - 90 150 200 150
1465 --160 -60 200 -50
1466 -GC.CU..GC
1467 - 200 200 200 200
1468 - 120 120 200 120
1469 - 0 100 200 100
1470 - 30 30 200 30
1471 -GC.GA..GC
1472 - -30 200 100 -50
1473 - -70 200 90 -150
1474 --170 200 0 -130
1475 - 200 200 200 200
1476 -GC.GC..GC
1477 - 10 200 140 -60
1478 - 70 200 180 -20
1479 - 200 200 200 200
1480 - 40 200 170 -10
1481 -GC.GG..GC
1482 --160 200 0 -60
1483 - 200 200 200 200
1484 - -90 200 80 -60
1485 --160 200 -70 -410
1486 -GC.GU..GC
1487 - 200 200 200 200
1488 - 40 200 170 -30
1489 - 30 200 90 -240
1490 - 50 200 120 10
1491 -GC.UA..GC
1492 - 200 70 10 150
1493 - 200 0 -190 -20
1494 - 200 20 -90 90
1495 - 200 200 200 200
1496 -GC.UC..GC
1497 - 200 50 -70 0
1498 - 200 30 -30 -10
1499 - 200 200 200 200
1500 - 200 20 -70 40
1501 -GC.UG..GC
1502 - 200 20 -80 90
1503 - 200 200 200 200
1504 - 200 50 -100 110
1505 - 200 -160 -440 -100
1506 -GC.UU..GC
1507 - 200 200 200 200
1508 - 200 170 -70 20
1509 - 200 0 -300 60
1510 - 200 10 -100 60
1511 -GC.AA..UA
1512 - 210 180 70 200
1513 - 190 160 50 200
1514 - 90 60 -50 200
1515 - 200 200 200 200
1516 -GC.AC..UA
1517 - 200 170 60 200
1518 - 240 150 140 200
1519 - 200 200 200 200
1520 - 240 150 140 200
1521 -GC.AG..UA
1522 - 90 60 -50 200
1523 - 200 200 200 200
1524 - 140 110 0 200
1525 - 70 -60 10 200
1526 -GC.AU..UA
1527 - 200 200 200 200
1528 - 240 150 140 200
1529 - 170 40 110 200
1530 - 200 70 150 200
1531 -GC.CA..UA
1532 - 190 250 200 250
1533 - 160 160 200 170
1534 - 60 160 200 170
1535 - 200 200 200 200
1536 -GC.CC..UA
1537 - 170 170 200 180
1538 - 160 160 200 160
1539 - 200 200 200 200
1540 - 160 160 200 160
1541 -GC.CG..UA
1542 - 60 160 200 170
1543 - 200 200 200 200
1544 - 120 180 200 180
1545 - -50 50 200 50
1546 -GC.CU..UA
1547 - 200 200 200 200
1548 - 160 160 200 160
1549 - 40 140 200 150
1550 - 80 80 200 80
1551 -GC.GA..UA
1552 - 10 200 180 40
1553 - -10 200 150 -90
1554 --110 200 50 -10
1555 - 200 200 200 200
1556 -GC.GC..UA
1557 - 0 200 160 -80
1558 - 80 200 210 10
1559 - 200 200 200 200
1560 - 80 200 210 10
1561 -GC.GG..UA
1562 --110 200 50 -10
1563 - 200 200 200 200
1564 - -60 200 110 -30
1565 - -50 200 40 -310
1566 -GC.GU..UA
1567 - 200 200 200 200
1568 - 80 200 210 10
1569 - 50 200 130 -210
1570 - 80 200 170 10
1571 -GC.UA..UA
1572 - 200 150 0 210
1573 - 200 60 -130 90
1574 - 200 70 -50 170
1575 - 200 200 200 200
1576 -GC.UC..UA
1577 - 200 70 -120 100
1578 - 200 60 -30 80
1579 - 200 200 200 200
1580 - 200 60 -30 80
1581 -GC.UG..UA
1582 - 200 70 -50 170
1583 - 200 200 200 200
1584 - 200 80 -70 140
1585 - 200 -50 -350 50
1586 -GC.UU..UA
1587 - 200 200 200 200
1588 - 200 60 -30 80
1589 - 200 50 -250 150
1590 - 200 -20 -30 0
1591 -GC.AA..GU
1592 - 210 180 70 200
1593 - 170 140 30 200
1594 - 110 80 -30 200
1595 - 200 200 200 200
1596 -GC.AC..GU
1597 - 210 180 70 200
1598 - 270 180 170 200
1599 - 200 200 200 200
1600 - 270 180 170 200
1601 -GC.AG..GU
1602 - 110 80 -30 200
1603 - 200 200 200 200
1604 - 150 120 10 200
1605 - 30 -100 -30 200
1606 -GC.AU..GU
1607 - 200 200 200 200
1608 - 240 150 140 200
1609 - 160 30 100 200
1610 - 230 100 170 200
1611 -GC.CA..GU
1612 - 190 250 200 250
1613 - 140 140 200 150
1614 - 80 180 200 190
1615 - 200 200 200 200
1616 -GC.CC..GU
1617 - 190 190 200 190
1618 - 190 190 200 190
1619 - 200 200 200 200
1620 - 190 190 200 190
1621 -GC.CG..GU
1622 - 80 180 200 190
1623 - 200 200 200 200
1624 - 120 180 200 190
1625 - -90 10 200 10
1626 -GC.CU..GU
1627 - 200 200 200 200
1628 - 160 160 200 160
1629 - 30 130 200 140
1630 - 100 100 200 110
1631 -GC.GA..GU
1632 - 10 200 180 40
1633 - -30 200 130 -110
1634 - -90 200 70 10
1635 - 200 200 200 200
1636 -GC.GC..GU
1637 - 10 200 180 -60
1638 - 110 200 240 40
1639 - 200 200 200 200
1640 - 110 200 240 40
1641 -GC.GG..GU
1642 - -90 200 70 10
1643 - 200 200 200 200
1644 - -50 200 110 -30
1645 - -90 200 0 -350
1646 -GC.GU..GU
1647 - 200 200 200 200
1648 - 80 200 210 10
1649 - 40 200 120 -220
1650 - 110 200 190 30
1651 -GC.UA..GU
1652 - 200 150 0 210
1653 - 200 40 -150 70
1654 - 200 90 -30 190
1655 - 200 200 200 200
1656 -GC.UC..GU
1657 - 200 90 -100 110
1658 - 200 90 0 110
1659 - 200 200 200 200
1660 - 200 90 0 110
1661 -GC.UG..GU
1662 - 200 90 -30 190
1663 - 200 200 200 200
1664 - 200 80 -70 150
1665 - 200 -90 -390 10
1666 -GC.UU..GU
1667 - 200 200 200 200
1668 - 200 60 -30 80
1669 - 200 40 -260 140
1670 - 200 0 -10 30
1671 -GC.AA..UG
1672 - 210 180 70 200
1673 - 190 160 50 200
1674 - 90 60 -50 200
1675 - 200 200 200 200
1676 -GC.AC..UG
1677 - 200 170 60 200
1678 - 240 150 140 200
1679 - 200 200 200 200
1680 - 240 150 140 200
1681 -GC.AG..UG
1682 - 90 60 -50 200
1683 - 200 200 200 200
1684 - 140 110 0 200
1685 - 70 -60 10 200
1686 -GC.AU..UG
1687 - 200 200 200 200
1688 - 240 150 140 200
1689 - 170 40 110 200
1690 - 200 70 150 200
1691 -GC.CA..UG
1692 - 190 250 200 250
1693 - 160 160 200 170
1694 - 60 160 200 170
1695 - 200 200 200 200
1696 -GC.CC..UG
1697 - 170 170 200 180
1698 - 160 160 200 160
1699 - 200 200 200 200
1700 - 160 160 200 160
1701 -GC.CG..UG
1702 - 60 160 200 170
1703 - 200 200 200 200
1704 - 120 180 200 180
1705 - -50 50 200 50
1706 -GC.CU..UG
1707 - 200 200 200 200
1708 - 160 160 200 160
1709 - 40 140 200 150
1710 - 80 80 200 80
1711 -GC.GA..UG
1712 - 10 200 180 40
1713 - -10 200 150 -90
1714 --110 200 50 -10
1715 - 200 200 200 200
1716 -GC.GC..UG
1717 - 0 200 160 -80
1718 - 80 200 210 10
1719 - 200 200 200 200
1720 - 80 200 210 10
1721 -GC.GG..UG
1722 --110 200 50 -10
1723 - 200 200 200 200
1724 - -60 200 110 -30
1725 - -50 200 40 -310
1726 -GC.GU..UG
1727 - 200 200 200 200
1728 - 80 200 210 10
1729 - 50 200 130 -210
1730 - 80 200 170 10
1731 -GC.UA..UG
1732 - 200 150 0 210
1733 - 200 60 -130 90
1734 - 200 70 -50 170
1735 - 200 200 200 200
1736 -GC.UC..UG
1737 - 200 70 -120 100
1738 - 200 60 -30 80
1739 - 200 200 200 200
1740 - 200 60 -30 80
1741 -GC.UG..UG
1742 - 200 70 -50 170
1743 - 200 200 200 200
1744 - 200 80 -70 140
1745 - 200 -50 -350 50
1746 -GC.UU..UG
1747 - 200 200 200 200
1748 - 200 60 -30 80
1749 - 200 50 -250 150
1750 - 200 -20 -30 0
1751 -UA.AA..AU
1752 - 280 280 170 200
1753 - 230 230 130 200
1754 - 170 170 70 200
1755 - 200 200 200 200
1756 -UA.AC..AU
1757 - 280 280 170 200
1758 - 340 280 270 200
1759 - 200 200 200 200
1760 - 340 280 270 200
1761 -UA.AG..AU
1762 - 170 170 70 200
1763 - 200 200 200 200
1764 - 210 210 110 200
1765 - 100 0 70 200
1766 -UA.AU..AU
1767 - 200 200 200 200
1768 - 310 250 240 200
1769 - 220 120 200 200
1770 - 290 190 270 200
1771 -UA.CA..AU
1772 - 230 340 200 310
1773 - 190 230 200 200
1774 - 130 270 200 240
1775 - 200 200 200 200
1776 -UA.CC..AU
1777 - 230 280 200 250
1778 - 230 280 200 250
1779 - 200 200 200 200
1780 - 230 280 200 250
1781 -UA.CG..AU
1782 - 130 270 200 240
1783 - 200 200 200 200
1784 - 170 270 200 240
1785 - -50 100 200 70
1786 -UA.CU..AU
1787 - 200 200 200 200
1788 - 200 250 200 220
1789 - 80 220 200 190
1790 - 150 190 200 160
1791 -UA.GA..AU
1792 - 170 200 210 220
1793 - 130 200 170 80
1794 - 70 200 110 200
1795 - 200 200 200 200
1796 -UA.GC..AU
1797 - 170 200 210 120
1798 - 270 200 270 220
1799 - 200 200 200 200
1800 - 270 200 270 220
1801 -UA.GG..AU
1802 - 70 200 110 200
1803 - 200 200 200 200
1804 - 110 200 150 160
1805 - 70 200 30 -160
1806 -UA.GU..AU
1807 - 200 200 200 200
1808 - 240 200 240 190
1809 - 200 200 160 -30
1810 - 270 200 230 220
1811 -UA.UA..AU
1812 - 200 340 100 290
1813 - 200 230 -50 150
1814 - 200 270 70 270
1815 - 200 200 200 200
1816 -UA.UC..AU
1817 - 200 280 0 190
1818 - 200 280 100 190
1819 - 200 200 200 200
1820 - 200 280 100 190
1821 -UA.UG..AU
1822 - 200 270 70 270
1823 - 200 200 200 200
1824 - 200 270 30 230
1825 - 200 100 -290 90
1826 -UA.UU..AU
1827 - 200 200 200 200
1828 - 200 250 70 160
1829 - 200 220 -160 220
1830 - 200 190 90 110
1831 -UA.AA..CG
1832 - 210 210 110 200
1833 - 190 190 80 200
1834 - 10 10 -90 200
1835 - 200 200 200 200
1836 -UA.AC..CG
1837 - 180 180 80 200
1838 - 250 190 180 200
1839 - 200 200 200 200
1840 - 150 90 90 200
1841 -UA.AG..CG
1842 - 70 70 -30 200
1843 - 200 200 200 200
1844 - 180 180 70 200
1845 - 0 -100 -30 200
1846 -UA.AU..CG
1847 - 200 200 200 200
1848 - 250 190 190 200
1849 - 40 -60 10 200
1850 - 210 110 190 200
1851 -UA.CA..CG
1852 - 170 270 200 240
1853 - 140 190 200 160
1854 - -30 110 200 80
1855 - 200 200 200 200
1856 -UA.CC..CG
1857 - 140 180 200 150
1858 - 140 190 200 160
1859 - 200 200 200 200
1860 - 40 90 200 60
1861 -UA.CG..CG
1862 - 30 170 200 140
1863 - 200 200 200 200
1864 - 130 240 200 210
1865 --150 0 200 -30
1866 -UA.CU..CG
1867 - 200 200 200 200
1868 - 150 190 200 160
1869 --110 40 200 10
1870 - 70 110 200 80
1871 -UA.GA..CG
1872 - 110 200 150 160
1873 - 80 200 120 30
1874 - -90 200 -50 40
1875 - 200 200 200 200
1876 -UA.GC..CG
1877 - 80 200 120 30
1878 - 180 200 180 130
1879 - 200 200 200 200
1880 - 90 200 80 40
1881 -UA.GG..CG
1882 - -30 200 10 100
1883 - 200 200 200 200
1884 - 70 200 110 120
1885 - -30 200 -70 -260
1886 -UA.GU..CG
1887 - 200 200 200 200
1888 - 190 200 190 140
1889 - 10 200 -30 -220
1890 - 190 200 150 140
1891 -UA.UA..CG
1892 - 200 270 30 230
1893 - 200 190 -90 100
1894 - 200 110 -90 110
1895 - 200 200 200 200
1896 -UA.UC..CG
1897 - 200 180 -100 100
1898 - 200 190 10 100
1899 - 200 200 200 200
1900 - 200 90 -90 0
1901 -UA.UG..CG
1902 - 200 170 -30 170
1903 - 200 200 200 200
1904 - 200 240 0 190
1905 - 200 0 -390 -10
1906 -UA.UU..CG
1907 - 200 200 200 200
1908 - 200 190 10 110
1909 - 200 40 -350 30
1910 - 200 110 10 30
1911 -UA.AA..GC
1912 - 200 200 100 200
1913 - 190 190 90 200
1914 - 100 100 0 200
1915 - 200 200 200 200
1916 -UA.AC..GC
1917 - 240 240 130 200
1918 - 280 220 220 200
1919 - 200 200 200 200
1920 - 270 210 200 200
1921 -UA.AG..GC
1922 - 100 100 0 200
1923 - 200 200 200 200
1924 - 180 180 70 200
1925 - 30 -70 10 200
1926 -UA.AU..GC
1927 - 200 200 200 200
1928 - 270 210 200 200
1929 - 180 80 160 200
1930 - 220 120 190 200
1931 -UA.CA..GC
1932 - 160 260 200 230
1933 - 150 190 200 160
1934 - 60 200 200 170
1935 - 200 200 200 200
1936 -UA.CC..GC
1937 - 190 240 200 210
1938 - 180 220 200 190
1939 - 200 200 200 200
1940 - 160 210 200 180
1941 -UA.CG..GC
1942 - 60 200 200 170
1943 - 200 200 200 200
1944 - 130 240 200 210
1945 --110 30 200 0
1946 -UA.CU..GC
1947 - 200 200 200 200
1948 - 160 210 200 180
1949 - 40 180 200 150
1950 - 70 120 200 90
1951 -UA.GA..GC
1952 - 100 200 140 150
1953 - 90 200 130 40
1954 - 0 200 40 130
1955 - 200 200 200 200
1956 -UA.GC..GC
1957 - 130 200 170 80
1958 - 220 200 220 170
1959 - 200 200 200 200
1960 - 200 200 200 150
1961 -UA.GG..GC
1962 - 0 200 40 130
1963 - 200 200 200 200
1964 - 70 200 110 120
1965 - 10 200 -30 -220
1966 -UA.GU..GC
1967 - 200 200 200 200
1968 - 200 200 200 150
1969 - 160 200 120 -70
1970 - 190 200 150 150
1971 -UA.UA..GC
1972 - 200 260 20 220
1973 - 200 190 -90 110
1974 - 200 200 0 200
1975 - 200 200 200 200
1976 -UA.UC..GC
1977 - 200 240 -40 150
1978 - 200 220 40 140
1979 - 200 200 200 200
1980 - 200 210 30 120
1981 -UA.UG..GC
1982 - 200 200 0 200
1983 - 200 200 200 200
1984 - 200 240 0 190
1985 - 200 30 -350 30
1986 -UA.UU..GC
1987 - 200 200 200 200
1988 - 200 210 30 120
1989 - 200 180 -200 180
1990 - 200 120 20 30
1991 -UA.AA..UA
1992 - 280 280 170 200
1993 - 250 250 150 200
1994 - 150 150 50 200
1995 - 200 200 200 200
1996 -UA.AC..UA
1997 - 260 260 160 200
1998 - 310 250 240 200
1999 - 200 200 200 200
2000 - 310 250 240 200
2001 -UA.AG..UA
2002 - 150 150 50 200
2003 - 200 200 200 200
2004 - 210 210 100 200
2005 - 130 30 110 200
2006 -UA.AU..UA
2007 - 200 200 200 200
2008 - 310 250 240 200
2009 - 230 130 210 200
2010 - 270 170 240 200
2011 -UA.CA..UA
2012 - 230 340 200 310
2013 - 210 250 200 220
2014 - 110 250 200 220
2015 - 200 200 200 200
2016 -UA.CC..UA
2017 - 220 260 200 230
2018 - 200 250 200 220
2019 - 200 200 200 200
2020 - 200 250 200 220
2021 -UA.CG..UA
2022 - 110 250 200 220
2023 - 200 200 200 200
2024 - 160 270 200 240
2025 - -10 130 200 100
2026 -UA.CU..UA
2027 - 200 200 200 200
2028 - 200 250 200 220
2029 - 90 230 200 200
2030 - 120 170 200 140
2031 -UA.GA..UA
2032 - 170 200 210 220
2033 - 150 200 190 100
2034 - 50 200 90 180
2035 - 200 200 200 200
2036 -UA.GC..UA
2037 - 160 200 200 110
2038 - 240 200 240 190
2039 - 200 200 200 200
2040 - 240 200 240 190
2041 -UA.GG..UA
2042 - 50 200 90 180
2043 - 200 200 200 200
2044 - 100 200 140 150
2045 - 110 200 70 -120
2046 -UA.GU..UA
2047 - 200 200 200 200
2048 - 240 200 240 190
2049 - 210 200 170 -20
2050 - 240 200 200 190
2051 -UA.UA..UA
2052 - 200 340 100 290
2053 - 200 250 -30 170
2054 - 200 250 50 250
2055 - 200 200 200 200
2056 -UA.UC..UA
2057 - 200 260 -20 180
2058 - 200 250 70 160
2059 - 200 200 200 200
2060 - 200 250 70 160
2061 -UA.UG..UA
2062 - 200 250 50 250
2063 - 200 200 200 200
2064 - 200 270 30 220
2065 - 200 130 -250 130
2066 -UA.UU..UA
2067 - 200 200 200 200
2068 - 200 250 70 160
2069 - 200 230 -150 230
2070 - 200 170 70 80
2071 -UA.AA..GU
2072 - 280 280 170 200
2073 - 230 230 130 200
2074 - 170 170 70 200
2075 - 200 200 200 200
2076 -UA.AC..GU
2077 - 280 280 170 200
2078 - 340 280 270 200
2079 - 200 200 200 200
2080 - 340 280 270 200
2081 -UA.AG..GU
2082 - 170 170 70 200
2083 - 200 200 200 200
2084 - 210 210 110 200
2085 - 100 0 70 200
2086 -UA.AU..GU
2087 - 200 200 200 200
2088 - 310 250 240 200
2089 - 220 120 200 200
2090 - 290 190 270 200
2091 -UA.CA..GU
2092 - 230 340 200 310
2093 - 190 230 200 200
2094 - 130 270 200 240
2095 - 200 200 200 200
2096 -UA.CC..GU
2097 - 230 280 200 250
2098 - 230 280 200 250
2099 - 200 200 200 200
2100 - 230 280 200 250
2101 -UA.CG..GU
2102 - 130 270 200 240
2103 - 200 200 200 200
2104 - 170 270 200 240
2105 - -50 100 200 70
2106 -UA.CU..GU
2107 - 200 200 200 200
2108 - 200 250 200 220
2109 - 80 220 200 190
2110 - 150 190 200 160
2111 -UA.GA..GU
2112 - 170 200 210 220
2113 - 130 200 170 80
2114 - 70 200 110 200
2115 - 200 200 200 200
2116 -UA.GC..GU
2117 - 170 200 210 120
2118 - 270 200 270 220
2119 - 200 200 200 200
2120 - 270 200 270 220
2121 -UA.GG..GU
2122 - 70 200 110 200
2123 - 200 200 200 200
2124 - 110 200 150 160
2125 - 70 200 30 -160
2126 -UA.GU..GU
2127 - 200 200 200 200
2128 - 240 200 240 190
2129 - 200 200 160 -30
2130 - 270 200 230 220
2131 -UA.UA..GU
2132 - 200 340 100 290
2133 - 200 230 -50 150
2134 - 200 270 70 270
2135 - 200 200 200 200
2136 -UA.UC..GU
2137 - 200 280 0 190
2138 - 200 280 100 190
2139 - 200 200 200 200
2140 - 200 280 100 190
2141 -UA.UG..GU
2142 - 200 270 70 270
2143 - 200 200 200 200
2144 - 200 270 30 230
2145 - 200 100 -290 90
2146 -UA.UU..GU
2147 - 200 200 200 200
2148 - 200 250 70 160
2149 - 200 220 -160 220
2150 - 200 190 90 110
2151 -UA.AA..UG
2152 - 280 280 170 200
2153 - 250 250 150 200
2154 - 150 150 50 200
2155 - 200 200 200 200
2156 -UA.AC..UG
2157 - 260 260 160 200
2158 - 310 250 240 200
2159 - 200 200 200 200
2160 - 310 250 240 200
2161 -UA.AG..UG
2162 - 150 150 50 200
2163 - 200 200 200 200
2164 - 210 210 100 200
2165 - 130 30 110 200
2166 -UA.AU..UG
2167 - 200 200 200 200
2168 - 310 250 240 200
2169 - 230 130 210 200
2170 - 270 170 240 200
2171 -UA.CA..UG
2172 - 230 340 200 310
2173 - 210 250 200 220
2174 - 110 250 200 220
2175 - 200 200 200 200
2176 -UA.CC..UG
2177 - 220 260 200 230
2178 - 200 250 200 220
2179 - 200 200 200 200
2180 - 200 250 200 220
2181 -UA.CG..UG
2182 - 110 250 200 220
2183 - 200 200 200 200
2184 - 160 270 200 240
2185 - -10 130 200 100
2186 -UA.CU..UG
2187 - 200 200 200 200
2188 - 200 250 200 220
2189 - 90 230 200 200
2190 - 120 170 200 140
2191 -UA.GA..UG
2192 - 170 200 210 220
2193 - 150 200 190 100
2194 - 50 200 90 180
2195 - 200 200 200 200
2196 -UA.GC..UG
2197 - 160 200 200 110
2198 - 240 200 240 190
2199 - 200 200 200 200
2200 - 240 200 240 190
2201 -UA.GG..UG
2202 - 50 200 90 180
2203 - 200 200 200 200
2204 - 100 200 140 150
2205 - 110 200 70 -120
2206 -UA.GU..UG
2207 - 200 200 200 200
2208 - 240 200 240 190
2209 - 210 200 170 -20
2210 - 240 200 200 190
2211 -UA.UA..UG
2212 - 200 340 100 290
2213 - 200 250 -30 170
2214 - 200 250 50 250
2215 - 200 200 200 200
2216 -UA.UC..UG
2217 - 200 260 -20 180
2218 - 200 250 70 160
2219 - 200 200 200 200
2220 - 200 250 70 160
2221 -UA.UG..UG
2222 - 200 250 50 250
2223 - 200 200 200 200
2224 - 200 270 30 220
2225 - 200 130 -250 130
2226 -UA.UU..UG
2227 - 200 200 200 200
2228 - 200 250 70 160
2229 - 200 230 -150 230
2230 - 200 170 70 80
2231 -GU.AA..AU
2232 - 280 260 150 200
2233 - 230 220 110 200
2234 - 170 160 50 200
2235 - 200 200 200 200
2236 -GU.AC..AU
2237 - 280 260 150 200
2238 - 340 260 250 200
2239 - 200 200 200 200
2240 - 340 260 250 200
2241 -GU.AG..AU
2242 - 170 160 50 200
2243 - 200 200 200 200
2244 - 210 200 90 200
2245 - 100 -20 50 200
2246 -GU.AU..AU
2247 - 200 200 200 200
2248 - 310 230 220 200
2249 - 220 110 180 200
2250 - 290 180 250 200
2251 -GU.CA..AU
2252 - 250 310 200 310
2253 - 210 200 200 200
2254 - 150 240 200 240
2255 - 200 200 200 200
2256 -GU.CC..AU
2257 - 250 250 200 250
2258 - 250 250 200 250
2259 - 200 200 200 200
2260 - 250 250 200 250
2261 -GU.CG..AU
2262 - 150 240 200 240
2263 - 200 200 200 200
2264 - 190 240 200 240
2265 - -30 70 200 70
2266 -GU.CU..AU
2267 - 200 200 200 200
2268 - 220 220 200 220
2269 - 100 190 200 190
2270 - 170 160 200 160
2271 -GU.GA..AU
2272 - 150 200 210 230
2273 - 110 200 160 90
2274 - 50 200 100 210
2275 - 200 200 200 200
2276 -GU.GC..AU
2277 - 150 200 210 130
2278 - 250 200 270 230
2279 - 200 200 200 200
2280 - 250 200 270 230
2281 -GU.GG..AU
2282 - 50 200 100 210
2283 - 200 200 200 200
2284 - 90 200 140 170
2285 - 50 200 30 -150
2286 -GU.GU..AU
2287 - 200 200 200 200
2288 - 220 200 240 200
2289 - 180 200 150 -20
2290 - 250 200 220 230
2291 -GU.UA..AU
2292 - 200 310 130 270
2293 - 200 200 -10 120
2294 - 200 240 110 240
2295 - 200 200 200 200
2296 -GU.UC..AU
2297 - 200 250 30 170
2298 - 200 250 130 170
2299 - 200 200 200 200
2300 - 200 250 130 170
2301 -GU.UG..AU
2302 - 200 240 110 240
2303 - 200 200 200 200
2304 - 200 240 70 200
2305 - 200 70 -250 70
2306 -GU.UU..AU
2307 - 200 200 200 200
2308 - 200 220 100 140
2309 - 200 190 -120 190
2310 - 200 160 130 80
2311 -GU.AA..CG
2312 - 210 200 90 200
2313 - 190 170 60 200
2314 - 10 0 -110 200
2315 - 200 200 200 200
2316 -GU.AC..CG
2317 - 180 170 60 200
2318 - 250 170 160 200
2319 - 200 200 200 200
2320 - 150 70 70 200
2321 -GU.AG..CG
2322 - 70 60 -50 200
2323 - 200 200 200 200
2324 - 180 160 50 200
2325 - 0 -120 -50 200
2326 -GU.AU..CG
2327 - 200 200 200 200
2328 - 250 180 170 200
2329 - 40 -80 -10 200
2330 - 210 100 170 200
2331 -GU.CA..CG
2332 - 190 240 200 240
2333 - 160 160 200 160
2334 - -10 80 200 80
2335 - 200 200 200 200
2336 -GU.CC..CG
2337 - 160 150 200 150
2338 - 160 160 200 160
2339 - 200 200 200 200
2340 - 60 60 200 60
2341 -GU.CG..CG
2342 - 50 140 200 140
2343 - 200 200 200 200
2344 - 150 210 200 210
2345 --130 -30 200 -30
2346 -GU.CU..CG
2347 - 200 200 200 200
2348 - 170 160 200 160
2349 - -90 10 200 10
2350 - 90 80 200 80
2351 -GU.GA..CG
2352 - 90 200 140 170
2353 - 60 200 120 40
2354 --110 200 -60 50
2355 - 200 200 200 200
2356 -GU.GC..CG
2357 - 60 200 110 40
2358 - 160 200 180 140
2359 - 200 200 200 200
2360 - 70 200 80 50
2361 -GU.GG..CG
2362 - -50 200 0 110
2363 - 200 200 200 200
2364 - 50 200 110 130
2365 - -50 200 -70 -250
2366 -GU.GU..CG
2367 - 200 200 200 200
2368 - 170 200 180 150
2369 - -10 200 -30 -210
2370 - 170 200 140 150
2371 -GU.UA..CG
2372 - 200 240 70 200
2373 - 200 160 -50 80
2374 - 200 80 -50 80
2375 - 200 200 200 200
2376 -GU.UC..CG
2377 - 200 150 -60 70
2378 - 200 160 50 80
2379 - 200 200 200 200
2380 - 200 60 -50 -20
2381 -GU.UG..CG
2382 - 200 140 10 150
2383 - 200 200 200 200
2384 - 200 210 40 170
2385 - 200 -30 -350 -30
2386 -GU.UU..CG
2387 - 200 200 200 200
2388 - 200 160 50 80
2389 - 200 10 -310 10
2390 - 200 80 50 0
2391 -GU.AA..GC
2392 - 200 190 80 200
2393 - 190 180 70 200
2394 - 100 90 -20 200
2395 - 200 200 200 200
2396 -GU.AC..GC
2397 - 240 220 110 200
2398 - 280 210 200 200
2399 - 200 200 200 200
2400 - 270 190 180 200
2401 -GU.AG..GC
2402 - 100 90 -20 200
2403 - 200 200 200 200
2404 - 180 160 50 200
2405 - 30 -80 -10 200
2406 -GU.AU..GC
2407 - 200 200 200 200
2408 - 270 190 180 200
2409 - 180 70 140 200
2410 - 220 100 180 200
2411 -GU.CA..GC
2412 - 180 230 200 230
2413 - 170 160 200 160
2414 - 80 170 200 170
2415 - 200 200 200 200
2416 -GU.CC..GC
2417 - 210 210 200 210
2418 - 200 190 200 190
2419 - 200 200 200 200
2420 - 180 180 200 180
2421 -GU.CG..GC
2422 - 80 170 200 170
2423 - 200 200 200 200
2424 - 150 210 200 210
2425 - -90 0 200 0
2426 -GU.CU..GC
2427 - 200 200 200 200
2428 - 180 180 200 180
2429 - 60 150 200 150
2430 - 90 90 200 90
2431 -GU.GA..GC
2432 - 80 200 130 160
2433 - 70 200 120 50
2434 - -20 200 30 140
2435 - 200 200 200 200
2436 -GU.GC..GC
2437 - 110 200 170 90
2438 - 200 200 210 180
2439 - 200 200 200 200
2440 - 180 200 200 160
2441 -GU.GG..GC
2442 - -20 200 30 140
2443 - 200 200 200 200
2444 - 50 200 110 130
2445 - -10 200 -40 -210
2446 -GU.GU..GC
2447 - 200 200 200 200
2448 - 180 200 200 160
2449 - 140 200 110 -60
2450 - 180 200 150 160
2451 -GU.UA..GC
2452 - 200 230 60 190
2453 - 200 160 -50 80
2454 - 200 170 40 180
2455 - 200 200 200 200
2456 -GU.UC..GC
2457 - 200 210 0 130
2458 - 200 190 80 110
2459 - 200 200 200 200
2460 - 200 180 70 100
2461 -GU.UG..GC
2462 - 200 170 40 180
2463 - 200 200 200 200
2464 - 200 210 40 170
2465 - 200 0 -310 0
2466 -GU.UU..GC
2467 - 200 200 200 200
2468 - 200 180 70 100
2469 - 200 150 -160 160
2470 - 200 90 60 10
2471 -GU.AA..UA
2472 - 280 260 150 200
2473 - 250 240 130 200
2474 - 150 140 30 200
2475 - 200 200 200 200
2476 -GU.AC..UA
2477 - 260 250 140 200
2478 - 310 230 220 200
2479 - 200 200 200 200
2480 - 310 230 220 200
2481 -GU.AG..UA
2482 - 150 140 30 200
2483 - 200 200 200 200
2484 - 210 190 80 200
2485 - 130 20 90 200
2486 -GU.AU..UA
2487 - 200 200 200 200
2488 - 310 230 220 200
2489 - 230 120 190 200
2490 - 270 150 220 200
2491 -GU.CA..UA
2492 - 250 310 200 310
2493 - 230 220 200 220
2494 - 130 220 200 220
2495 - 200 200 200 200
2496 -GU.CC..UA
2497 - 240 230 200 230
2498 - 220 220 200 220
2499 - 200 200 200 200
2500 - 220 220 200 220
2501 -GU.CG..UA
2502 - 130 220 200 220
2503 - 200 200 200 200
2504 - 180 240 200 240
2505 - 10 100 200 100
2506 -GU.CU..UA
2507 - 200 200 200 200
2508 - 220 220 200 220
2509 - 110 200 200 200
2510 - 140 140 200 140
2511 -GU.GA..UA
2512 - 150 200 210 230
2513 - 130 200 180 110
2514 - 30 200 80 190
2515 - 200 200 200 200
2516 -GU.GC..UA
2517 - 140 200 190 120
2518 - 220 200 240 200
2519 - 200 200 200 200
2520 - 220 200 240 200
2521 -GU.GG..UA
2522 - 30 200 80 190
2523 - 200 200 200 200
2524 - 80 200 140 160
2525 - 90 200 70 -110
2526 -GU.GU..UA
2527 - 200 200 200 200
2528 - 220 200 240 200
2529 - 190 200 160 -10
2530 - 220 200 200 200
2531 -GU.UA..UA
2532 - 200 310 130 270
2533 - 200 220 10 140
2534 - 200 220 90 220
2535 - 200 200 200 200
2536 -GU.UC..UA
2537 - 200 230 20 150
2538 - 200 220 100 140
2539 - 200 200 200 200
2540 - 200 220 100 140
2541 -GU.UG..UA
2542 - 200 220 90 220
2543 - 200 200 200 200
2544 - 200 240 70 200
2545 - 200 100 -210 110
2546 -GU.UU..UA
2547 - 200 200 200 200
2548 - 200 220 100 140
2549 - 200 200 -110 200
2550 - 200 140 110 60
2551 -GU.AA..GU
2552 - 280 260 150 200
2553 - 230 220 110 200
2554 - 170 160 50 200
2555 - 200 200 200 200
2556 -GU.AC..GU
2557 - 280 260 150 200
2558 - 340 260 250 200
2559 - 200 200 200 200
2560 - 340 260 250 200
2561 -GU.AG..GU
2562 - 170 160 50 200
2563 - 200 200 200 200
2564 - 210 200 90 200
2565 - 100 -20 50 200
2566 -GU.AU..GU
2567 - 200 200 200 200
2568 - 310 230 220 200
2569 - 220 110 180 200
2570 - 290 180 250 200
2571 -GU.CA..GU
2572 - 250 310 200 310
2573 - 210 200 200 200
2574 - 150 240 200 240
2575 - 200 200 200 200
2576 -GU.CC..GU
2577 - 250 250 200 250
2578 - 250 250 200 250
2579 - 200 200 200 200
2580 - 250 250 200 250
2581 -GU.CG..GU
2582 - 150 240 200 240
2583 - 200 200 200 200
2584 - 190 240 200 240
2585 - -30 70 200 70
2586 -GU.CU..GU
2587 - 200 200 200 200
2588 - 220 220 200 220
2589 - 100 190 200 190
2590 - 170 160 200 160
2591 -GU.GA..GU
2592 - 150 200 210 230
2593 - 110 200 160 90
2594 - 50 200 100 210
2595 - 200 200 200 200
2596 -GU.GC..GU
2597 - 150 200 210 130
2598 - 250 200 270 230
2599 - 200 200 200 200
2600 - 250 200 270 230
2601 -GU.GG..GU
2602 - 50 200 100 210
2603 - 200 200 200 200
2604 - 90 200 140 170
2605 - 50 200 30 -150
2606 -GU.GU..GU
2607 - 200 200 200 200
2608 - 220 200 240 200
2609 - 180 200 150 -20
2610 - 250 200 220 230
2611 -GU.UA..GU
2612 - 200 310 130 270
2613 - 200 200 -10 120
2614 - 200 240 110 240
2615 - 200 200 200 200
2616 -GU.UC..GU
2617 - 200 250 30 170
2618 - 200 250 130 170
2619 - 200 200 200 200
2620 - 200 250 130 170
2621 -GU.UG..GU
2622 - 200 240 110 240
2623 - 200 200 200 200
2624 - 200 240 70 200
2625 - 200 70 -250 70
2626 -GU.UU..GU
2627 - 200 200 200 200
2628 - 200 220 100 140
2629 - 200 190 -120 190
2630 - 200 160 130 80
2631 -GU.AA..UG
2632 - 280 260 150 200
2633 - 250 240 130 200
2634 - 150 140 30 200
2635 - 200 200 200 200
2636 -GU.AC..UG
2637 - 260 250 140 200
2638 - 310 230 220 200
2639 - 200 200 200 200
2640 - 310 230 220 200
2641 -GU.AG..UG
2642 - 150 140 30 200
2643 - 200 200 200 200
2644 - 210 190 80 200
2645 - 130 20 90 200
2646 -GU.AU..UG
2647 - 200 200 200 200
2648 - 310 230 220 200
2649 - 230 120 190 200
2650 - 270 150 220 200
2651 -GU.CA..UG
2652 - 250 310 200 310
2653 - 230 220 200 220
2654 - 130 220 200 220
2655 - 200 200 200 200
2656 -GU.CC..UG
2657 - 240 230 200 230
2658 - 220 220 200 220
2659 - 200 200 200 200
2660 - 220 220 200 220
2661 -GU.CG..UG
2662 - 130 220 200 220
2663 - 200 200 200 200
2664 - 180 240 200 240
2665 - 10 100 200 100
2666 -GU.CU..UG
2667 - 200 200 200 200
2668 - 220 220 200 220
2669 - 110 200 200 200
2670 - 140 140 200 140
2671 -GU.GA..UG
2672 - 150 200 210 230
2673 - 130 200 180 110
2674 - 30 200 80 190
2675 - 200 200 200 200
2676 -GU.GC..UG
2677 - 140 200 190 120
2678 - 220 200 240 200
2679 - 200 200 200 200
2680 - 220 200 240 200
2681 -GU.GG..UG
2682 - 30 200 80 190
2683 - 200 200 200 200
2684 - 80 200 140 160
2685 - 90 200 70 -110
2686 -GU.GU..UG
2687 - 200 200 200 200
2688 - 220 200 240 200
2689 - 190 200 160 -10
2690 - 220 200 200 200
2691 -GU.UA..UG
2692 - 200 310 130 270
2693 - 200 220 10 140
2694 - 200 220 90 220
2695 - 200 200 200 200
2696 -GU.UC..UG
2697 - 200 230 20 150
2698 - 200 220 100 140
2699 - 200 200 200 200
2700 - 200 220 100 140
2701 -GU.UG..UG
2702 - 200 220 90 220
2703 - 200 200 200 200
2704 - 200 240 70 200
2705 - 200 100 -210 110
2706 -GU.UU..UG
2707 - 200 200 200 200
2708 - 200 220 100 140
2709 - 200 200 -110 200
2710 - 200 140 110 60
2711 -UG.AA..AU
2712 - 280 280 170 200
2713 - 230 230 130 200
2714 - 170 170 70 200
2715 - 200 200 200 200
2716 -UG.AC..AU
2717 - 280 280 170 200
2718 - 340 280 270 200
2719 - 200 200 200 200
2720 - 340 280 270 200
2721 -UG.AG..AU
2722 - 170 170 70 200
2723 - 200 200 200 200
2724 - 210 210 110 200
2725 - 100 0 70 200
2726 -UG.AU..AU
2727 - 200 200 200 200
2728 - 310 250 240 200
2729 - 220 120 200 200
2730 - 290 190 270 200
2731 -UG.CA..AU
2732 - 230 340 200 310
2733 - 190 230 200 200
2734 - 130 270 200 240
2735 - 200 200 200 200
2736 -UG.CC..AU
2737 - 230 280 200 250
2738 - 230 280 200 250
2739 - 200 200 200 200
2740 - 230 280 200 250
2741 -UG.CG..AU
2742 - 130 270 200 240
2743 - 200 200 200 200
2744 - 170 270 200 240
2745 - -50 100 200 70
2746 -UG.CU..AU
2747 - 200 200 200 200
2748 - 200 250 200 220
2749 - 80 220 200 190
2750 - 150 190 200 160
2751 -UG.GA..AU
2752 - 170 200 210 220
2753 - 130 200 170 80
2754 - 70 200 110 200
2755 - 200 200 200 200
2756 -UG.GC..AU
2757 - 170 200 210 120
2758 - 270 200 270 220
2759 - 200 200 200 200
2760 - 270 200 270 220
2761 -UG.GG..AU
2762 - 70 200 110 200
2763 - 200 200 200 200
2764 - 110 200 150 160
2765 - 70 200 30 -160
2766 -UG.GU..AU
2767 - 200 200 200 200
2768 - 240 200 240 190
2769 - 200 200 160 -30
2770 - 270 200 230 220
2771 -UG.UA..AU
2772 - 200 340 100 290
2773 - 200 230 -50 150
2774 - 200 270 70 270
2775 - 200 200 200 200
2776 -UG.UC..AU
2777 - 200 280 0 190
2778 - 200 280 100 190
2779 - 200 200 200 200
2780 - 200 280 100 190
2781 -UG.UG..AU
2782 - 200 270 70 270
2783 - 200 200 200 200
2784 - 200 270 30 230
2785 - 200 100 -290 90
2786 -UG.UU..AU
2787 - 200 200 200 200
2788 - 200 250 70 160
2789 - 200 220 -160 220
2790 - 200 190 90 110
2791 -UG.AA..CG
2792 - 210 210 110 200
2793 - 190 190 80 200
2794 - 10 10 -90 200
2795 - 200 200 200 200
2796 -UG.AC..CG
2797 - 180 180 80 200
2798 - 250 190 180 200
2799 - 200 200 200 200
2800 - 150 90 90 200
2801 -UG.AG..CG
2802 - 70 70 -30 200
2803 - 200 200 200 200
2804 - 180 180 70 200
2805 - 0 -100 -30 200
2806 -UG.AU..CG
2807 - 200 200 200 200
2808 - 250 190 190 200
2809 - 40 -60 10 200
2810 - 210 110 190 200
2811 -UG.CA..CG
2812 - 170 270 200 240
2813 - 140 190 200 160
2814 - -30 110 200 80
2815 - 200 200 200 200
2816 -UG.CC..CG
2817 - 140 180 200 150
2818 - 140 190 200 160
2819 - 200 200 200 200
2820 - 40 90 200 60
2821 -UG.CG..CG
2822 - 30 170 200 140
2823 - 200 200 200 200
2824 - 130 240 200 210
2825 --150 0 200 -30
2826 -UG.CU..CG
2827 - 200 200 200 200
2828 - 150 190 200 160
2829 --110 40 200 10
2830 - 70 110 200 80
2831 -UG.GA..CG
2832 - 110 200 150 160
2833 - 80 200 120 30
2834 - -90 200 -50 40
2835 - 200 200 200 200
2836 -UG.GC..CG
2837 - 80 200 120 30
2838 - 180 200 180 130
2839 - 200 200 200 200
2840 - 90 200 80 40
2841 -UG.GG..CG
2842 - -30 200 10 100
2843 - 200 200 200 200
2844 - 70 200 110 120
2845 - -30 200 -70 -260
2846 -UG.GU..CG
2847 - 200 200 200 200
2848 - 190 200 190 140
2849 - 10 200 -30 -220
2850 - 190 200 150 140
2851 -UG.UA..CG
2852 - 200 270 30 230
2853 - 200 190 -90 100
2854 - 200 110 -90 110
2855 - 200 200 200 200
2856 -UG.UC..CG
2857 - 200 180 -100 100
2858 - 200 190 10 100
2859 - 200 200 200 200
2860 - 200 90 -90 0
2861 -UG.UG..CG
2862 - 200 170 -30 170
2863 - 200 200 200 200
2864 - 200 240 0 190
2865 - 200 0 -390 -10
2866 -UG.UU..CG
2867 - 200 200 200 200
2868 - 200 190 10 110
2869 - 200 40 -350 30
2870 - 200 110 10 30
2871 -UG.AA..GC
2872 - 200 200 100 200
2873 - 190 190 90 200
2874 - 100 100 0 200
2875 - 200 200 200 200
2876 -UG.AC..GC
2877 - 240 240 130 200
2878 - 280 220 220 200
2879 - 200 200 200 200
2880 - 270 210 200 200
2881 -UG.AG..GC
2882 - 100 100 0 200
2883 - 200 200 200 200
2884 - 180 180 70 200
2885 - 30 -70 10 200
2886 -UG.AU..GC
2887 - 200 200 200 200
2888 - 270 210 200 200
2889 - 180 80 160 200
2890 - 220 120 190 200
2891 -UG.CA..GC
2892 - 160 260 200 230
2893 - 150 190 200 160
2894 - 60 200 200 170
2895 - 200 200 200 200
2896 -UG.CC..GC
2897 - 190 240 200 210
2898 - 180 220 200 190
2899 - 200 200 200 200
2900 - 160 210 200 180
2901 -UG.CG..GC
2902 - 60 200 200 170
2903 - 200 200 200 200
2904 - 130 240 200 210
2905 --110 30 200 0
2906 -UG.CU..GC
2907 - 200 200 200 200
2908 - 160 210 200 180
2909 - 40 180 200 150
2910 - 70 120 200 90
2911 -UG.GA..GC
2912 - 100 200 140 150
2913 - 90 200 130 40
2914 - 0 200 40 130
2915 - 200 200 200 200
2916 -UG.GC..GC
2917 - 130 200 170 80
2918 - 220 200 220 170
2919 - 200 200 200 200
2920 - 200 200 200 150
2921 -UG.GG..GC
2922 - 0 200 40 130
2923 - 200 200 200 200
2924 - 70 200 110 120
2925 - 10 200 -30 -220
2926 -UG.GU..GC
2927 - 200 200 200 200
2928 - 200 200 200 150
2929 - 160 200 120 -70
2930 - 190 200 150 150
2931 -UG.UA..GC
2932 - 200 260 20 220
2933 - 200 190 -90 110
2934 - 200 200 0 200
2935 - 200 200 200 200
2936 -UG.UC..GC
2937 - 200 240 -40 150
2938 - 200 220 40 140
2939 - 200 200 200 200
2940 - 200 210 30 120
2941 -UG.UG..GC
2942 - 200 200 0 200
2943 - 200 200 200 200
2944 - 200 240 0 190
2945 - 200 30 -350 30
2946 -UG.UU..GC
2947 - 200 200 200 200
2948 - 200 210 30 120
2949 - 200 180 -200 180
2950 - 200 120 20 30
2951 -UG.AA..UA
2952 - 280 280 170 200
2953 - 250 250 150 200
2954 - 150 150 50 200
2955 - 200 200 200 200
2956 -UG.AC..UA
2957 - 260 260 160 200
2958 - 310 250 240 200
2959 - 200 200 200 200
2960 - 310 250 240 200
2961 -UG.AG..UA
2962 - 150 150 50 200
2963 - 200 200 200 200
2964 - 210 210 100 200
2965 - 130 30 110 200
2966 -UG.AU..UA
2967 - 200 200 200 200
2968 - 310 250 240 200
2969 - 230 130 210 200
2970 - 270 170 240 200
2971 -UG.CA..UA
2972 - 230 340 200 310
2973 - 210 250 200 220
2974 - 110 250 200 220
2975 - 200 200 200 200
2976 -UG.CC..UA
2977 - 220 260 200 230
2978 - 200 250 200 220
2979 - 200 200 200 200
2980 - 200 250 200 220
2981 -UG.CG..UA
2982 - 110 250 200 220
2983 - 200 200 200 200
2984 - 160 270 200 240
2985 - -10 130 200 100
2986 -UG.CU..UA
2987 - 200 200 200 200
2988 - 200 250 200 220
2989 - 90 230 200 200
2990 - 120 170 200 140
2991 -UG.GA..UA
2992 - 170 200 210 220
2993 - 150 200 190 100
2994 - 50 200 90 180
2995 - 200 200 200 200
2996 -UG.GC..UA
2997 - 160 200 200 110
2998 - 240 200 240 190
2999 - 200 200 200 200
3000 - 240 200 240 190
3001 -UG.GG..UA
3002 - 50 200 90 180
3003 - 200 200 200 200
3004 - 100 200 140 150
3005 - 110 200 70 -120
3006 -UG.GU..UA
3007 - 200 200 200 200
3008 - 240 200 240 190
3009 - 210 200 170 -20
3010 - 240 200 200 190
3011 -UG.UA..UA
3012 - 200 340 100 290
3013 - 200 250 -30 170
3014 - 200 250 50 250
3015 - 200 200 200 200
3016 -UG.UC..UA
3017 - 200 260 -20 180
3018 - 200 250 70 160
3019 - 200 200 200 200
3020 - 200 250 70 160
3021 -UG.UG..UA
3022 - 200 250 50 250
3023 - 200 200 200 200
3024 - 200 270 30 220
3025 - 200 130 -250 130
3026 -UG.UU..UA
3027 - 200 200 200 200
3028 - 200 250 70 160
3029 - 200 230 -150 230
3030 - 200 170 70 80
3031 -UG.AA..GU
3032 - 280 280 170 200
3033 - 230 230 130 200
3034 - 170 170 70 200
3035 - 200 200 200 200
3036 -UG.AC..GU
3037 - 280 280 170 200
3038 - 340 280 270 200
3039 - 200 200 200 200
3040 - 340 280 270 200
3041 -UG.AG..GU
3042 - 170 170 70 200
3043 - 200 200 200 200
3044 - 210 210 110 200
3045 - 100 0 70 200
3046 -UG.AU..GU
3047 - 200 200 200 200
3048 - 310 250 240 200
3049 - 220 120 200 200
3050 - 290 190 270 200
3051 -UG.CA..GU
3052 - 230 340 200 310
3053 - 190 230 200 200
3054 - 130 270 200 240
3055 - 200 200 200 200
3056 -UG.CC..GU
3057 - 230 280 200 250
3058 - 230 280 200 250
3059 - 200 200 200 200
3060 - 230 280 200 250
3061 -UG.CG..GU
3062 - 130 270 200 240
3063 - 200 200 200 200
3064 - 170 270 200 240
3065 - -50 100 200 70
3066 -UG.CU..GU
3067 - 200 200 200 200
3068 - 200 250 200 220
3069 - 80 220 200 190
3070 - 150 190 200 160
3071 -UG.GA..GU
3072 - 170 200 210 220
3073 - 130 200 170 80
3074 - 70 200 110 200
3075 - 200 200 200 200
3076 -UG.GC..GU
3077 - 170 200 210 120
3078 - 270 200 270 220
3079 - 200 200 200 200
3080 - 270 200 270 220
3081 -UG.GG..GU
3082 - 70 200 110 200
3083 - 200 200 200 200
3084 - 110 200 150 160
3085 - 70 200 30 -160
3086 -UG.GU..GU
3087 - 200 200 200 200
3088 - 240 200 240 190
3089 - 200 200 160 -30
3090 - 270 200 230 220
3091 -UG.UA..GU
3092 - 200 340 100 290
3093 - 200 230 -50 150
3094 - 200 270 70 270
3095 - 200 200 200 200
3096 -UG.UC..GU
3097 - 200 280 0 190
3098 - 200 280 100 190
3099 - 200 200 200 200
3100 - 200 280 100 190
3101 -UG.UG..GU
3102 - 200 270 70 270
3103 - 200 200 200 200
3104 - 200 270 30 230
3105 - 200 100 -290 90
3106 -UG.UU..GU
3107 - 200 200 200 200
3108 - 200 250 70 160
3109 - 200 220 -160 220
3110 - 200 190 90 110
3111 -UG.AA..UG
3112 - 280 280 170 200
3113 - 250 250 150 200
3114 - 150 150 50 200
3115 - 200 200 200 200
3116 -UG.AC..UG
3117 - 260 260 160 200
3118 - 310 250 240 200
3119 - 200 200 200 200
3120 - 310 250 240 200
3121 -UG.AG..UG
3122 - 150 150 50 200
3123 - 200 200 200 200
3124 - 210 210 100 200
3125 - 130 30 110 200
3126 -UG.AU..UG
3127 - 200 200 200 200
3128 - 310 250 240 200
3129 - 230 130 210 200
3130 - 270 170 240 200
3131 -UG.CA..UG
3132 - 230 340 200 310
3133 - 210 250 200 220
3134 - 110 250 200 220
3135 - 200 200 200 200
3136 -UG.CC..UG
3137 - 220 260 200 230
3138 - 200 250 200 220
3139 - 200 200 200 200
3140 - 200 250 200 220
3141 -UG.CG..UG
3142 - 110 250 200 220
3143 - 200 200 200 200
3144 - 160 270 200 240
3145 - -10 130 200 100
3146 -UG.CU..UG
3147 - 200 200 200 200
3148 - 200 250 200 220
3149 - 90 230 200 200
3150 - 120 170 200 140
3151 -UG.GA..UG
3152 - 170 200 210 220
3153 - 150 200 190 100
3154 - 50 200 90 180
3155 - 200 200 200 200
3156 -UG.GC..UG
3157 - 160 200 200 110
3158 - 240 200 240 190
3159 - 200 200 200 200
3160 - 240 200 240 190
3161 -UG.GG..UG
3162 - 50 200 90 180
3163 - 200 200 200 200
3164 - 100 200 140 150
3165 - 110 200 70 -120
3166 -UG.GU..UG
3167 - 200 200 200 200
3168 - 240 200 240 190
3169 - 210 200 170 -20
3170 - 240 200 200 190
3171 -UG.UA..UG
3172 - 200 340 100 290
3173 - 200 250 -30 170
3174 - 200 250 50 250
3175 - 200 200 200 200
3176 -UG.UC..UG
3177 - 200 260 -20 180
3178 - 200 250 70 160
3179 - 200 200 200 200
3180 - 200 250 70 160
3181 -UG.UG..UG
3182 - 200 250 50 250
3183 - 200 200 200 200
3184 - 200 270 30 220
3185 - 200 130 -250 130
3186 -UG.UU..UG
3187 - 200 200 200 200
3188 - 200 250 70 160
3189 - 200 230 -150 230
3190 - 200 170 70 80
3191 ->Interior Loops 1x2
3192 ->CG.A..AU = 5'- C A A -3'
3193 -> 3'- G Y X U -5'
3194 ->Rows: X = A C G U (X constant for a row)
3195 ->Columns: Y = A C G U (Y constant in column)
3196 -AU.A..AU
3197 - 390 370 310 550
3198 - 360 320 310 550
3199 - 250 210 190 550
3200 - 550 550 550 550
3201 -AU.C..AU
3202 - 380 370 550 370
3203 - 370 400 550 370
3204 - 550 550 550 550
3205 - 400 340 550 370
3206 -AU.G..AU
3207 - 320 550 230 550
3208 - 550 550 550 550
3209 - 230 550 370 550
3210 - 550 550 550 550
3211 -AU.U..AU
3212 - 550 550 550 550
3213 - 550 370 550 280
3214 - 550 550 550 550
3215 - 550 320 550 270
3216 -AU.A..CG
3217 - 320 300 240 480
3218 - 290 250 240 480
3219 - 180 140 120 480
3220 - 480 480 480 480
3221 -AU.C..CG
3222 - 310 300 480 300
3223 - 300 330 480 300
3224 - 480 480 480 480
3225 - 330 270 480 300
3226 -AU.G..CG
3227 - 250 480 160 480
3228 - 480 480 480 480
3229 - 160 480 300 480
3230 - 480 480 480 480
3231 -AU.U..CG
3232 - 480 480 480 480
3233 - 480 300 480 210
3234 - 480 480 480 480
3235 - 480 250 480 200
3236 -AU.A..GC
3237 - 320 300 240 480
3238 - 290 250 240 480
3239 - 180 140 120 480
3240 - 480 480 480 480
3241 -AU.C..GC
3242 - 310 300 480 300
3243 - 300 330 480 300
3244 - 480 480 480 480
3245 - 330 270 480 300
3246 -AU.G..GC
3247 - 250 480 160 480
3248 - 480 480 480 480
3249 - 160 480 300 480
3250 - 480 480 480 480
3251 -AU.U..GC
3252 - 480 480 480 480
3253 - 480 300 480 210
3254 - 480 480 480 480
3255 - 480 250 480 200
3256 -AU.A..UA
3257 - 390 370 310 550
3258 - 360 320 310 550
3259 - 250 210 190 550
3260 - 550 550 550 550
3261 -AU.C..UA
3262 - 380 370 550 370
3263 - 370 400 550 370
3264 - 550 550 550 550
3265 - 400 340 550 370
3266 -AU.G..UA
3267 - 320 550 230 550
3268 - 550 550 550 550
3269 - 230 550 370 550
3270 - 550 550 550 550
3271 -AU.U..UA
3272 - 550 550 550 550
3273 - 550 370 550 280
3274 - 550 550 550 550
3275 - 550 320 550 270
3276 -AU.A..GU
3277 - 390 370 310 550
3278 - 360 320 310 550
3279 - 250 210 190 550
3280 - 550 550 550 550
3281 -AU.C..GU
3282 - 380 370 550 370
3283 - 370 400 550 370
3284 - 550 550 550 550
3285 - 400 340 550 370
3286 -AU.G..GU
3287 - 320 550 230 550
3288 - 550 550 550 550
3289 - 230 550 370 550
3290 - 550 550 550 550
3291 -AU.U..GU
3292 - 550 550 550 550
3293 - 550 370 550 280
3294 - 550 550 550 550
3295 - 550 320 550 270
3296 -AU.A..UG
3297 - 390 370 310 550
3298 - 360 320 310 550
3299 - 250 210 190 550
3300 - 550 550 550 550
3301 -AU.C..UG
3302 - 380 370 550 370
3303 - 370 400 550 370
3304 - 550 550 550 550
3305 - 400 340 550 370
3306 -AU.G..UG
3307 - 320 550 230 550
3308 - 550 550 550 550
3309 - 230 550 370 550
3310 - 550 550 550 550
3311 -AU.U..UG
3312 - 550 550 550 550
3313 - 550 370 550 280
3314 - 550 550 550 550
3315 - 550 320 550 270
3316 -CG.A..AU
3317 - 320 300 240 480
3318 - 290 250 240 480
3319 - 180 140 120 480
3320 - 480 480 480 480
3321 -CG.C..AU
3322 - 310 300 480 300
3323 - 300 330 480 300
3324 - 480 480 480 480
3325 - 330 270 480 300
3326 -CG.G..AU
3327 - 250 480 160 480
3328 - 480 480 480 480
3329 - 160 480 300 480
3330 - 480 480 480 480
3331 -CG.U..AU
3332 - 480 480 480 480
3333 - 480 300 480 210
3334 - 480 480 480 480
3335 - 480 250 480 200
3336 -CG.A..CG
3337 - 230 220 110 400
3338 - 210 170 160 400
3339 - 80 60 40 400
3340 - 400 400 400 400
3341 -CG.C..CG
3342 - 230 220 400 220
3343 - 220 250 400 220
3344 - 400 400 400 400
3345 - 250 190 400 220
3346 -CG.G..CG
3347 - 170 400 80 400
3348 - 400 400 400 400
3349 - 80 400 220 400
3350 - 400 400 400 400
3351 -CG.U..CG
3352 - 400 400 400 400
3353 - 400 220 400 150
3354 - 400 400 400 400
3355 - 400 170 400 120
3356 -CG.A..GC
3357 - 240 220 160 400
3358 - 210 170 160 400
3359 - 100 60 40 400
3360 - 400 400 400 400
3361 -CG.C..GC
3362 - 230 220 400 220
3363 - 220 250 400 220
3364 - 400 400 400 400
3365 - 250 190 400 220
3366 -CG.G..GC
3367 - 170 400 80 400
3368 - 400 400 400 400
3369 - 80 400 220 400
3370 - 400 400 400 400
3371 -CG.U..GC
3372 - 400 400 400 400
3373 - 400 220 400 130
3374 - 400 400 400 400
3375 - 400 170 400 120
3376 -CG.A..UA
3377 - 320 300 240 480
3378 - 290 250 240 480
3379 - 180 140 120 480
3380 - 480 480 480 480
3381 -CG.C..UA
3382 - 310 300 480 300
3383 - 300 330 480 300
3384 - 480 480 480 480
3385 - 330 270 480 300
3386 -CG.G..UA
3387 - 250 480 160 480
3388 - 480 480 480 480
3389 - 160 480 300 480
3390 - 480 480 480 480
3391 -CG.U..UA
3392 - 480 480 480 480
3393 - 480 300 480 210
3394 - 480 480 480 480
3395 - 480 250 480 200
3396 -CG.A..GU
3397 - 320 300 240 480
3398 - 290 250 240 480
3399 - 180 140 120 480
3400 - 480 480 480 480
3401 -CG.C..GU
3402 - 310 300 480 300
3403 - 300 330 480 300
3404 - 480 480 480 480
3405 - 330 270 480 300
3406 -CG.G..GU
3407 - 250 480 160 480
3408 - 480 480 480 480
3409 - 160 480 300 480
3410 - 480 480 480 480
3411 -CG.U..GU
3412 - 480 480 480 480
3413 - 480 300 480 210
3414 - 480 480 480 480
3415 - 480 250 480 200
3416 -CG.A..UG
3417 - 320 300 240 480
3418 - 290 250 240 480
3419 - 180 140 120 480
3420 - 480 480 480 480
3421 -CG.C..UG
3422 - 310 300 480 300
3423 - 300 330 480 300
3424 - 480 480 480 480
3425 - 330 270 480 300
3426 -CG.G..UG
3427 - 250 480 160 480
3428 - 480 480 480 480
3429 - 160 480 300 480
3430 - 480 480 480 480
3431 -CG.U..UG
3432 - 480 480 480 480
3433 - 480 300 480 210
3434 - 480 480 480 480
3435 - 480 250 480 200
3436 -GC.A..AU
3437 - 320 300 240 480
3438 - 290 250 240 480
3439 - 180 140 120 480
3440 - 480 480 480 480
3441 -GC.C..AU
3442 - 310 300 480 300
3443 - 300 330 480 300
3444 - 480 480 480 480
3445 - 330 270 480 300
3446 -GC.G..AU
3447 - 250 480 160 480
3448 - 480 480 480 480
3449 - 160 480 300 480
3450 - 480 480 480 480
3451 -GC.U..AU
3452 - 480 480 480 480
3453 - 480 300 480 210
3454 - 480 480 480 480
3455 - 480 250 480 200
3456 -GC.A..CG
3457 - 240 220 160 400
3458 - 210 170 160 400
3459 - 100 60 40 400
3460 - 400 400 400 400
3461 -GC.C..CG
3462 - 230 220 400 220
3463 - 220 250 400 220
3464 - 400 400 400 400
3465 - 250 190 400 220
3466 -GC.G..CG
3467 - 170 400 80 400
3468 - 400 400 400 400
3469 - 80 400 220 400
3470 - 400 400 400 400
3471 -GC.U..CG
3472 - 400 400 400 400
3473 - 400 220 400 130
3474 - 400 400 400 400
3475 - 400 170 400 120
3476 -GC.A..GC
3477 - 250 220 210 400
3478 - 210 170 160 400
3479 - 120 60 40 400
3480 - 400 400 400 400
3481 -GC.C..GC
3482 - 230 220 400 220
3483 - 220 250 400 220
3484 - 400 400 400 400
3485 - 250 190 400 220
3486 -GC.G..GC
3487 - 170 400 80 400
3488 - 400 400 400 400
3489 - 80 400 220 400
3490 - 400 400 400 400
3491 -GC.U..GC
3492 - 400 400 400 400
3493 - 400 220 400 120
3494 - 400 400 400 400
3495 - 400 170 400 120
3496 -GC.A..UA
3497 - 320 300 240 480
3498 - 290 250 240 480
3499 - 180 140 120 480
3500 - 480 480 480 480
3501 -GC.C..UA
3502 - 310 300 480 300
3503 - 300 330 480 300
3504 - 480 480 480 480
3505 - 330 270 480 300
3506 -GC.G..UA
3507 - 250 480 160 480
3508 - 480 480 480 480
3509 - 160 480 300 480
3510 - 480 480 480 480
3511 -GC.U..UA
3512 - 480 480 480 480
3513 - 480 300 480 210
3514 - 480 480 480 480
3515 - 480 250 480 200
3516 -GC.A..GU
3517 - 320 300 240 480
3518 - 290 250 240 480
3519 - 180 140 120 480
3520 - 480 480 480 480
3521 -GC.C..GU
3522 - 310 300 480 300
3523 - 300 330 480 300
3524 - 480 480 480 480
3525 - 330 270 480 300
3526 -GC.G..GU
3527 - 250 480 160 480
3528 - 480 480 480 480
3529 - 160 480 300 480
3530 - 480 480 480 480
3531 -GC.U..GU
3532 - 480 480 480 480
3533 - 480 300 480 210
3534 - 480 480 480 480
3535 - 480 250 480 200
3536 -GC.A..UG
3537 - 320 300 240 480
3538 - 290 250 240 480
3539 - 180 140 120 480
3540 - 480 480 480 480
3541 -GC.C..UG
3542 - 310 300 480 300
3543 - 300 330 480 300
3544 - 480 480 480 480
3545 - 330 270 480 300
3546 -GC.G..UG
3547 - 250 480 160 480
3548 - 480 480 480 480
3549 - 160 480 300 480
3550 - 480 480 480 480
3551 -GC.U..UG
3552 - 480 480 480 480
3553 - 480 300 480 210
3554 - 480 480 480 480
3555 - 480 250 480 200
3556 -UA.A..AU
3557 - 390 370 310 550
3558 - 360 320 310 550
3559 - 250 210 190 550
3560 - 550 550 550 550
3561 -UA.C..AU
3562 - 380 370 550 370
3563 - 370 400 550 370
3564 - 550 550 550 550
3565 - 400 340 550 370
3566 -UA.G..AU
3567 - 320 550 230 550
3568 - 550 550 550 550
3569 - 230 550 370 550
3570 - 550 550 550 550
3571 -UA.U..AU
3572 - 550 550 550 550
3573 - 550 370 550 280
3574 - 550 550 550 550
3575 - 550 320 550 270
3576 -UA.A..CG
3577 - 320 300 240 480
3578 - 290 250 240 480
3579 - 180 140 120 480
3580 - 480 480 480 480
3581 -UA.C..CG
3582 - 310 300 480 300
3583 - 300 330 480 300
3584 - 480 480 480 480
3585 - 330 270 480 300
3586 -UA.G..CG
3587 - 250 480 160 480
3588 - 480 480 480 480
3589 - 160 480 300 480
3590 - 480 480 480 480
3591 -UA.U..CG
3592 - 480 480 480 480
3593 - 480 300 480 210
3594 - 480 480 480 480
3595 - 480 250 480 200
3596 -UA.A..GC
3597 - 320 300 240 480
3598 - 290 250 240 480
3599 - 180 140 120 480
3600 - 480 480 480 480
3601 -UA.C..GC
3602 - 310 300 480 300
3603 - 300 330 480 300
3604 - 480 480 480 480
3605 - 330 270 480 300
3606 -UA.G..GC
3607 - 250 480 160 480
3608 - 480 480 480 480
3609 - 160 480 300 480
3610 - 480 480 480 480
3611 -UA.U..GC
3612 - 480 480 480 480
3613 - 480 300 480 210
3614 - 480 480 480 480
3615 - 480 250 480 200
3616 -UA.A..UA
3617 - 390 370 310 550
3618 - 360 320 310 550
3619 - 250 210 190 550
3620 - 550 550 550 550
3621 -UA.C..UA
3622 - 380 370 550 370
3623 - 370 400 550 370
3624 - 550 550 550 550
3625 - 400 340 550 370
3626 -UA.G..UA
3627 - 320 550 230 550
3628 - 550 550 550 550
3629 - 230 550 370 550
3630 - 550 550 550 550
3631 -UA.U..UA
3632 - 550 550 550 550
3633 - 550 370 550 280
3634 - 550 550 550 550
3635 - 550 320 550 270
3636 -UA.A..GU
3637 - 390 370 310 550
3638 - 360 320 310 550
3639 - 250 210 190 550
3640 - 550 550 550 550
3641 -UA.C..GU
3642 - 380 370 550 370
3643 - 370 400 550 370
3644 - 550 550 550 550
3645 - 400 340 550 370
3646 -UA.G..GU
3647 - 320 550 230 550
3648 - 550 550 550 550
3649 - 230 550 370 550
3650 - 550 550 550 550
3651 -UA.U..GU
3652 - 550 550 550 550
3653 - 550 370 550 280
3654 - 550 550 550 550
3655 - 550 320 550 270
3656 -UA.A..UG
3657 - 390 370 310 550
3658 - 360 320 310 550
3659 - 250 210 190 550
3660 - 550 550 550 550
3661 -UA.C..UG
3662 - 380 370 550 370
3663 - 370 400 550 370
3664 - 550 550 550 550
3665 - 400 340 550 370
3666 -UA.G..UG
3667 - 320 550 230 550
3668 - 550 550 550 550
3669 - 230 550 370 550
3670 - 550 550 550 550
3671 -UA.U..UG
3672 - 550 550 550 550
3673 - 550 370 550 280
3674 - 550 550 550 550
3675 - 550 320 550 270
3676 -GU.A..AU
3677 - 390 370 310 550
3678 - 360 320 310 550
3679 - 250 210 190 550
3680 - 550 550 550 550
3681 -GU.C..AU
3682 - 380 370 550 370
3683 - 370 400 550 370
3684 - 550 550 550 550
3685 - 400 340 550 370
3686 -GU.G..AU
3687 - 320 550 230 550
3688 - 550 550 550 550
3689 - 230 550 370 550
3690 - 550 550 550 550
3691 -GU.U..AU
3692 - 550 550 550 550
3693 - 550 370 550 280
3694 - 550 550 550 550
3695 - 550 320 550 270
3696 -GU.A..CG
3697 - 320 300 240 480
3698 - 290 250 240 480
3699 - 180 140 120 480
3700 - 480 480 480 480
3701 -GU.C..CG
3702 - 310 300 480 300
3703 - 300 330 480 300
3704 - 480 480 480 480
3705 - 330 270 480 300
3706 -GU.G..CG
3707 - 250 480 160 480
3708 - 480 480 480 480
3709 - 160 480 300 480
3710 - 480 480 480 480
3711 -GU.U..CG
3712 - 480 480 480 480
3713 - 480 300 480 210
3714 - 480 480 480 480
3715 - 480 250 480 200
3716 -GU.A..GC
3717 - 320 300 240 480
3718 - 290 250 240 480
3719 - 180 140 120 480
3720 - 480 480 480 480
3721 -GU.C..GC
3722 - 310 300 480 300
3723 - 300 330 480 300
3724 - 480 480 480 480
3725 - 330 270 480 300
3726 -GU.G..GC
3727 - 250 480 160 480
3728 - 480 480 480 480
3729 - 160 480 300 480
3730 - 480 480 480 480
3731 -GU.U..GC
3732 - 480 480 480 480
3733 - 480 300 480 210
3734 - 480 480 480 480
3735 - 480 250 480 200
3736 -GU.A..UA
3737 - 390 370 310 550
3738 - 360 320 310 550
3739 - 250 210 190 550
3740 - 550 550 550 550
3741 -GU.C..UA
3742 - 380 370 550 370
3743 - 370 400 550 370
3744 - 550 550 550 550
3745 - 400 340 550 370
3746 -GU.G..UA
3747 - 320 550 230 550
3748 - 550 550 550 550
3749 - 230 550 370 550
3750 - 550 550 550 550
3751 -GU.U..UA
3752 - 550 550 550 550
3753 - 550 370 550 280
3754 - 550 550 550 550
3755 - 550 320 550 270
3756 -GU.A..GU
3757 - 390 370 310 550
3758 - 360 320 310 550
3759 - 250 210 190 550
3760 - 550 550 550 550
3761 -GU.C..GU
3762 - 380 370 550 370
3763 - 370 400 550 370
3764 - 550 550 550 550
3765 - 400 340 550 370
3766 -GU.G..GU
3767 - 320 550 230 550
3768 - 550 550 550 550
3769 - 230 550 370 550
3770 - 550 550 550 550
3771 -GU.U..GU
3772 - 550 550 550 550
3773 - 550 370 550 280
3774 - 550 550 550 550
3775 - 550 320 550 270
3776 -GU.A..UG
3777 - 390 370 310 550
3778 - 360 320 310 550
3779 - 250 210 190 550
3780 - 550 550 550 550
3781 -GU.C..UG
3782 - 380 370 550 370
3783 - 370 400 550 370
3784 - 550 550 550 550
3785 - 400 340 550 370
3786 -GU.G..UG
3787 - 320 550 230 550
3788 - 550 550 550 550
3789 - 230 550 370 550
3790 - 550 550 550 550
3791 -GU.U..UG
3792 - 550 550 550 550
3793 - 550 370 550 280
3794 - 550 550 550 550
3795 - 550 320 550 270
3796 -UG.A..AU
3797 - 390 370 310 550
3798 - 360 320 310 550
3799 - 250 210 190 550
3800 - 550 550 550 550
3801 -UG.C..AU
3802 - 380 370 550 370
3803 - 370 400 550 370
3804 - 550 550 550 550
3805 - 400 340 550 370
3806 -UG.G..AU
3807 - 320 550 230 550
3808 - 550 550 550 550
3809 - 230 550 370 550
3810 - 550 550 550 550
3811 -UG.U..AU
3812 - 550 550 550 550
3813 - 550 370 550 280
3814 - 550 550 550 550
3815 - 550 320 550 270
3816 -UG.A..CG
3817 - 320 300 240 480
3818 - 290 250 240 480
3819 - 180 140 120 480
3820 - 480 480 480 480
3821 -UG.C..CG
3822 - 310 300 480 300
3823 - 300 330 480 300
3824 - 480 480 480 480
3825 - 330 270 480 300
3826 -UG.G..CG
3827 - 250 480 160 480
3828 - 480 480 480 480
3829 - 160 480 300 480
3830 - 480 480 480 480
3831 -UG.U..CG
3832 - 480 480 480 480
3833 - 480 300 480 210
3834 - 480 480 480 480
3835 - 480 250 480 200
3836 -UG.A..GC
3837 - 320 300 240 480
3838 - 290 250 240 480
3839 - 180 140 120 480
3840 - 480 480 480 480
3841 -UG.C..GC
3842 - 310 300 480 300
3843 - 300 330 480 300
3844 - 480 480 480 480
3845 - 330 270 480 300
3846 -UG.G..GC
3847 - 250 480 160 480
3848 - 480 480 480 480
3849 - 160 480 300 480
3850 - 480 480 480 480
3851 -UG.U..GC
3852 - 480 480 480 480
3853 - 480 300 480 210
3854 - 480 480 480 480
3855 - 480 250 480 200
3856 -UG.A..UA
3857 - 390 370 310 550
3858 - 360 320 310 550
3859 - 250 210 190 550
3860 - 550 550 550 550
3861 -UG.C..UA
3862 - 380 370 550 370
3863 - 370 400 550 370
3864 - 550 550 550 550
3865 - 400 340 550 370
3866 -UG.G..UA
3867 - 320 550 230 550
3868 - 550 550 550 550
3869 - 230 550 370 550
3870 - 550 550 550 550
3871 -UG.U..UA
3872 - 550 550 550 550
3873 - 550 370 550 280
3874 - 550 550 550 550
3875 - 550 320 550 270
3876 -UG.A..GU
3877 - 390 370 310 550
3878 - 360 320 310 550
3879 - 250 210 190 550
3880 - 550 550 550 550
3881 -UG.C..GU
3882 - 380 370 550 370
3883 - 370 400 550 370
3884 - 550 550 550 550
3885 - 400 340 550 370
3886 -UG.G..GU
3887 - 320 550 230 550
3888 - 550 550 550 550
3889 - 230 550 370 550
3890 - 550 550 550 550
3891 -UG.U..GU
3892 - 550 550 550 550
3893 - 550 370 550 280
3894 - 550 550 550 550
3895 - 550 320 550 270
3896 -UG.A..UG
3897 - 390 370 310 550
3898 - 360 320 310 550
3899 - 250 210 190 550
3900 - 550 550 550 550
3901 -UG.C..UG
3902 - 380 370 550 370
3903 - 370 400 550 370
3904 - 550 550 550 550
3905 - 400 340 550 370
3906 -UG.G..UG
3907 - 320 550 230 550
3908 - 550 550 550 550
3909 - 230 550 370 550
3910 - 550 550 550 550
3911 -UG.U..UG
3912 - 550 550 550 550
3913 - 550 370 550 280
3914 - 550 550 550 550
3915 - 550 320 550 270
3916 ->POLYC - Penalty for poly C hairpins.
3917 ->First number is penalty for polyC triloop
3918 ->Second number is "slope", i.e. penalty per C in a non triloop
3919 ->Third number is "intercept", i.e. constant penalty for non triloops
3920 - 140 30 160
3921 ->BETA - Pseudoknot energy parameters. B1 B2 B3 B1M B1P
3922 ->B1 = Constant penalty for "open" pseudoknot
3923 ->B2 = penalty per pair in pseudoknot
3924 ->B3 = penalty per base in pseudoknot
3925 ->B1M = constant penalty for pseudoknot in a "closed" loop
3926 ->B1P = constant penalty for pseudoknot in a pseudoknot.
3927 - 960 10 10 1500 1500
3928 ->BIMOLECULAR //TINOCO, 277
3929 - 409
1 -#!/usr/bin/python3
2 -
3 -# This script extracts the CaRNAval RINs from a Python pickle file, in which
4 -# they are stored as objects of the class defined in CaRNAval's RIN.py.
5 -# We read the pickle because the official JSON file is hard to understand, and
6 -# Antoine Soulé recommended the pickle.
7 -
8 -import networkx, os, pickle, subprocess, sys
9 -
if __name__ == "__main__":
    # Extract every CaRNAval RIN from the pickled dataset into one flat text
    # file per module (Subfiles/<index>.txt).

    rin_DIR = os.getcwd() + "/../data/modules/RIN/"
    filename = "CaRNAval_1_as_dictionnary.nxpickled"

    # Check that we can find CaRNAval RINs: RIN.py has to be importable before
    # the pickle is read. If it is not, download the dataset first.
    try:
        sys.path.append(os.path.abspath(rin_DIR))
        import RIN
    except ImportError:
        # We have to download it
        subprocess.run(["wget", '-O', '../data/modules/carnaval_dataset.zip', "http://carnaval.lri.fr/carnaval_dataset.zip"])
        subprocess.run(["unzip", '-ou', '../data/modules/carnaval_dataset.zip', "carnaval_dataset/CaRNAval_1_as_dictionnary.nxpickled", "carnaval_dataset/RIN.py"])
        # -r is needed here: `rm -f` alone refuses to remove a directory, which
        # would make the following `mv` nest the dataset inside the stale RIN/.
        subprocess.run(["rm", "-rf", "../data/modules/RIN/", "../data/modules/carnaval_dataset.zip"])
        subprocess.run(["mv", "carnaval_dataset/", "../data/modules/RIN/"])
        sys.path.append(os.path.abspath(rin_DIR))
        import RIN

    # NOTE(review): pickle.load executes code embedded in the file; this is only
    # acceptable because the dataset comes from the CaRNAval authors.
    try:
        objects = []
        with open(rin_DIR + filename, "rb") as openfile:
            while True:
                try:
                    objects.append(pickle.load(openfile))
                except EOFError:
                    break
        print("Dataset loaded")
    except OSError:
        print("File not found : " + rin_DIR + filename)
        sys.exit(1)

    # Creation of a directory to extract RINs from the pickle file to individual files
    try:
        os.makedirs(rin_DIR + "Subfiles", exist_ok=True)
    except OSError:
        print("Creation of the directory %s failed" % (rin_DIR + "Subfiles"))
        sys.exit(1)

    # Loop on every CaRNAval module and extract it from the Python object to flat text file.
    # The modules are read with keys 1..n_modules; files are numbered from 0.
    n_modules = len(objects[0])
    for i in range(1, 1 + n_modules):
        motif = objects[0][i].graph
        nodes = sorted(motif)
        # position of each node in the sorted list (replaces repeated list.index calls)
        rank_of = {node: rank for rank, node in enumerate(nodes)}

        # Group the sorted nodes into runs of consecutive numbers (the components).
        components = [[]]
        for node in nodes:
            current = components[-1]
            if current and current[-1] + 1 != node:  # not the same component
                components.append([node])
            else:
                current.append(node)

        # Only the 'CWW'-labelled edges are written, each as "ntA,ntB,long_range;".
        basepairs = ""
        for edge in motif.edges():
            attributes = motif.edges[edge]
            if attributes['label'] == 'CWW':
                ntA = rank_of[edge[0]]
                ntB = rank_of[edge[1]]
                if ntA <= ntB:
                    basepairs += str(ntA) + "," + str(ntB) + "," + str(attributes['long_range']) + ";"

        # The context manager closes the file even if a module is malformed.
        with open(rin_DIR + "Subfiles/" + str(i - 1) + ".txt", "w+") as f:
            f.write("ntA,ntB,long_range;...\n")
            f.write(basepairs + "\n")
            f.write("pos;k;seq\n")

            num_nt = -1
            for component in components:
                seq = ""
                data_comp = str(num_nt + 1)
                for node in component:
                    num_nt += 1
                    # sometimes in the nxpickled file, a node has the attribute "realnt",
                    # and sometimes "real_nt", but it's the same thing
                    node_data = motif.nodes[node]
                    try:
                        seq += node_data["realnt"]
                    except KeyError:
                        seq += node_data["real_nt"]
                data_comp += "," + str(num_nt) + ";" + str(len(component)) + ";" + seq + "\n"
                f.write(data_comp)

    print("Successfully parsed "+filename, ", now individual RINs are saved in Subfiles/ folder.", sep='')
107 -
1 -# ============================ IMPORTS ====================================
2 -import subprocess
3 -import time
4 -import resource
5 -
6 -# take a RNA sequence and cut it from 100 bases to actual length
7 -# then measure computation time, peak memory, and number of solutions for each length
8 -
9 -# This RNA is actually a 16S rRNA from PDB 1J5E.
10 -# http://ndbserver.rutgers.edu/service/ndb/atlas/summary
11 -seq = "UUUGUUGGAGAGUUUGAUCCUGGCUCAGGGUGAACGCUGGCGGCGUGCCUAAGACAUGCAAGUCGUGCGGGCCGCGGGGUUUUACUCCGUGGUCAGCGGCGGACGGGUGAGUAACGCGUGGGUGACCUACCCGGAAGAGGGGGACAACCCGGGGAAACUCGGGCUAAUCCCCCAUGUGGACCCGCCCCUUGGGGUGUGUCCAAAGGGCUUUGCCCGCUUCCGGAUGGGCCCGCGUCCCAUCAGCUAGUUGGUGGGGUAAUGGCCCACCAAGGCGACGACGGGUAGCCGGUCUGAGAGGAUGGCCGGCCACAGGGGCACUGAGACACGGGCCCCACUCCUACGGGAGGCAGCAGUUAGGAAUCUUCCGCAAUGGGCGCAAGCCUGACGGAGCGACGCCGCUUGGAGGAAGAAGCCCUUCGGGGUGUAAACUCCUGAACCCGGGACGAAACCCCCGACGAGGGGACUGACGGUACCGGGGUAAUAGCGCCGGCCAACUCCGUGCCAGCAGCCGCGGUAAUACGGAGGGCGCGAGCGUUACCCGGAUUCACUGGGCGUAAAGGGCGUGUAGGCGGCCUGGGGCGUCCCAUGUGAAAGACCACGGCUCAACCGUGGGGGAGCGUGGGAUACGCUCAGGCUAGACGGUGGGAGAGGGUGGUGGAAUUCCCGGAGUAGCGGUGAAAUGCGCAGAUACCGGGAGGAACGCCGAUGGCGAAGGCAGCCACCUGGUCCACCCGUGACGCUGAGGCGCGAAAGCGUGGGGAGCAAACCGGAUUAGAUACCCGGGUAGUCCACGCCCUAAACGAUGCGCGCUAGGUCUCUGGGUCUCCUGGGGGCCGAAGCUAACGCGUUAAGCGCGCCGCCUGGGGAGUACGGCCGCAAGGCUGAAACUCAAAGGAAUUGACGGGGGCCCGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAAGCAACGCGAAGAACCUUACCAGGCCUUGACAUGCUAGGGAACCCGGGUGAAAGCCUGGGGUGCCCCGCGAGGGGAGCCCUAGCACAGGUGCUGCAUGGCCGUCGUCAGCUCGUGCCGUGAGGUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCCCGCCGUUAGUUGCCAGCGGUUCGGCCGGGCACUCUAACGGGACUGCCCGCGAAAGCGGGAGGAAGGAGGGGACGACGUCUGGUCAGCAUGGCCCUUACGGCCUGGGCGACACACGUGCUACAAUGCCCACUACAAAGCGAUGCCACCCGGCAACGGGGAGCUAAUCGCAAAAAGGUGGGCCCAGUUCGGAUUGGGGUCUGCAACCCGACCCCAUGAAGCCGGAAUCGCUAGUAAUCGCGGAUCAGCCAUGCCGCGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACGCCAUGGGAGCGGGCUCUACCCGAAGUCGCCGGGAGCCUACGGGCAGGCGCCGAGGGUAGGGCCCGUGACUGGGGCGAAGUCGUAACAAGGUAGCUGUACCGGAAGGUGCGGCUGGAUCACCUCCUUUCU"
12 -
step = 100
n = len(seq)
# Single source of truth for the temporary FASTA file: the same path is written
# below and handed to biorseo (the two used to differ).
fasta_path = "data/fasta/ZDFS33.fa"

# Grow the prefix by 50 nucleotides per run; the last run uses the full sequence.
while step < n + 50:
    sub_seq = seq[0:min(step, n)]

    # write the sequence to file
    with open(fasta_path, 'w') as fasta:
        fasta.write(">__'ZDFS33 : 0-" + str(len(sub_seq)) + "'\n" + sub_seq)

    # run biorseo on it, with default options
    cmd = ["./bin/biorseo", "-d", "./data/modules/DESC", "-s", fasta_path, "-v"]
    old_time = time.time()
    output = subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode("utf-8").split("\n")[-5:]
    run_time = time.time() - old_time
    # ru_maxrss of RUSAGE_CHILDREN covers all children waited for so far, not
    # only the run that just finished.
    max_ram = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss

    # Reset for every run so that a missing summary line is reported as None
    # instead of raising NameError or reusing the previous run's value.
    nb_sol = None
    for line in output:
        if "Quitting because combinatorial issues" in line:
            nb_sol = -1
        elif "solutions kept" in line:
            nb_sol = line.split(",")[1].split()[0]

    print(len(sub_seq), "first nucleotides :", nb_sol, "solutions in", run_time, "seconds, using", max_ram, "kb of RAM")

    step += 50
#!/bin/bash
# Record of the steps used to build the BiORSEO docker image.
# The script prints a warning and stops at `exit 0`; the commands after it are
# kept as documentation and are not executed.

echo "WARNING: The purpose of this file is to document how the docker image was built.";
echo "You cannot execute it directly, because of licensing reasons. Please get your own:";
echo "- CPLEX academic version: cplex_installer_12.8_Student.bin";
echo "- Nupack header files: nupack_3.2.2.tar.gz";
exit 0;

# Resolve the directory of this script BEFORE changing directory (a relative
# ${BASH_SOURCE[0]} would otherwise be resolved from the wrong place), then
# move to its parent so the steps do not depend on the caller's cwd.
THISDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd "$THISDIR/.."

####################################################### Dependencies ##############################################################
sudo apt install -y clang-7 cmake make automake libboost-program-options-dev libboost-filesystem-dev openjdk-11-jre
sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-7 100
sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-7 100

# CPLEX: only to build biorseo
# HERE YOU SHOULD GET YOUR OWN cplex_installer_12.8_Student.bin ! I am not allowed to share mine anymore.
chmod +x cplex_installer_12.8_Student.bin
# the printf feeds the answers to the installer's interactive prompts
printf "4\n\n1\n\n\n\n\n" | sudo ./cplex_installer_12.8_Student.bin
rm cplex_installer_12.8_Student.bin

# Eigen: only to build biorseo (no need to give it to the docker image)
wget http://bitbucket.org/eigen/eigen/get/3.3.7.tar.gz -O eigen_src.tar.gz
tar -xf eigen_src.tar.gz
cd eigen-eigen-323c052e1731
mkdir build
cd build
cmake ..
sudo make install
cd ../..
rm -rf eigen_src.tar.gz eigen-eigen-323c052e1731

# Nupack: only to build biorseo (no need to give it to the docker image)
#curl -u yourname@yourUni.com:yourPassword http://www.nupack.org/downloads/serve_file/nupack3.2.2.tar.gz --output nupack3.2.2.tar.gz
tar -xf nupack3.2.2.tar.gz
cd nupack3.2.2
mkdir build
cd build
cmake ..
make -j8
sudo make install
cd ../..
sudo cp nupack3.2.2/src/thermo/*.h /usr/local/include/nupack/thermo/
rm -rf nupack3.2.2.tar.gz nupack3.2.2/

# BayesPairing: install on the docker image (done by the Dockerfile)
git clone http://jwgitlab.cs.mcgill.ca/sarrazin/rnabayespairing.git BayesPairing

######################################################### Build Biorseo ###########################################################
# build here, install later on the docker image (done by the Dockerfile)
mkdir -p results
make -j 8
make clean
rm -rf doc/ obj/

######################################################## Build Docker container ##################################################
# Execute the Dockerfile and build the image
docker build . -t biorseo
#!/bin/bash
# The shebang must be the very first line of the file to be honoured.
######################################################## RNA modules ##############################################################

# Work from the parent of the directory holding this script, whatever the
# caller's current directory is.
cd "$( dirname "${BASH_SOURCE[0]}" )/.."
6 -
7 -# Rna3Dmotifs data
8 -mkdir -p data/modules/DESC
9 -wget https://github.com/McGill-CSB/RNAMoIP/raw/master/CATALOGUE.tgz
10 -tar -xvzf CATALOGUE.tgz
11 -mv No_Redondance_DESC/*.desc data/modules/DESC/
12 -rm -r No_Redondance_VIEW3D No_Redondance_DESC CATALOGUE.tgz
13 -
14 -# The RNA 3D Motif Atlas
15 -mkdir -p data/modules/BGSU
16 -wget http://rna.bgsu.edu/data/jar3d/models/HL/HL_3.2_models.zip
17 -unzip HL_3.2_models.zip
18 -mv HL data/modules/BGSU
19 -rm HL_3.2_models.zip
20 -wget http://rna.bgsu.edu/data/jar3d/models/IL/IL_3.2_models.zip
21 -unzip IL_3.2_models.zip
22 -mv IL data/modules/BGSU
23 -rm IL_3.2_models.zip
24 -
25 -# Install BayesPairing
26 -sudo -H pip3 install --upgrade pip
27 -sudo -H pip3 install networkx numpy regex wrapt biopython
28 -git clone http://jwgitlab.cs.mcgill.ca/sarrazin/rnabayespairing.git BayesPairing
29 -cd BayesPairing
30 -sudo -H pip3 install .
31 -
32 -# Train Bayes Pairing (it has been installed on the image and the source has been deleted, we train the models now, and will remount it as volume at run time)
33 -cd bayespairing/src
34 -python3 parse_sequences.py -d rna3dmotif -seq ACACGGGGUAAGAGCUGAACGCAUCUAAGCUCGAAACCCACUUGGAAAAGAGACACCGCCGAGGUCCCGCGUACAAGACGCGGUCGAUAGACUCGGGGUGUGCGCGUCGAGGUAACGAGACGUUAAGCCCACGAGCACUAACAGACCAAAGCCAUCAU -ss ".................................................................((...............)xxxx(...................................................)xxx).............."
35 -python3 parse_sequences.py -d 3dmotifatlas -seq ACACGGGGUAAGAGCUGAACGCAUCUAAGCUCGAAACCCACUUGGAAAAGAGACACCGCCGAGGUCCCGCGUACAAGACGCGGUCGAUAGACUCGGGGUGUGCGCGUCGAGGUAACGAGACGUUAAGCCCACGAGCACUAACAGACCAAAGCCAUCAU -ss ".................................................................((...............)xxxx(...................................................)xxx).............."
36 -cd ../../..
37 -
38 -######################################################## Run it ##############################################################
39 -
40 -# docker run -v `pwd`/data/modules:/modules -v `pwd`/BayesPairing/bayespairing:/byp -v `pwd`/results:/biorseo/results biorseo ./biorseo.py -i /biorseo/data/fasta/applications.fa --rna3dmotifs --patternmatch --func B
...\ No newline at end of file ...\ No newline at end of file
1 -#!/usr/bin/python3
2 -# Created by Louis Becquey, louis.becquey@univ-evry.fr, Oct 2019
3 -# This script processes files containing RNA structures obtained from bi-objective
4 -# optimization programs, and a dot-bracket database of reference structures, to plot
5 -# where are the best solutions in the Pareto set.
6 -#
7 -# The result files should follow this kind of format:
8 -# for Biokop: (option --biokop)
9 -# Structure Free energy score Expected accuracy score
10 -# (((...(((...)))))) <tab> obj1_value <tab> obj2_value
11 -# (((............))) <tab> obj1_value <tab> obj2_value
12 -# ((((((...)))...))) <tab> obj1_value <tab> obj2_value
13 -# ...
14 -#
15 -# for BiORSEO: (options --biorseo_**stuff**)
16 -# >Header of the sequence
17 -# GGCACAGAGUUAUGUGCC
18 -# (((...(((...)))))) + Motif1 + Motif2 <tab> obj1_value <tab> obj2_value
19 -# (((............))) <tab> obj1_value <tab> obj2_value
20 -# ((((((...)))...))) + Motif1 <tab> obj1_value <tab> obj2_value
21 -#
22 -# typical Biokop usage:
23 -# python3 pareto_visualizer.py --biokop --folder path/to/your/results/folder --database path/to/the/database_file.dbn
24 -# typical Biorseo usage:
25 -# python3 pareto_visualizer.py --folder path/to/your/results/folder --database path/to/the/database_file.dbn
26 -#
27 -
28 -from math import sqrt
29 -import numpy as np
30 -import matplotlib.pyplot as plt
31 -from matplotlib import cm
32 -import scipy.stats as st
33 -import sys
34 -import os
35 -import subprocess
36 -import getopt
37 -
38 -
class SecStruct:
    """A secondary structure in dot-bracket notation with two objective values.

    Attributes:
        dbn: the dot-bracket string.
        objectives: ``[obj1_value, obj2_value]``.
        basepair_list: list of ``(closing_index, opening_index)`` tuples.
        length: number of positions in the dot-bracket string.
    """

    # closing symbol -> matching opening symbol, for every bracket family handled
    _OPENER_OF = {')': '(', ']': '[', '}': '{', '>': '<', 'a': 'A', 'b': 'B'}

    def __init__(self, dot_bracket, obj1_value, obj2_value):
        self.dbn = dot_bracket
        self.objectives = [obj1_value, obj2_value]
        self.basepair_list = self.get_basepairs()
        self.length = len(dot_bracket)

    def get_basepairs(self):
        """Return the base pairs of ``self.dbn`` as (closing, opening) index tuples.

        Each bracket family ``()``, ``[]``, ``{}``, ``<>``, ``Aa`` and ``Bb`` is
        matched on its own stack; any other character is ignored.

        Raises:
            IndexError: if a closing symbol has no pending opening symbol.
        """
        pending = {opener: [] for opener in self._OPENER_OF.values()}
        basepairs = []
        for i, c in enumerate(self.dbn):
            if c in pending:
                pending[c].append(i)
            elif c in self._OPENER_OF:
                basepairs.append((i, pending[self._OPENER_OF[c]].pop()))
        return basepairs

    def get_MCC_with(self, reference_structure):
        """Return the Matthews correlation coefficient against a reference.

        Base pairs of ``reference_structure`` are the expected positives; the
        number of candidate pairs is taken as n*(n-1)/2 with n the reference
        length. When the coefficient is undefined (zero denominator, e.g. this
        structure has no base pair at all) 0.0 is returned instead of raising
        ZeroDivisionError.
        """
        # Sets make the membership tests O(1); a position closes at most one
        # pair, so the lists hold no duplicates and the counts are unchanged.
        predicted = set(self.basepair_list)
        expected = set(reference_structure.basepair_list)

        # Get true and false positives and negatives
        tp = len(expected & predicted)
        fn = len(expected) - tp
        fp = len(predicted) - tp
        n = reference_structure.length
        tn = n * (n - 1) * 0.5 - fp - fn - tp

        # Compute MCC
        if tp + fp == 0:
            print("We have an issue : no positives detected ! (linear structure)")
        denominator = sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
        if denominator == 0:
            return 0.0
        return (tp * tn - fp * fn) / denominator
103 -
104 -
class Pareto:
    """A set of predicted structures for one RNA, with its reference structure."""

    def __init__(self, list_of_structs, reference):
        self.predictions = list_of_structs
        self.true_structure = reference
        self.n_pred = len(list_of_structs)
        # largest value reached by each objective over the whole set
        self.max_obj1 = max(struct.objectives[0] for struct in self.predictions)
        self.max_obj2 = max(struct.objectives[1] for struct in self.predictions)
        self.index_of_best = self.find_best_solution()

    def find_best_solution(self):
        """Return the index of the prediction with the highest MCC against the
        reference structure (first one on ties, -1 if none scores above -1)."""
        best_index, best_mcc = -1, -1
        for index, candidate in enumerate(self.predictions):
            score = candidate.get_MCC_with(self.true_structure)
            if score > best_mcc:
                best_index, best_mcc = index, score
        return best_index

    def get_normalized_coords(self):
        """Return the objectives of the best prediction, each divided by the
        maximum of that objective over the set.

        A coordinate is 0.5 when the corresponding maximum is 0, which avoids
        dividing by zero.
        """
        best = self.predictions[self.index_of_best].objectives
        x = best[0] / self.max_obj1 if self.max_obj1 else 0.5
        y = best[1] / self.max_obj2 if self.max_obj2 else 0.5
        return (x, y)
138 -
139 -
class RNA:
    """Plain record for one database entry: file basename, header line,
    sequence and dot-bracket structure."""

    def __init__(self, filename, header, seq, struct):
        # the trailing underscores are part of the attribute names
        self.basename_, self.header_ = filename, header
        self.seq_, self.struct_ = seq, struct
146 -
147 -
# Counts, per character, how many sequences were rejected because of it.
ignored_nt_dict = {}


def is_canonical_nts(seq):
    """Return True if ``seq`` contains only the nucleotides A, C, G and U.

    Every character is checked, including the last one (it used to be skipped,
    so a sequence ending with a non-canonical nucleotide was accepted). A
    trailing newline, if any, is ignored. The scan stops at the first
    offending character, which is counted in ``ignored_nt_dict``.
    """
    for c in seq.rstrip("\n"):
        if c not in "ACGU":
            ignored_nt_dict[c] = ignored_nt_dict.get(c, 0) + 1
            return False
    return True
160 -
161 -
def is_canonical_bps(struct):
    """Return False when ``struct`` contains a '()' or '[]' pair enclosing
    fewer than three dots, True otherwise."""
    forbidden = ("()", "(.)", "(..)", "[]", "[.]", "[..]")
    return not any(pattern in struct for pattern in forbidden)
176 -
def load_from_dbn(file, header_style=3):
    """Load the RNAs of a dot-bracket database.

    The file is read as consecutive groups of three lines: header, sequence,
    structure. A record is kept when its sequence has 10 to 100 nucleotides,
    passes is_canonical_nts() and is_canonical_bps(), and its structure
    contains at least one '('.

    Args:
        file: path of the database file.
        header_style: selects how the RNA basename is extracted from the header
            (1: text after the last '(' minus its final character, 2: text
            after the last '[' minus its last 41 characters, 3: the header
            without its first character). With any other value no RNA is
            stored.

    Returns:
        (container, pkcounter): the list of RNA objects, and the number of
        accepted records whose structure contains '['.
    """
    container = []
    pkcounter = 0
    header = ""
    seq = ""

    # The context manager closes the file even if a record fails to parse.
    with open(file, "r") as db:
        for line_index, raw_line in enumerate(db):
            # Strip only the newline: slicing off the last character would eat
            # a real symbol when the file does not end with a newline.
            line = raw_line.rstrip("\n")
            position = line_index % 3
            if position == 0:
                header = line
                continue
            if position == 1:
                seq = line.upper()
                continue

            struct = line
            n = len(seq)
            if n < 10 or n > 100:
                continue  # ignore too short and too long RNAs
            if not (is_canonical_nts(seq) and is_canonical_bps(struct) and '(' in struct):
                continue

            if header_style == 1:
                container.append(RNA(header.replace('/', '_').split('(')[-1][:-1], header, seq, struct))
            elif header_style == 2:
                container.append(RNA(header.replace('/', '_').split('[')[-1][:-41], header, seq, struct))
            elif header_style == 3:
                container.append(RNA(header[1:], header, seq, struct))
            if '[' in struct:
                pkcounter += 1
    return container, pkcounter
209 -
210 -
def parse_biokop(folder, basename, ext=".biok"):
    """Read the Pareto set written by Biokop for one RNA.

    The first line of ``folder/basename+ext`` is skipped; every other non-blank
    line is read as ``structure <tab> value <tab> value``. Both values are
    negated, and the third column is stored as the first objective. The first
    objective is then shifted so that its minimum over the set is 0.

    Returns:
        (solutions, 0) when at least two distinct structures were read,
        (None, 1) when the file exists but holds fewer than two distinct
        structures, and (None, 0) when the file does not exist.
    """
    solutions = []
    err = 0
    path = os.path.join(folder, basename + ext)
    if os.path.isfile(path):
        with open(path, "r") as rna:
            lines = rna.readlines()

        different_2ds = set()
        for s in lines[1:]:
            if s == '\n':
                continue
            splitted = s.split('\t')
            db2d = splitted[0]
            different_2ds.add(db2d)
            # here is a negative sign because Biokop actually minimizes -MEA instead
            # of maximizing MEA : we switch back to MEA.
            # float() ignores the trailing newline itself; slicing it off would
            # truncate the value on a last line without newline.
            solutions.append(SecStruct(db2d, -float(splitted[2]), -float(splitted[1])))  # MEA first, MFE second

        # normalize so the minimum MEA of the set is 0 (nothing to do, and
        # nothing to index, when the file held no solution line)
        if solutions:
            min_mea = min(s.objectives[0] for s in solutions)
            for s in solutions:
                s.objectives[0] -= min_mea

        if len(different_2ds) > 1:
            return solutions, err
        print("[%s] \033[36mWARNING: ignoring this RNA, only one 2D solution is found.\033[0m" % (basename))
        err = 1
    return None, err
250 -
251 -
def parse_biorseo(folder, basename, ext):
    """Read the Pareto set written by BiORSEO for one RNA.

    The first two lines of ``folder/basename+ext`` are skipped; every other
    non-blank line is read as ``structure[ + motifs] <tab> value <tab> value``.
    Only the text before the first space of the first column is kept as the
    structure, and the third column is stored as the first objective.

    Returns:
        (solutions, 0) when at least two distinct structures were read,
        (None, 1) when the file exists but holds fewer than two distinct
        structures, and (None, 0) when the file does not exist.
    """
    solutions = []
    err = 0
    path = os.path.join(folder, basename + ext)
    if os.path.isfile(path):
        with open(path, "r") as rna:
            lines = rna.readlines()

        different_2ds = set()
        for s in lines[2:]:
            if s == '\n':
                continue
            splitted = s.split('\t')
            db2d = splitted[0].split(' ')[0]
            different_2ds.add(db2d)
            # float() ignores the trailing newline itself; slicing it off would
            # truncate the value on a last line without newline.
            solutions.append(SecStruct(db2d, float(splitted[2]), float(splitted[1])))  # put MEA first, modules in 2nd (y axis)

        if len(different_2ds) > 1:
            return solutions, err
        print("[%s] \033[36mWARNING: ignoring this RNA, only one 2D solution is found.\033[0m" % (basename))
        err = 1
    return None, err
274 -
275 -
def prettify_biorseo(code):
    """Build a readable panel title from a result-file extension code.

    The module source comes first ("bgsu", else "rin", else the default),
    followed by the label of every placement tag found in ``code``.
    """
    if "bgsu" in code:
        source = "RNA 3D Motif Atlas + "
    elif "rin" in code:
        source = "CaRNAval + "
    else:
        source = "Rna3Dmotifs + "

    placement_labels = (
        ("raw", "Direct P.M."),
        ("byp", "BPairing"),
        ("jar3d", "Jar3d"),
    )
    return source + "".join(label for tag, label in placement_labels if tag in code)
292 -
def process_extension(ax, pos, ext, nsolutions=False, xlabel="Best solution performs\nwell on obj1", ylabel="Best solution performs\n well on obj2"):
    """Plot, on ax[pos], where the best solution of each RNA lies in its Pareto set.

    Relies on the module-level names RNAcontainer, results_folder and parse.
    For every RNA, the result file with extension ``ext`` is parsed, the
    solution with the best MCC is located and its normalized objectives become
    one point. The points are drawn as a scatter plot over a Gaussian kernel
    density estimate. When ``nsolutions`` is true, a histogram of the Pareto
    set sizes is also drawn on ax[pos + 1].
    """
    points = []
    sizes = []
    skipped = 0
    for rna in RNAcontainer:
        # Extracting the predictions from the results file
        solutions, err = parse(results_folder, rna.basename_, ext)
        if solutions is None:
            # only the (None, 0) answers are counted in `skipped`
            skipped += 1 if err == 0 else 0
            continue
        pset = Pareto(solutions, SecStruct(rna.struct_, float("inf"), float("inf")))
        points.append(pset.get_normalized_coords())
        sizes.append(pset.n_pred)
        print("[%s] Loaded %d solutions in a Pareto set, max(obj1)=%f, max(obj2)=%f" % (rna.basename_, pset.n_pred, pset.max_obj1, pset.max_obj2))
    n_expected = len(RNAcontainer) - skipped
    print("Loaded %d points on %d." % (len(points), n_expected))

    # Density of the points, evaluated on a 100x100 grid over the unit square
    x = np.array([point[0] for point in points])
    y = np.array([point[1] for point in points])
    xx, yy = np.mgrid[0:1:100j, 0:1:100j]
    grid = np.vstack([xx.ravel(), yy.ravel()])
    kernel = st.gaussian_kde(np.vstack([x, y]))
    density = np.reshape(kernel(grid).T, xx.shape)

    axis = ax[pos]
    # faint frame around the unit square
    for level in (0, 1):
        axis.axhline(y=level, alpha=0.2, color='black')
    for level in (0, 1):
        axis.axvline(x=level, alpha=0.2, color='black')
    axis.contourf(xx, yy, density, cmap=cm.Blues, alpha=0.5)
    axis.scatter(x, y, s=25, alpha=0.1)
    axis.set_xlim((-0.1, 1.1))
    axis.set_ylim((-0.1, 1.1))
    axis.set_title(prettify_biorseo(ext[1:]), fontsize=10)
    axis.annotate("(" + str(len(points)) + '/' + str(n_expected) + " RNAs)", (0.08, 0.15))
    axis.set_xlabel(xlabel)
    axis.set_ylabel(ylabel)

    if nsolutions:
        hist_axis = ax[pos + 1]
        largest = max(sizes)
        ticks = range(0, largest, 10)
        hist_axis.hist(sizes, bins=range(0, largest + 1, 2), histtype='bar')
        hist_axis.set_xlim((0, largest + 2))
        hist_axis.set_xticks(ticks)
        hist_axis.set_xticklabels(ticks, rotation=90)
        hist_axis.set_xlabel("# solutions")
        hist_axis.set_ylabel("# RNAs")
340 -
341 -
if __name__ == "__main__":
    # Command-line entry point: parse the options, load the reference database
    # and draw either the full grid of BiORSEO variants or a single figure.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "",
                                   ["biorseo_desc_byp_A", "biorseo_desc_byp_B",
                                    "biorseo_desc_byp_C", "biorseo_desc_byp_D",
                                    "biorseo_bgsu_byp_A", "biorseo_bgsu_byp_B",
                                    "biorseo_bgsu_byp_C", "biorseo_bgsu_byp_D",
                                    "biorseo_desc_raw_A", "biorseo_desc_raw_B",
                                    "biorseo_bgsu_jar3d_A", "biorseo_bgsu_jar3d_B",
                                    "biorseo_bgsu_jar3d_C", "biorseo_bgsu_jar3d_D",
                                    "biorseo_rin_raw_A", "biorseo_rin_raw_B",
                                    "biokop", "folder=", "database=", "output="
                                    ])
    except getopt.GetoptError as err:
        print(err)
        sys.exit(2)

    results_folder = "."
    extension = "all"
    outputf = ""
    database = None
    output_requested = False
    for opt, arg in opts:
        if opt == "--biokop":
            extension = ".biok"
            parse = parse_biokop
        elif opt == "--folder":
            results_folder = arg
        elif opt == "--database":
            database = arg
        elif opt == "--output":
            outputf = arg
            output_requested = True
        else:
            # any --biorseo_* flag selects the result files with that extension
            extension = '.' + opt[2:]
            parse = parse_biorseo

    # Without --database the script used to die later with a NameError.
    if database is None:
        print("Missing option: --database path/to/the/database_file.dbn")
        sys.exit(2)

    RNAcontainer, _ = load_from_dbn(database)

    if not results_folder.endswith('/'):
        results_folder = results_folder + '/'
    if outputf == "":
        outputf = results_folder
    if not outputf.endswith('/'):
        outputf = outputf + '/'
    # --output was parsed but never used. It is honoured only when given
    # explicitly, so that the default location of the figures (the current
    # directory) does not change.
    figure_dir = outputf if output_requested else ""

    if extension == "all":
        parse = parse_biorseo
        fig, ax = plt.subplots(4, 5, figsize=(12, 10), sharex=True, sharey=True)
        ax = ax.flatten()

        # (position in the flattened 4x5 grid, result-file code); one row per
        # objective function A-D. The cells listed in `unused` have no result.
        panels = [
            (0, "desc_raw_A"), (1, "rin_raw_A"), (2, "desc_byp_A"), (3, "bgsu_byp_A"), (4, "bgsu_jar3d_A"),
            (5, "desc_raw_B"), (6, "rin_raw_B"), (7, "desc_byp_B"), (8, "bgsu_byp_B"), (9, "bgsu_jar3d_B"),
            (12, "desc_byp_C"), (13, "bgsu_byp_C"), (14, "bgsu_jar3d_C"),
            (17, "desc_byp_D"), (18, "bgsu_byp_D"), (19, "bgsu_jar3d_D"),
        ]
        unused = (10, 11, 15, 16)

        for position, code in panels:
            # process_extension() sets the panel title itself from the extension
            process_extension(ax, position, ".biorseo_" + code,
                              ylabel="Normalized $f_{1%s}$" % code[-1], xlabel="Normalized MEA")
        for position in unused:
            ax[position].axis("off")
        for a in ax:
            a.label_outer()
        plt.subplots_adjust(bottom=0.05, top=0.95, left=0.07, right=0.98, hspace=0.1, wspace=0.05)
        plt.savefig(figure_dir + "pareto_visualizer.png")
    else:
        fig, ax = plt.subplots(2, 1, figsize=(6, 10))
        plt.subplots_adjust(bottom=0.12, top=0.9, left=0.15, right=0.9, hspace=0.4)
        if extension == ".biok":
            process_extension(ax, 0, extension, nsolutions=True, ylabel="Normalized MFE", xlabel="Normalized MEA")
        else:
            process_extension(ax, 0, extension, nsolutions=False)
        plt.savefig(figure_dir + "pareto_visualizer_ext.png")
...\ No newline at end of file ...\ No newline at end of file